{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": [
"9pWjsoRu0NP1"
],
"toc_visible": true,
"gpuType": "T4",
"authorship_tag": "ABX9TyNwUGOJJmdoUhSvbivS88h8",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"source": [
"# Tiny test of recent text-to-music (TTM) models\n",
"\n",
"To run this notebook you need to do three things:\n",
"1. Make sure the Colab runtime has an NVIDIA GPU available because CUDA is assumed.\n",
"1. Request access to [Stable Audio Open](https://huggingface.co/stabilityai/stable-audio-open-1.0) and create a corresponding [access token](https://huggingface.co/settings/tokens) to paste into the Hugging Face login screen below.\n",
"1. Pray to the software dependency gods that the `pip` install below still works[.](https://nixos.org/)"
],
"metadata": {
"id": "BqyjOuyQl2Un"
}
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "9pWjsoRu0NP1"
}
},
{
"cell_type": "code",
"source": [
"%pip install -q diffusers==0.30.2 transformers==4.44.2 torchsde==0.2.6"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "E__6vJXkf6GE",
"outputId": "563a70f6-7ecb-4b84-9308-5271c1ff1c4b"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: diffusers in /usr/local/lib/python3.10/dist-packages (0.30.2)\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n",
"Requirement already satisfied: torchsde in /usr/local/lib/python3.10/dist-packages (0.2.6)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers) (8.4.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from diffusers) (3.15.4)\n",
"Requirement already satisfied: huggingface-hub>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from diffusers) (0.24.6)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from diffusers) (1.26.4)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers) (2024.5.15)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from diffusers) (2.32.3)\n",
"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from diffusers) (0.4.4)\n",
"Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from diffusers) (9.4.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n",
"Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.5)\n",
"Requirement already satisfied: scipy>=1.5 in /usr/local/lib/python3.10/dist-packages (from torchsde) (1.13.1)\n",
"Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from torchsde) (2.4.0+cu121)\n",
"Requirement already satisfied: trampoline>=0.1.2 in /usr/local/lib/python3.10/dist-packages (from torchsde) (0.1.2)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.2->diffusers) (2024.6.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.2->diffusers) (4.12.2)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->torchsde) (1.13.2)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->torchsde) (3.3)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->torchsde) (3.1.4)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers) (3.20.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers) (3.8)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers) (2024.8.30)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->torchsde) (2.1.5)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->torchsde) (1.3.0)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import numpy\n",
"import scipy\n",
"import torch\n",
"import pandas as pd\n",
"import soundfile as sf\n",
"import IPython.display as ipd"
],
"metadata": {
"id": "hE4uAUp7hH8j"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from huggingface_hub import login\n",
"\n",
"login()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 359,
"referenced_widgets": [
"b46758b315f24421afc14872a2828c7e",
"e6d345ae656840d7b70a15a999b27020",
"ad3929abedc84051baeeb9c13aa593bc",
"9ad8999e8f6a4ea0b1a8a2372163f371",
"086ef5823b174a198aff1797b1c7f8c6",
"8fc8fa84f8054d008e7ff466bc5f7c62",
"1b0db3a6526a4d6795356f1361cf8c0d",
"b93bb18bb1c344b68049a200db7abfbc",
"a59a510968e44ee19c780d5950ce56fe",
"35003bb73f2341f593e38482fc1802bd",
"e468da573da4422da44d4cd43cd3a958",
"e6d83d6d97b04196b79ddf65368963ac",
"fa517e7e214b4866ad951fc5a3df5c55",
"c97d6f5c94a74eaeb6ec5aee3a555bf0",
"9206a67e399a405ea60f3c3975ccf348",
"3ca58b479dcd4f3f81869ef52070478b",
"4883a5dd4b61450682f765ae6d8cf231"
]
},
"id": "4yD38XImg_JL",
"outputId": "e1ed8bf0-6cfc-40c4-827e-a8a1b610c72a"
},
"execution_count": 3,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='
\n", " | model | \n", "prompt | \n", "audio | \n", "
---|---|---|---|
0 | \n", "AudioLDM | \n", "relaxing piano music with a banjo solo and lo-fi beats | \n", "\n", " |
1 | \n", "Stable Audio Open | \n", "relaxing piano music with a banjo solo and lo-fi beats | \n", "\n", " |
2 | \n", "MusicGen | \n", "relaxing piano music with a banjo solo and lo-fi beats | \n", "\n", " |