Skip to content

Instantly share code, notes, and snippets.

@eyaler
Last active May 15, 2023 14:55
Show Gist options
  • Save eyaler/1ba839b7ed06aeb6a6ed5b31caa47c4a to your computer and use it in GitHub Desktop.
Save eyaler/1ba839b7ed06aeb6a6ed5b31caa47c4a to your computer and use it in GitHub Desktop.
roboshaul_inference
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"private_outputs": true,
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/eyaler/fdb1f73cd0e18aac76c27235cc144f89/roboshaul_inference.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"#@title התקנות\n",
"\n",
"# Speech engines used by this notebook: gTTS, edge-tts and the `tts` command-line package.\n",
"!pip install gtts\n",
"!pip install edge-tts\n",
"!pip install tts\n",
"\n",
"# Folders for the baseline model files and the HiFi-GAN vocoder files.\n",
"!mkdir -p tts_model hifigan_model\n",
"\n",
"# Two Google Drive files go into each folder (-O names the target directory).\n",
"!gdown --fuzzy \"https://drive.google.com/file/d/1eK1XR_ZwuUy4yWh80nui-q5PBifJsYfy/view\" -O tts_model/\n",
"!gdown --fuzzy \"https://drive.google.com/file/d/1dExa0AZqmyjz8rSZz1noyQY9aF7dR8ew/view\" -O tts_model/\n",
"!gdown --fuzzy \"https://drive.google.com/file/d/1An6cTCYkxXWhagIJe3NGkoP8n2CQWQ-3/view\" -O hifigan_model/\n",
"!gdown --fuzzy \"https://drive.google.com/file/d/1XdmRRHjZ_eZOFKoAQgQ8wivrLDJnNDkh/view\" -O hifigan_model/\n",
"\n",
"import librosa\n",
"import soundfile as sf\n",
"import os\n",
"\n",
"# Download the reference speaker clips into /content/ref.\n",
"# The absolute path keeps this cell re-runnable even if an earlier, interrupted run\n",
"# left the working directory somewhere other than /content.\n",
"!mkdir -p /content/ref\n",
"%cd /content/ref\n",
"# -nc: skip files that already exist, so re-running does not download them again.\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Mom3_0007.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Meat_0089.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Aharon Fogel_0002.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Corona4_0007.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Credit Rating_0006.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Electricity_0001.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Gedera_0019.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Hashlama112_0024.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Hashlama117_0009.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Hedva_0035.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Hungary1_0068.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Lod_0053.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Lod_0065.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Meat_0039.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Robo Shaul 1_0017.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Robo Shaul 1_0065.wav\"\n",
"!wget -nc \"https://anonymous19283746.github.io/saspeech/wavs/gt/Robo Shaul 2_0040.wav\"\n",
"# Build a 16 kHz copy of every reference clip in /content/ref16.\n",
"# Runs inside the reference-clip directory (the current working directory at this point).\n",
"!mkdir -p /content/ref16\n",
"for file in os.listdir():\n",
"  # Only audio clips are resampled; anything else in the folder is ignored.\n",
"  if not file.endswith('.wav'):\n",
"    continue\n",
"  if ' ' in file:\n",
"    # Spaces would split the name when the clip list is later passed on a command line.\n",
"    fix = file.replace(' ', '_')\n",
"    os.replace(file, fix)  # pure-Python rename; overwrites a stale copy like `mv` did\n",
"    file = fix\n",
"  y, sr = librosa.load(file, sr=16000)\n",
"  sf.write('/content/ref16/' + file, y, sr)\n",
"%cd /content"
],
"metadata": {
"id": "pJqVVqlDmiiW",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title נקדן { run: \"auto\" }\n",
"\n",
"from IPython.display import IFrame\n",
"\n",
"# Embed the nakdimon.org page in the cell output.\n",
"# The call stays the last expression so the notebook renders it.\n",
"IFrame(\n",
"  src='https://nakdimon.org',\n",
"  width=700,\n",
"  height=400,\n",
")"
],
"metadata": {
"id": "VFOMC6r0uMzq",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title דבר שאול\n",
"\n",
"from IPython.display import Audio\n",
"from gtts import gTTS\n",
"#!rm -rf /root/.local/share/tts\n",
"\n",
"text = '\\u05E7\\u05D5\\u05B9\\u05E8\\u05B0\\u05D0\\u05B4\\u05D9\\u05DD \\u05DC\\u05B4\\u05D9 \\u05E8\\u05D5\\u05B9\\u05D1\\u05BC\\u05D5\\u05B9-\\u05E9\\u05C1\\u05B8\\u05D0\\u05D5\\u05BC\\u05DC \\u05D5\\u05B7\\u05D0\\u05B2\\u05E0\\u05B4\\u05D9 \\u05DE\\u05B7\\u05DB\\u05BC\\u05B4\\u05D9\\u05E8 \\u05D1\\u05BC\\u05B8\\u05E0\\u05D5\\u05B9\\u05EA \\u05DE\\u05B5\\u05D0\\u05B7\\u05E8\\u05B0\\u05D2\\u05BC\\u05B6\\u05E0\\u05B0\\u05D8\\u05B4\\u05D9\\u05E0\\u05B8\\u05D4 \\u05D5\\u05BC\\u05D1\\u05B8\\u05E0\\u05B4\\u05D9\\u05DD \\u05DE\\u05B4\\u05D1\\u05BC\\u05B0\\u05E8\\u05B8\\u05D6\\u05B4\\u05D9\\u05DC.' #@param {type: \"string\"}\n",
"temp_file = 'stage1.wav' #@param {type: \"string\"}\n",
"final_file = 'stage2.wav' #@param {type: \"string\"}\n",
"engine = 'google' #@param ['baseline', 'google', 'edge']\n",
"downsample = True #@param {type: \"boolean\"}\n",
"lang = 'fr-fr' #@param ['en', 'fr-fr', 'pt-br']\n",
"\n",
"\n",
"def mytts(text, engine):\n",
"  \"\"\"Speak `text` with the chosen engine, then run the result through the your_tts model.\n",
"\n",
"  Stage 1 writes `temp_file` using the downloaded baseline model ('baseline'),\n",
"  gTTS ('google') or edge-tts (any other value). Stage 2 runs the your_tts model\n",
"  with the stage-1 audio as --reference_wav and the clips in ref/ (ref16/ when\n",
"  `downsample` is set) as --speaker_wav, writing `final_file`. Both files are\n",
"  played inline.\n",
"\n",
"  Reads the globals temp_file, final_file, downsample and lang, and needs os,\n",
"  librosa and sf (soundfile) to be imported already.\n",
"\n",
"  Raises:\n",
"    subprocess.CalledProcessError: an external tts / edge-tts command failed.\n",
"    FileNotFoundError: the reference folder holds no .wav clips.\n",
"  \"\"\"\n",
"  import subprocess  # function-scope import keeps this block self-contained\n",
"\n",
"  def run(cmd, quiet=False):\n",
"    # Argument lists bypass the shell, so quotes, $ or backticks in the text need no\n",
"    # escaping, and the unescaped text can be shared with gTTS.\n",
"    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL if quiet else None)\n",
"\n",
"  print(text + '\\n')\n",
"  if engine == 'baseline':\n",
"    run(['tts', '--use_cuda', '1', '--text', text,\n",
"         '--model_path', 'tts_model/saspeech_nikud_7350.pth',\n",
"         '--config_path', 'tts_model/config_overflow.json',\n",
"         '--vocoder_path', 'hifigan_model/checkpoint_500000.pth',\n",
"         '--vocoder_config_path', 'hifigan_model/config_hifigan.json',\n",
"         '--out_path', temp_file], quiet=True)\n",
"  elif engine == 'google':\n",
"    gTTS(text=text, lang='iw').save(temp_file)\n",
"  else:\n",
"    run(['edge-tts', '--voice', 'he-IL-AvriNeural', '--text', text, '--write-media', temp_file])\n",
"\n",
"  if downsample:\n",
"    # Rewrite the stage-1 file in place at 16 kHz.\n",
"    y, sr = librosa.load(temp_file, sr=16000)\n",
"    sf.write(temp_file, y, sr)\n",
"  display(Audio(temp_file))\n",
"\n",
"  # Use the 16 kHz reference clips when the stage-1 audio was downsampled too.\n",
"  ref_path = 'ref16' if downsample else 'ref'\n",
"  ref_files = [ref_path + '/' + file for file in os.listdir(ref_path) if file.endswith('.wav')]\n",
"  if not ref_files:\n",
"    raise FileNotFoundError('no reference .wav files found in ' + ref_path)\n",
"  run(['tts', '--use_cuda', '1',\n",
"       '--model_name', 'tts_models/multilingual/multi-dataset/your_tts',\n",
"       '--speaker_wav', *ref_files,\n",
"       '--reference_wav', temp_file,\n",
"       '--language_idx', lang,\n",
"       '--out_path', final_file], quiet=True)\n",
"  display(Audio(final_file))\n",
"\n",
"mytts(text, engine)"
],
"metadata": {
"id": "3PSi4oR3pEND",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title הורדה\n",
"\n",
"from google.colab import files\n",
"\n",
"# Download the stage-1 file first, then the stage-2 file.\n",
"for wav_path in (temp_file, final_file):\n",
"  files.download(wav_path)"
],
"metadata": {
"cellView": "form",
"id": "dOPOwL0PpRog"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment