# Convert an .mkv video into a mono 16 kHz .mp3 audio file
# ffmpeg -i video.mkv -acodec libmp3lame -ac 1 -ar 16000 output.mp3
#
# Split the .mp3 audio into 30-second chunks
# ffmpeg -i output.mp3 -f segment -segment_time 30 -c copy samples/sample%05d.mp3
from huggingsound import SpeechRecognitionModel, KenshoLMDecoder
import torch
import os
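
# The two ffmpeg steps above can also be scripted from Python. A minimal
# sketch, assuming ffmpeg is on the PATH; the helper name and default paths
# are illustrative, not part of the original gist, and the function is only
# defined here, not called.
def preparar_audio(video_path='video.mkv', out_dir='samples'):
    import subprocess
    os.makedirs(out_dir, exist_ok=True)
    # .mkv video -> mono 16 kHz .mp3
    subprocess.run(['ffmpeg', '-i', video_path, '-acodec', 'libmp3lame',
                    '-ac', '1', '-ar', '16000', 'output.mp3'], check=True)
    # split the audio into 30-second chunks
    subprocess.run(['ffmpeg', '-i', 'output.mp3', '-f', 'segment',
                    '-segment_time', '30', '-c', 'copy',
                    os.path.join(out_dir, 'sample%05d.mp3')], check=True)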
def listar_arquivos_mp3(diretorio):
    """Return the sorted list of .mp3 file paths found in `diretorio`."""
    arquivos = os.listdir(diretorio)
    arquivos_mp3 = [os.path.join(diretorio, arquivo) for arquivo in arquivos if arquivo.endswith('.mp3')]
    arquivos_mp3.sort()
    return arquivos_mp3
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-portuguese", device=device)
input_files = listar_arquivos_mp3('./samples')
# The LM format used by the LM decoders is the KenLM format (ARPA or binary file).
# You can download some example LM files from here: https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-portuguese/tree/main/language_model
# lm_path = "lm_files/lm.binary"
# unigrams_path = "lm_files/unigrams.txt"
# We implemented three different decoders for LM-boosted decoding: KenshoLMDecoder, ParlanceLMDecoder, and FlashlightLMDecoder.
# In this example, we'll use the KenshoLMDecoder.
# To use this decoder you'll need to install Kensho's pyctcdecode first (https://github.com/kensho-technologies/pyctcdecode)
# decoder = KenshoLMDecoder(model.token_set, lm_path=lm_path, unigrams_path=unigrams_path)
# transcription = model.transcribe(input_files, decoder=decoder)
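#
# A minimal sketch of building the decoder only when the KenLM files exist
# locally; the lm_files/ paths repeat the commented examples above and are
# assumptions. With the files absent, decoder stays None and the plain
# transcribe() call below runs greedy decoding; with them present, you
# would pass decoder=decoder to model.transcribe() as in the comment above.
lm_path = "lm_files/lm.binary"
unigrams_path = "lm_files/unigrams.txt"
decoder = None
if os.path.exists(lm_path) and os.path.exists(unigrams_path):
    decoder = KenshoLMDecoder(model.token_set, lm_path=lm_path, unigrams_path=unigrams_path)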
transcription = model.transcribe(input_files)
# transcribe() returns one dict per input file; the recognized text is
# stored under the 'transcription' key. Write one transcript per line.
with open('transcription.txt', 'w', encoding='utf-8') as f:
    for result in transcription:
        text = result['transcription']
        f.write(text)
        f.write('\n')
# print(transcription)
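
# Optional follow-up (a sketch, not part of the original output): also save
# each transcript next to the sample it came from, relying on transcribe()
# returning one result per input path, in order.
with open('transcription_by_file.txt', 'w', encoding='utf-8') as f:
    for path, result in zip(input_files, transcription):
        f.write(f"{os.path.basename(path)}\t{result['transcription']}\n")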