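Генерация субтитров в Jupyter Notebook: подробное руководство - Fcodenotes

Метод 1. Использование библиотеки SpeechRecognition
Самый простой способ — библиотека SpeechRecognition, которая распознаёт речь из аудиофайла с помощью веб-сервиса распознавания речи Google. Вот пример фрагмента кода: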

import speech_recognition as sr
def generate_subtitles(audio_file):
    """Return the text recognized in *audio_file*.

    The file is opened with ``sr.AudioFile``, recorded through an
    ``sr.Recognizer``, and the recording is handed to
    ``recognize_google``. Nothing is caught here, so any exception raised
    while opening, recording or recognizing propagates to the caller.

    Args:
        audio_file: Value passed straight to ``sr.AudioFile`` (the example
            in this file uses a path to a WAV file).

    Returns:
        Whatever ``recognize_google`` returns for the recording.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as wav_source:
        captured = engine.record(wav_source)
        # Recognition runs while the source is still open, as in the
        # original snippet.
        return engine.recognize_google(captured)
# Example driver: the path below looks like a placeholder -- replace it with
# the location of a real audio file before running.
audio_file = "path/to/audio.wav"
# Transcribe the file and print the recognized text.
subtitles = generate_subtitles(audio_file)
print(subtitles)

Метод 2. Использование API преобразования речи в текст Google Cloud
Если вам нужны более точные и расширенные возможности распознавания речи, вы можете использовать Google Cloud Speech-to-Text API (API преобразования речи в текст). Для этого метода необходимо настроить проект на Google Cloud Platform и получить учетные данные API. Вот пример фрагмента кода, который поможет вам начать:

from google.cloud import speech
def generate_subtitles(audio_file, *, language_code="en-US", sample_rate_hertz=16000):
    """Transcribe an audio file with the Google Cloud Speech client.

    The file's bytes are read fully into memory and sent inline in a single
    ``client.recognize`` request configured for LINEAR16 encoding.

    Args:
        audio_file: Path of the audio file to read in binary mode.
        language_code: Language tag placed in the recognition config.
            Defaults to ``"en-US"``, the value the snippet originally
            hard-coded.
        sample_rate_hertz: Sample rate placed in the recognition config.
            Defaults to ``16000``, the value the snippet originally
            hard-coded.

    Returns:
        The first alternative's transcript of every result, each followed by
        one space, concatenated in order. An empty string when the response
        has no results.
    """
    client = speech.SpeechClient()
    # Bind the handle to its own name: the original reused ``audio_file`` and
    # silently replaced the path parameter with a (later closed) file object.
    with open(audio_file, "rb") as audio_stream:
        content = audio_stream.read()
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
    )
    # NOTE(review): this is the one-shot ``recognize`` call; confirm the
    # service's limits on audio length/size before using it on long recordings.
    response = client.recognize(config=config, audio=audio)
    # Keep the trailing space after each transcript so the returned string is
    # identical to what the original ``+=`` loop produced.
    return "".join(
        result.alternatives[0].transcript + " " for result in response.results
    )
# Example driver: the path below looks like a placeholder -- replace it with
# the location of a real audio file before running.
audio_file = "path/to/audio.wav"
# Transcribe the file and print the concatenated transcript.
subtitles = generate_subtitles(audio_file)
print(subtitles)

Метод 3. Использование других API автоматического распознавания речи (ASR)
Помимо Google Cloud, существует несколько других API ASR, таких как Microsoft Azure Speech Service, IBM Watson Speech to Text и AWS Transcribe. Эти API предоставляют функциональность, аналогичную Google Cloud Speech-to-Text API, и могут быть интегрированы в Jupyter Notebook с помощью соответствующих SDK Python. Вот пример фрагмента кода с использованием API IBM Watson Speech to Text:

from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
def generate_subtitles(
    audio_file,
    *,
    api_key='api_key',
    service_url='service_url',
    content_type='audio/wav',
):
    """Transcribe an audio file with IBM Watson Speech to Text.

    Args:
        audio_file: Path of the audio file to open in binary mode.
        api_key: Key given to ``IAMAuthenticator``. The default is the
            literal ``'api_key'`` the snippet originally hard-coded.
            NOTE(review): that looks like a placeholder -- pass a real key.
        service_url: URL given to ``set_service_url``. The default is the
            literal ``'service_url'`` the snippet originally hard-coded.
            NOTE(review): that also looks like a placeholder.
        content_type: Content type sent with the audio. Defaults to
            ``'audio/wav'``, as in the original snippet.

    Returns:
        The first alternative's transcript of every entry in the response's
        ``'results'`` list, each followed by one space, concatenated in order.
    """
    authenticator = IAMAuthenticator(api_key)
    speech_to_text = SpeechToTextV1(authenticator=authenticator)
    speech_to_text.set_service_url(service_url)
    # Bind the handle to its own name: the original reused ``audio_file`` and
    # silently replaced the path parameter with a (later closed) file object.
    with open(audio_file, 'rb') as audio_stream:
        response = speech_to_text.recognize(
            audio=audio_stream, content_type=content_type
        )
    # Keep the trailing space after each transcript so the returned string is
    # identical to what the original ``+=`` loop produced.
    return "".join(
        result['alternatives'][0]['transcript'] + " "
        for result in response.result['results']
    )
# Example driver: the path below looks like a placeholder -- replace it with
# the location of a real audio file before running.
audio_file = "path/to/audio.wav"
# Transcribe the file and print the concatenated transcript.
subtitles = generate_subtitles(audio_file)
print(subtitles)