From cb9761138e11496e596c551550137f0075c38493 Mon Sep 17 00:00:00 2001 From: Nat Quayle Nelson Date: Sun, 22 May 2022 23:53:03 +0000 Subject: [PATCH] transcribe-voice-track allow multiple wav args --- .../scripts/transcribe-voice-track.py | 28 +++++++++---------- projects/hollywoo/scripts/util.py | 12 +++++++- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/projects/hollywoo/scripts/transcribe-voice-track.py b/projects/hollywoo/scripts/transcribe-voice-track.py index 2305c557..7e1f1b23 100644 --- a/projects/hollywoo/scripts/transcribe-voice-track.py +++ b/projects/hollywoo/scripts/transcribe-voice-track.py @@ -1,6 +1,9 @@ #! /usr/bin/env python # pip install -r requirements.txt -usage = 'python transcribe-voice-track.py ' +usage = 'python transcribe-voice-track.py ' + +# https://towardsdatascience.com/speech-recognition-with-timestamps-934ede4234b2 +# If you don't get results, try re-exporting as Signed 16-bit PCM import util import wave @@ -29,16 +32,19 @@ if not os.path.exists(model_path): with ZipFile(model_zip_path, "r") as zip_file: zip_file.extractall('models') -audio_filename = util.arg(1, usage) -wf = wave.open(audio_filename, "rb") - model = Model(model_path) -rec = KaldiRecognizer(model, wf.getframerate()) -rec.SetWords(True) -frames = 4000 -while True: +audio_filenames = util.args(1, usage) +for audio_filename in audio_filenames: + wf = wave.open(audio_filename, "rb") + + rec = KaldiRecognizer(model, wf.getframerate()) + rec.SetWords(True) + + frames = 4000 + # Mix channels together if the input is stereo + # or the sample width is incompatible if wf.getnchannels() == 2: wf.close() mono_filename = '.'.join(audio_filename.split('.')[:-1]) + '_mono.wav' @@ -72,9 +78,3 @@ while True: lines[text].append({'start': words[0]['start'], 'end': words[-1]['end']}) print(f'{text}: {words[0]["start"]} {words[-1]["end"]}') json.dump(lines, f) - - frames = input(f"Try different frames num? (was {frames}) (press ENTER to quit): ") - if len(frames) == 0: - sys.exit(0) - else: - frames = int(frames) diff --git a/projects/hollywoo/scripts/util.py b/projects/hollywoo/scripts/util.py index 90565e3b..134ffa41 100644 --- a/projects/hollywoo/scripts/util.py +++ b/projects/hollywoo/scripts/util.py @@ -8,4 +8,14 @@ def arg(num, usage, default=None): if default != None: return default raise ValueError(usage) - return val \ No newline at end of file + return val + +def args(starting_num, usage, default=None): + l = [] + if len(sys.argv) > starting_num: + l = sys.argv[starting_num:] + else: + if default != None: + return default + raise ValueError(usage) + return l \ No newline at end of file