From cb9761138e11496e596c551550137f0075c38493 Mon Sep 17 00:00:00 2001
From: Nat Quayle Nelson <natquaylenelson@gmail.com>
Date: Sun, 22 May 2022 23:53:03 +0000
Subject: [PATCH] transcribe-voice-track allow multiple wav args

---
 .../scripts/transcribe-voice-track.py         | 28 +++++++++----------
 projects/hollywoo/scripts/util.py             | 12 +++++++-
 2 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/projects/hollywoo/scripts/transcribe-voice-track.py b/projects/hollywoo/scripts/transcribe-voice-track.py
index 2305c557..7e1f1b23 100644
--- a/projects/hollywoo/scripts/transcribe-voice-track.py
+++ b/projects/hollywoo/scripts/transcribe-voice-track.py
@@ -1,6 +1,9 @@
 #! /usr/bin/env python
 # pip install -r requirements.txt
-usage = 'python transcribe-voice-track.py <?wav filename>'
+usage = 'python transcribe-voice-track.py <wav filenames...> '
+
+# Reference: https://towardsdatascience.com/speech-recognition-with-timestamps-934ede4234b2
+# If you don't get results, try re-exporting the audio as a signed 16-bit PCM WAV file.
 
 import util
 import wave
@@ -29,16 +32,19 @@ if not os.path.exists(model_path):
     with ZipFile(model_zip_path, "r") as zip_file:
         zip_file.extractall('models')
 
-audio_filename = util.arg(1, usage)
-wf = wave.open(audio_filename, "rb")
-
 model = Model(model_path)
-rec = KaldiRecognizer(model, wf.getframerate())
-rec.SetWords(True)
 
-frames = 4000
-while True:
+audio_filenames = util.args(1, usage)
+for audio_filename in audio_filenames:
+    wf = wave.open(audio_filename, "rb")
+
+    rec = KaldiRecognizer(model, wf.getframerate())
+    rec.SetWords(True)
+
+    frames = 4000
+    
     # Mix channels together if the input is stereo
+    # or the sample width is incompatible
     if wf.getnchannels() == 2:
         wf.close()
         mono_filename = '.'.join(audio_filename.split('.')[:-1]) + '_mono.wav'
@@ -72,9 +78,3 @@ while True:
             lines[text].append({'start': words[0]['start'], 'end': words[-1]['end']})
             print(f'{text}: {words[0]["start"]} {words[-1]["end"]}')
         json.dump(lines, f)
-
-    frames = input(f"Try different frames num? (was {frames}) (press ENTER to quit): ")
-    if len(frames) == 0:
-        sys.exit(0)
-    else:
-        frames = int(frames)
diff --git a/projects/hollywoo/scripts/util.py b/projects/hollywoo/scripts/util.py
index 90565e3b..134ffa41 100644
--- a/projects/hollywoo/scripts/util.py
+++ b/projects/hollywoo/scripts/util.py
@@ -8,4 +8,14 @@ def arg(num, usage, default=None):
         if default != None:
             return default
         raise ValueError(usage)
-    return val
\ No newline at end of file
+    return val
+
+def args(starting_num, usage, default=None):
+    """Return sys.argv[starting_num:]; if there are no such arguments, return
+    default, or raise ValueError(usage) when default is None."""
+    if len(sys.argv) > starting_num:
+        return sys.argv[starting_num:]
+    # Identity test: `!= None` would call the default's own __ne__/__eq__.
+    if default is not None:
+        return default
+    raise ValueError(usage)
\ No newline at end of file