refactor hollywoo audio python scripts so looping through tagged audio is reusable

2022-09-27 04:42:28 +00:00
parent fb64dfca7e
commit bfa4eccda5
4 changed files with 119 additions and 78 deletions
--- a/scripts/cut-voice-track.py
+++ b/scripts/cut-voice-track.py
@@ -1,19 +1,10 @@
 #! /usr/bin/env python
 # pip install -r requirements.txt
-usage = 'python cut-voice-track.py <?wav timestamp json> <?wav filename>'
+usage = 'python cut-voice-track.py <wav timestamp json> <?wav filename>'

+from imports import *
 import util
-import json
-import sys
-from numpy import vstack
-from scipy.io import wavfile
-from simpleaudio import play_buffer
-import wave
 import string
-try:
-    from getch import getch
-except:
-    from msvcrt import getwch as getch
 from os.path import exists
 from os import system
 system('color')
@@ -22,48 +13,27 @@ json_filename = util.arg(1, usage)
 default_wav_name = json_filename.replace('_4000.json', '')
 wav_filename = util.arg(2, usage, default_wav_name)

-timestamps = {}
-with open(json_filename, 'r') as f:
-    timestamps = json.load(f)
+cutter = util.AudioCutter(wav_filename, json_filename)

-wav = None
-with open(wav_filename, 'rb') as f:
-    wav = wave.open(f)
-
-nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
-
-_, data = wavfile.read(wav_filename)
-
-new_data = data[0:1]
-new_json = {}
-
-def save():
+def new_wav_file():
    suffix = "0"
    new_wav = wav_filename.replace(".wav", f"-cut{suffix}.wav")
    while exists(new_wav):
        new_suffix = str(int(suffix) + 1)
        new_wav = new_wav.replace(f"-cut{suffix}.wav", f"-cut{new_suffix}.wav")
        suffix = new_suffix
-    wavfile.write(new_wav, framerate, new_data)
-    with open(new_wav.replace(".wav", ".json"), 'w') as f:
-        json.dump(new_json, f)
-    sys.exit(0)
+    return new_wav

-current_sec = 0
-searching_for = None
-last_search = None
-for (audio_guess, possible_sections) in timestamps.items():
-    if searching_for != None:
-        if searching_for in audio_guess:
-            searching_for = None
-        else:
-            continue
+def save():
+    new_wav = new_wav_file()
+    cutter.save_and_quit(new_wav)

+def process_chunk(audio_guess, possible_sections):
    num_takes = len(possible_sections)
    if num_takes > 36:
        print('\033[31m' + audio_guess + '\033[0m')
        print('\033[31m' + f'Warning! {num_takes} is too many! Skipping' + '\033[0m')
-        continue
+        return
    assert num_takes <= 36, "I didn't plan for this many takes of any line"
    alphabet_takes = 0
    if num_takes > 10:
@@ -73,7 +43,7 @@ for (audio_guess, possible_sections) in timestamps.items():
    if alphabet_takes > 0:
        takes += '/' + '/'.join(string.ascii_uppercase[:alphabet_takes])

-    def audio_and_length(choice):
+    def start_and_end(choice):
        take_num = -1
        if choice in string.ascii_uppercase:
            take_num = 10 + string.ascii_uppercase.index(choice)
@@ -82,9 +52,7 @@ for (audio_guess, possible_sections) in timestamps.items():
        take_info = possible_sections[take_num]
        start = take_info['start']
        end = take_info['end']
-        start_frame = int(start * framerate)
-        end_frame = int(end * framerate)
-        return data[start_frame:end_frame], end - start
+        return start, end
    
    print('\033[31m' + audio_guess + '\033[0m')
    print(f'{takes}/u({takes}/*)/d/f/n/h/q')
@@ -95,47 +63,44 @@ for (audio_guess, possible_sections) in timestamps.items():
        elif choice == 'd':
            break
        elif choice != '/' and choice in takes:
-            audio, _ = audio_and_length(choice)
-            play_buffer(audio, nchannels, sampwidth, framerate)
+            start, end = start_and_end(choice)
+            cutter.play_audio(start, end)
        elif choice == 'f':
-            phrase = input("phrase (lower-case) to search for?")
-            last_search = phrase
-            searching_for = phrase
+            cutter.search()
            break
        elif choice == 'n':
-            searching_for = last_search
+            cutter.repeat_search()
            break
        elif choice == 'q':
            save()
        elif choice == 'u':
            choice = getch()
+            choices = takes.split('/')
            if choice == '*':
                # use all the takes
                print('using all')
                line_with_alts = {}
-                choices = takes.split('/')
-                audio, length = audio_and_length(choices[0])
-                new_data = vstack((new_data, audio))
-                line_with_alts['start'] = current_sec
-                line_with_alts['end'] = current_sec + length
-                current_sec += length
+                start, end = start_and_end(choices[0])
+                length = end - start
+                line_with_alts['start'] = cutter.current_sec
+                line_with_alts['end'] = cutter.current_sec + length
+                cutter.take_audio(audio_guess, line_with_alts, start, end)
                alts = []
                for choice in choices[1:]:
-                    audio, length = audio_and_length(choices[0])
-                    alts.append({'start': current_sec, 'end': current_sec + length})
-                    current_sec += length
-                    new_data = vstack((new_data, audio))
-                line_with_alts['alts'] = alts
-                new_json[audio_guess] = line_with_alts
+                    start, end = start_and_end(choice)
+                    length = end - start
+                    alts.append({'start': cutter.current_sec, 'end': cutter.current_sec + length})
+                    line_with_alts['alts'] = alts
+                    cutter.take_audio(audio_guess, line_with_alts, start, end)
                break
            elif choice != '/' and choice in takes:
-                audio, length = audio_and_length(choice)
-                new_json[audio_guess] = {
-                    'start': current_sec,
-                    'end': current_sec + length
+                start, end = start_and_end(choice)
+                length = end - start
+                info = {
+                    'start': cutter.current_sec,
+                    'end': cutter.current_sec + length
                }
-                new_data = vstack((new_data, audio))
-                current_sec += length
+                cutter.take_audio(audio_guess, info, start, end)
                break
            else:
                print(f'{choice} is not a valid take to use')
@@ -143,7 +108,4 @@ for (audio_guess, possible_sections) in timestamps.items():
        else:
            print(f'{choice} is not a valid option')

-if searching_for != None:
-    print(f"{searching_for} not found")
-
-save()
+cutter.process_audio(process_chunk, new_wav_file())
--- a/scripts/imports.py
+++ b/scripts/imports.py
@@ -0,0 +1,11 @@
+import sys
+import json
+import wave
+from scipy.io import wavfile
+from simpleaudio import play_buffer
+from numpy import vstack
+try:
+    from getch import getch
+except ImportError:
+    from msvcrt import getwch as getch
+__all__ = ['sys', 'json', 'wave', 'wavfile', 'play_buffer', 'vstack', 'getch']
--- a/scripts/transcribe-voice-track.py
+++ b/scripts/transcribe-voice-track.py
@@ -6,13 +6,9 @@ usage = 'python transcribe-voice-track.py <wav filenames...> '
 # If you don't get results, try re-exporting as Signed 16-bit PCM

 import util
-import wave
-import json
-import sys
 import os
 import requests
 from zipfile import ZipFile
-from scipy.io import wavfile

 from vosk import Model, KaldiRecognizer

--- a/scripts/util.py
+++ b/scripts/util.py
@@ -1,4 +1,4 @@
-import sys
+from imports import *

 def arg(num, usage, default=None):
    val = ''
@@ -18,4 +18,76 @@ def args(starting_num, usage, default=None):
        if default != None:
            return default
        raise ValueError(usage)
-    return l
+    return l
+
+class AudioCutter:
+    def __init__(self, wav_file, json_file):
+        # Store a wav file's sound data and json data representing tagged chunks of audio in the wav
+        with open(json_file, 'r') as f:
+            self.json_info = json.load(f)
+        
+        with open(wav_file, 'rb') as f:
+            self.wav = wave.open(f)
+
+        self.nchannels, self.sampwidth, self.framerate, self.nframes, self.comptype, self.compname = self.wav.getparams()
+        _, self.data = wavfile.read(wav_file)
+        
+        # Accumulate new sound data cut from the original, along with new related json data
+        self.new_data = self.data[0:1]
+        self.new_json_info = {}
+
+        # State of a search through the json/wav file:
+        self.current_sec = 0
+        self.searching_for = None
+        self.last_search = None
+
+    def save_and_quit(self, new_wav_file):
+        wavfile.write(new_wav_file, self.framerate, self.new_data)
+        with open(new_wav_file.replace(".wav", ".json"), 'w') as f:
+            json.dump(self.new_json_info, f)
+        sys.exit(0)
+
+    def audio_and_length(self, start, end):
+        start_frame = int(start * self.framerate)
+        end_frame = int(end * self.framerate)
+        return self.data[start_frame:end_frame], end - start
+
+    def take_audio(self, tag, info, start, end):
+        audio, length = self.audio_and_length(start, end)
+        self.new_data = vstack((self.new_data, audio))
+        self.current_sec += length
+        self.new_json_info[tag] = info
+
+    def play_audio(self, start, end):
+        audio, _ = self.audio_and_length(start, end)
+        play_buffer(audio, self.nchannels, self.sampwidth, self.framerate)
+
+    def search(self):
+        phrase = input("phrase (lower-case) to search for?")
+        self.last_search = phrase
+        self.searching_for = phrase
+    
+    def repeat_search(self):
+        self.searching_for = self.last_search
+
+    def process_audio(self, chunk_processor, new_wav_file):
+        for (audio_tag, chunk_info) in self.json_info.items():
+            # When the AudioCutter is searching for a phrase, skip all audio tags that don't match
+            if self.searching_for != None:
+                if self.searching_for in audio_tag:
+                    self.searching_for = None
+                else:
+                    continue
+            
+            chunk_processor(audio_tag, chunk_info)
+        
+        if self.searching_for != None:
+            print(f"{self.searching_for} not found")
+        
+        self.save_and_quit(new_wav_file)
+
+
+    
+
+
+