refactor hollywoo audio python scripts so looping through tagged audio is reusable

2022-09-27 04:42:28 +00:00
parent 67a640d204
commit 10492767cd
4 changed files with 119 additions and 78 deletions
--- a/projects/hollywoo/scripts/cut-voice-track.py
+++ b/projects/hollywoo/scripts/cut-voice-track.py
@@ -1,19 +1,10 @@
 #! /usr/bin/env python
 # pip install -r requirements.txt
-usage = 'python cut-voice-track.py <?wav timestamp json> <?wav filename>'
+usage = 'python cut-voice-track.py <wav timestamp json> <?wav filename>'
 from imports import *
 import util
 import json
 import sys
 from numpy import vstack
 from scipy.io import wavfile
 from simpleaudio import play_buffer
 import wave
 import string
 try:
    from getch import getch
 except:
    from msvcrt import getwch as getch
 from os.path import exists
 from os import system
 system('color')
@@ -22,48 +13,27 @@ json_filename = util.arg(1, usage)
 default_wav_name = json_filename.replace('_4000.json', '')
 wav_filename = util.arg(2, usage, default_wav_name)
-timestamps = {}
+cutter = util.AudioCutter(wav_filename, json_filename)
 with open(json_filename, 'r') as f:
    timestamps = json.load(f)
-wav = None
+def new_wav_file():
 with open(wav_filename, 'rb') as f:
    wav = wave.open(f)
 nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
 _, data = wavfile.read(wav_filename)
 new_data = data[0:1]
 new_json = {}
 def save():
    suffix = "0"
    new_wav = wav_filename.replace(".wav", f"-cut{suffix}.wav")
    while exists(new_wav):
        new_suffix = str(int(suffix) + 1)
        new_wav = new_wav.replace(f"-cut{suffix}.wav", f"-cut{new_suffix}.wav")
        suffix = new_suffix
-    wavfile.write(new_wav, framerate, new_data)
+    return new_wav
    with open(new_wav.replace(".wav", ".json"), 'w') as f:
        json.dump(new_json, f)
    sys.exit(0)
-current_sec = 0
+def save():
-searching_for = None
+    new_wav = new_wav_file()
-last_search = None
+    cutter.save_and_quit(new_wav)
 for (audio_guess, possible_sections) in timestamps.items():
    if searching_for != None:
        if searching_for in audio_guess:
            searching_for = None
        else:
            continue
 def process_chunk(audio_guess, possible_sections):
    num_takes = len(possible_sections)
    if num_takes > 36:
        print('\033[31m' + audio_guess + '\033[0m')
        print('\033[31m' + f'Warning! {num_takes} is too many! Skipping' + '\033[0m')
-        continue
+        return
    assert num_takes <= 36, "I didn't plan for this many takes of any line"
    alphabet_takes = 0
    if num_takes > 10:
@@ -73,7 +43,7 @@ for (audio_guess, possible_sections) in timestamps.items():
    if alphabet_takes > 0:
        takes += '/' + '/'.join(string.ascii_uppercase[:alphabet_takes])
-    def audio_and_length(choice):
+    def start_and_end(choice):
        take_num = -1
        if choice in string.ascii_uppercase:
            take_num = 10 + string.ascii_uppercase.index(choice)
@@ -82,9 +52,7 @@ for (audio_guess, possible_sections) in timestamps.items():
        take_info = possible_sections[take_num]
        start = take_info['start']
        end = take_info['end']
-        start_frame = int(start * framerate)
+        return start, end
        end_frame = int(end * framerate)
        return data[start_frame:end_frame], end - start
    print('\033[31m' + audio_guess + '\033[0m')
    print(f'{takes}/u({takes}/*)/d/f/n/h/q')
@@ -95,47 +63,44 @@ for (audio_guess, possible_sections) in timestamps.items():
        elif choice == 'd':
            break
        elif choice != '/' and choice in takes:
-            audio, _ = audio_and_length(choice)
+            start, end = start_and_end(choice)
-            play_buffer(audio, nchannels, sampwidth, framerate)
+            cutter.play_audio(start, end)
        elif choice == 'f':
-            phrase = input("phrase (lower-case) to search for?")
+            cutter.search()
            last_search = phrase
            searching_for = phrase
            break
        elif choice == 'n':
-            searching_for = last_search
+            cutter.repeat_search()
            break
        elif choice == 'q':
            save()
        elif choice == 'u':
            choice = getch()
            choices = takes.split('/')
            if choice == '*':
                # use all the takes
                print('using all')
                line_with_alts = {}
-                choices = takes.split('/')
+                start, end = start_and_end(choices[0])
-                audio, length = audio_and_length(choices[0])
+                length = end - start
-                new_data = vstack((new_data, audio))
+                line_with_alts['start'] = cutter.current_sec
-                line_with_alts['start'] = current_sec
+                line_with_alts['end'] = cutter.current_sec + length
-                line_with_alts['end'] = current_sec + length
+                cutter.take_audio(audio_guess, line_with_alts, start, end)
                current_sec += length
                alts = []
                for choice in choices[1:]:
-                    audio, length = audio_and_length(choices[0])
+                    start, end = start_and_end(choices[0])
-                    alts.append({'start': current_sec, 'end': current_sec + length})
+                    length = end - start
-                    current_sec += length
+                    alts.append({'start': cutter.current_sec, 'end': cutter.current_sec + length})
-                    new_data = vstack((new_data, audio))
+                    line_with_alts['alts'] = alts
-                line_with_alts['alts'] = alts
+                    cutter.take_audio(audio_guess, line_with_alts, start, end)
                new_json[audio_guess] = line_with_alts
                break
            elif choice != '/' and choice in takes:
-                audio, length = audio_and_length(choice)
+                start, end = start_and_end(choices[0])
-                new_json[audio_guess] = {
+                length = end - start
-                    'start': current_sec,
+                info = {
-                    'end': current_sec + length
+                    'start': cutter.current_sec,
                    'end': cutter.current_sec + length
                }
-                new_data = vstack((new_data, audio))
+                cutter.take_audio(audio_guess, info, start, end)
                current_sec += length
                break
            else:
                print(f'{choice} is not a valid take to use')
@@ -143,7 +108,4 @@ for (audio_guess, possible_sections) in timestamps.items():
        else:
            print(f'{choice} is not a valid option')
-if searching_for != None:
+cutter.process_audio(process_chunk, new_wav_file())
    print(f"{searching_for} not found")
 save()
--- a/projects/hollywoo/scripts/imports.py
+++ b/projects/hollywoo/scripts/imports.py
@@ -0,0 +1,11 @@
 # Shared imports for the hollywoo audio scripts; other modules pull these in
 # with `from imports import *`, so only the names in __all__ are exported.
 import sys
 import json
 import wave
 from scipy.io import wavfile
 from simpleaudio import play_buffer
 from numpy import vstack
 try:
    from getch import getch
 except ImportError:
    # The getch module is not importable here: fall back to msvcrt's getwch
    # under the same name. Only ImportError is caught so that unrelated
    # failures (e.g. KeyboardInterrupt) are not silently swallowed.
    from msvcrt import getwch as getch
 __all__ = ['sys', 'json', 'wave', 'wavfile', 'play_buffer', 'vstack', 'getch']
--- a/projects/hollywoo/scripts/transcribe-voice-track.py
+++ b/projects/hollywoo/scripts/transcribe-voice-track.py
@@ -6,13 +6,9 @@ usage = 'python transcribe-voice-track.py <wav filenames...> '
 # If you don't get results, try re-exporting as Signed 16-bit PCM
 import util
 import wave
 import json
 import sys
 import os
 import requests
 from zipfile import ZipFile
 from scipy.io import wavfile
 from vosk import Model, KaldiRecognizer
--- a/projects/hollywoo/scripts/util.py
+++ b/projects/hollywoo/scripts/util.py
@@ -1,4 +1,4 @@
-import sys
+from imports import *
 def arg(num, usage, default=None):
    val = ''
@@ -19,3 +19,75 @@ def args(starting_num, usage, default=None):
            return default
        raise ValueError(usage)
    return l
 class AudioCutter:
    """Cut tagged chunks out of a wav file and collect them into a new wav/json pair.
    The json file is loaded as a mapping that process_audio iterates with
    .items() (tag -> chunk info). Chunks chosen via take_audio are appended to
    new_data, and their metadata is stored in new_json_info under the same tag.
    """
    def __init__(self, wav_file, json_file):
        """Load the tagged-chunk json and the wav samples it refers to.
        Args:
            wav_file: path of the source wav file.
            json_file: path of the json file describing tagged chunks of the wav.
        """
        # Store a wav file's sound data and json data representing tagged chunks of audio in the wav
        with open(json_file, 'r') as f:
            self.json_info = json.load(f)
        with open(wav_file, 'rb') as f:
            self.wav = wave.open(f)
            # Read the header parameters while the underlying file is still open.
            params = self.wav.getparams()
        self.nchannels, self.sampwidth, self.framerate, self.nframes, self.comptype, self.compname = params
        _, self.data = wavfile.read(wav_file)
        # Accumulate new sound data cut from the original, along with new related json data.
        # The accumulator is seeded with the first frame of the original so it
        # already has the right dtype and channel layout for appending.
        self.new_data = self.data[0:1]
        self.new_json_info = {}
        # State of a search through the json/wav file:
        self.current_sec = 0
        self.searching_for = None
        self.last_search = None
    def save_and_quit(self, new_wav_file):
        """Write the accumulated audio and json next to each other, then exit.
        The json path is new_wav_file with ".wav" replaced by ".json".
        Always terminates the process via sys.exit(0).
        """
        wavfile.write(new_wav_file, self.framerate, self.new_data)
        with open(new_wav_file.replace(".wav", ".json"), 'w') as f:
            json.dump(self.new_json_info, f)
        sys.exit(0)
    def audio_and_length(self, start, end):
        """Return (samples, duration) for the span between start and end.
        start and end are multiplied by the frame rate to get frame indices,
        i.e. they are offsets in seconds; duration is end - start.
        """
        start_frame = int(start * self.framerate)
        end_frame = int(end * self.framerate)
        return self.data[start_frame:end_frame], end - start
    def take_audio(self, tag, info, start, end):
        """Append the span start..end to the new audio and record info under tag.
        Advances current_sec by the span's duration. info is stored as-is in
        new_json_info (the caller decides its contents).
        """
        # Local import: `from imports import *` only exports vstack from numpy.
        from numpy import concatenate
        audio, length = self.audio_and_length(start, end)
        # Join along the frame axis. Unlike vstack, this also works for 1-D
        # (single-channel) sample arrays, which vstack would promote to rows
        # of mismatched length; for 2-D data the result is the same.
        self.new_data = concatenate((self.new_data, audio))
        self.current_sec += length
        self.new_json_info[tag] = info
    def play_audio(self, start, end):
        """Play the span start..end of the original audio through play_buffer."""
        audio, _ = self.audio_and_length(start, end)
        play_buffer(audio, self.nchannels, self.sampwidth, self.framerate)
    def search(self):
        """Prompt for a phrase and start skipping chunks until a tag contains it."""
        phrase = input("phrase (lower-case) to search for?")
        self.last_search = phrase
        self.searching_for = phrase
    def repeat_search(self):
        """Search again for the most recently entered phrase."""
        self.searching_for = self.last_search
    def process_audio(self, chunk_processor, new_wav_file):
        """Run chunk_processor(tag, chunk_info) over every tagged chunk, then save and exit.
        Args:
            chunk_processor: callable invoked with each (audio_tag, chunk_info) pair.
            new_wav_file: output wav path handed to save_and_quit when the loop ends.
        """
        for (audio_tag, chunk_info) in self.json_info.items():
            # When the AudioCutter is searching for a phrase, skip all audio tags that don't match
            if self.searching_for is not None:
                if self.searching_for in audio_tag:
                    self.searching_for = None
                else:
                    continue
            chunk_processor(audio_tag, chunk_info)
        # A search still pending here means no remaining tag contained the phrase.
        if self.searching_for is not None:
            print(f"{self.searching_for} not found")
        self.save_and_quit(new_wav_file)