refactor hollywoo audio python scripts so looping through tagged audio is reusable

This commit is contained in:
2022-09-27 04:42:28 +00:00
parent 67a640d204
commit 10492767cd
4 changed files with 119 additions and 78 deletions

View File

@@ -1,19 +1,10 @@
#! /usr/bin/env python #! /usr/bin/env python
# pip install -r requirements.txt # pip install -r requirements.txt
usage = 'python cut-voice-track.py <?wav timestamp json> <?wav filename>' usage = 'python cut-voice-track.py <wav timestamp json> <?wav filename>'
from imports import *
import util import util
import json
import sys
from numpy import vstack
from scipy.io import wavfile
from simpleaudio import play_buffer
import wave
import string import string
try:
from getch import getch
except:
from msvcrt import getwch as getch
from os.path import exists from os.path import exists
from os import system from os import system
system('color') system('color')
@@ -22,48 +13,27 @@ json_filename = util.arg(1, usage)
default_wav_name = json_filename.replace('_4000.json', '') default_wav_name = json_filename.replace('_4000.json', '')
wav_filename = util.arg(2, usage, default_wav_name) wav_filename = util.arg(2, usage, default_wav_name)
timestamps = {} cutter = util.AudioCutter(wav_filename, json_filename)
with open(json_filename, 'r') as f:
timestamps = json.load(f)
wav = None def new_wav_file():
with open(wav_filename, 'rb') as f:
wav = wave.open(f)
nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
_, data = wavfile.read(wav_filename)
new_data = data[0:1]
new_json = {}
def save():
suffix = "0" suffix = "0"
new_wav = wav_filename.replace(".wav", f"-cut{suffix}.wav") new_wav = wav_filename.replace(".wav", f"-cut{suffix}.wav")
while exists(new_wav): while exists(new_wav):
new_suffix = str(int(suffix) + 1) new_suffix = str(int(suffix) + 1)
new_wav = new_wav.replace(f"-cut{suffix}.wav", f"-cut{new_suffix}.wav") new_wav = new_wav.replace(f"-cut{suffix}.wav", f"-cut{new_suffix}.wav")
suffix = new_suffix suffix = new_suffix
wavfile.write(new_wav, framerate, new_data) return new_wav
with open(new_wav.replace(".wav", ".json"), 'w') as f:
json.dump(new_json, f)
sys.exit(0)
current_sec = 0 def save():
searching_for = None new_wav = new_wav_file()
last_search = None cutter.save_and_quit(new_wav)
for (audio_guess, possible_sections) in timestamps.items():
if searching_for != None:
if searching_for in audio_guess:
searching_for = None
else:
continue
def process_chunk(audio_guess, possible_sections):
num_takes = len(possible_sections) num_takes = len(possible_sections)
if num_takes > 36: if num_takes > 36:
print('\033[31m' + audio_guess + '\033[0m') print('\033[31m' + audio_guess + '\033[0m')
print('\033[31m' + f'Warning! {num_takes} is too many! Skipping' + '\033[0m') print('\033[31m' + f'Warning! {num_takes} is too many! Skipping' + '\033[0m')
continue return
assert num_takes <= 36, "I didn't plan for this many takes of any line" assert num_takes <= 36, "I didn't plan for this many takes of any line"
alphabet_takes = 0 alphabet_takes = 0
if num_takes > 10: if num_takes > 10:
@@ -73,7 +43,7 @@ for (audio_guess, possible_sections) in timestamps.items():
if alphabet_takes > 0: if alphabet_takes > 0:
takes += '/' + '/'.join(string.ascii_uppercase[:alphabet_takes]) takes += '/' + '/'.join(string.ascii_uppercase[:alphabet_takes])
def audio_and_length(choice): def start_and_end(choice):
take_num = -1 take_num = -1
if choice in string.ascii_uppercase: if choice in string.ascii_uppercase:
take_num = 10 + string.ascii_uppercase.index(choice) take_num = 10 + string.ascii_uppercase.index(choice)
@@ -82,9 +52,7 @@ for (audio_guess, possible_sections) in timestamps.items():
take_info = possible_sections[take_num] take_info = possible_sections[take_num]
start = take_info['start'] start = take_info['start']
end = take_info['end'] end = take_info['end']
start_frame = int(start * framerate) return start, end
end_frame = int(end * framerate)
return data[start_frame:end_frame], end - start
print('\033[31m' + audio_guess + '\033[0m') print('\033[31m' + audio_guess + '\033[0m')
print(f'{takes}/u({takes}/*)/d/f/n/h/q') print(f'{takes}/u({takes}/*)/d/f/n/h/q')
@@ -95,47 +63,44 @@ for (audio_guess, possible_sections) in timestamps.items():
elif choice == 'd': elif choice == 'd':
break break
elif choice != '/' and choice in takes: elif choice != '/' and choice in takes:
audio, _ = audio_and_length(choice) start, end = start_and_end(choice)
play_buffer(audio, nchannels, sampwidth, framerate) cutter.play_audio(start, end)
elif choice == 'f': elif choice == 'f':
phrase = input("phrase (lower-case) to search for?") cutter.search()
last_search = phrase
searching_for = phrase
break break
elif choice == 'n': elif choice == 'n':
searching_for = last_search cutter.repeat_search()
break break
elif choice == 'q': elif choice == 'q':
save() save()
elif choice == 'u': elif choice == 'u':
choice = getch() choice = getch()
choices = takes.split('/')
if choice == '*': if choice == '*':
# use all the takes # use all the takes
print('using all') print('using all')
line_with_alts = {} line_with_alts = {}
choices = takes.split('/') start, end = start_and_end(choices[0])
audio, length = audio_and_length(choices[0]) length = end - start
new_data = vstack((new_data, audio)) line_with_alts['start'] = cutter.current_sec
line_with_alts['start'] = current_sec line_with_alts['end'] = cutter.current_sec + length
line_with_alts['end'] = current_sec + length cutter.take_audio(audio_guess, line_with_alts, start, end)
current_sec += length
alts = [] alts = []
for choice in choices[1:]: for choice in choices[1:]:
audio, length = audio_and_length(choices[0]) start, end = start_and_end(choices[0])
alts.append({'start': current_sec, 'end': current_sec + length}) length = end - start
current_sec += length alts.append({'start': cutter.current_sec, 'end': cutter.current_sec + length})
new_data = vstack((new_data, audio)) line_with_alts['alts'] = alts
line_with_alts['alts'] = alts cutter.take_audio(audio_guess, line_with_alts, start, end)
new_json[audio_guess] = line_with_alts
break break
elif choice != '/' and choice in takes: elif choice != '/' and choice in takes:
audio, length = audio_and_length(choice) start, end = start_and_end(choices[0])
new_json[audio_guess] = { length = end - start
'start': current_sec, info = {
'end': current_sec + length 'start': cutter.current_sec,
'end': cutter.current_sec + length
} }
new_data = vstack((new_data, audio)) cutter.take_audio(audio_guess, info, start, end)
current_sec += length
break break
else: else:
print(f'{choice} is not a valid take to use') print(f'{choice} is not a valid take to use')
@@ -143,7 +108,4 @@ for (audio_guess, possible_sections) in timestamps.items():
else: else:
print(f'{choice} is not a valid option') print(f'{choice} is not a valid option')
if searching_for != None: cutter.process_audio(process_chunk, new_wav_file())
print(f"{searching_for} not found")
save()

View File

@@ -0,0 +1,11 @@
import sys
import json
import wave
from scipy.io import wavfile
from simpleaudio import play_buffer
from numpy import vstack
try:
from getch import getch
except:
from msvcrt import getwch as getch
__all__ = ['sys', 'json', 'wave', 'wavfile', 'play_buffer', 'vstack', 'getch']

View File

@@ -6,13 +6,9 @@ usage = 'python transcribe-voice-track.py <wav filenames...> '
# If you don't get results, try re-exporting as Signed 16-bit PCM # If you don't get results, try re-exporting as Signed 16-bit PCM
import util import util
import wave
import json
import sys
import os import os
import requests import requests
from zipfile import ZipFile from zipfile import ZipFile
from scipy.io import wavfile
from vosk import Model, KaldiRecognizer from vosk import Model, KaldiRecognizer

View File

@@ -1,4 +1,4 @@
import sys from imports import *
def arg(num, usage, default=None): def arg(num, usage, default=None):
val = '' val = ''
@@ -18,4 +18,76 @@ def args(starting_num, usage, default=None):
if default != None: if default != None:
return default return default
raise ValueError(usage) raise ValueError(usage)
return l return l
class AudioCutter:
    """Cut tagged chunks out of a wav file into a new wav/json pair.

    The cutter holds the source audio, the JSON tag data describing chunks of
    that audio, the audio/JSON accumulated so far for the output, and the
    state of a phrase search through the tags.
    """

    def __init__(self, wav_file, json_file):
        """Load the source audio and its tag data.

        Args:
            wav_file: Path of the wav file to cut audio from.
            json_file: Path of a JSON file whose top-level object maps audio
                tags to chunk info (``process_audio`` iterates its items).
        """
        # Store a wav file's sound data and json data representing tagged
        # chunks of audio in the wav
        with open(json_file, 'r') as f:
            self.json_info = json.load(f)
        with open(wav_file, 'rb') as f:
            self.wav = wave.open(f)
            (self.nchannels, self.sampwidth, self.framerate,
             self.nframes, self.comptype, self.compname) = self.wav.getparams()
        _, self.data = wavfile.read(wav_file)

        # Accumulate new sound data cut from the original, along with new
        # related json data. The accumulator is seeded with the first frame of
        # the source so it has the same dtype and channel layout.
        # NOTE(review): this leaves one stray frame at the start of the
        # output; an empty slice would avoid it -- confirm before changing.
        self.new_data = self.data[0:1]
        self.new_json_info = {}

        # State of a search through the json/wav file:
        self.current_sec = 0
        self.searching_for = None
        self.last_search = None

    @staticmethod
    def _json_path_for(wav_path):
        """Return *wav_path* with its file extension swapped for ``.json``."""
        # Function-scope import: the module's star-import does not provide os.
        import os
        # Only the final extension is swapped, so ".wav" inside a directory
        # name is untouched and the result always differs from the wav path.
        return os.path.splitext(wav_path)[0] + ".json"

    def save_and_quit(self, new_wav_file):
        """Write the accumulated audio and JSON next to each other, then exit.

        Args:
            new_wav_file: Path of the wav file to write. The JSON is written
                to the same path with a ``.json`` extension.

        Raises:
            SystemExit: Always, with status 0, once both files are written.
        """
        wavfile.write(new_wav_file, self.framerate, self.new_data)
        with open(self._json_path_for(new_wav_file), 'w') as f:
            json.dump(self.new_json_info, f)
        sys.exit(0)

    def audio_and_length(self, start, end):
        """Return ``(audio, length)`` for the span from *start* to *end*.

        Args:
            start: Start of the span, in seconds.
            end: End of the span, in seconds.

        Returns:
            A tuple of the slice of the source data between the two frame
            indices (seconds times framerate, truncated to int) and
            ``end - start``.
        """
        start_frame = int(start * self.framerate)
        end_frame = int(end * self.framerate)
        return self.data[start_frame:end_frame], end - start

    def take_audio(self, tag, info, start, end):
        """Append the span *start*..*end* to the output and record *info*.

        Args:
            tag: Key under which *info* is stored in the new JSON data.
            info: Value stored for *tag*; it is stored as given.
            start: Start of the span in the source audio, in seconds.
            end: End of the span in the source audio, in seconds.
        """
        # Function-scope import: the module's star-import only provides vstack.
        from numpy import concatenate
        audio, length = self.audio_and_length(start, end)
        # Joining along axis 0 matches vstack for 2-D (multi-channel) data and
        # also works for 1-D mono data, which vstack would reject because it
        # treats each 1-D array as a single row of a different width.
        self.new_data = concatenate((self.new_data, audio))
        self.current_sec += length
        self.new_json_info[tag] = info

    def play_audio(self, start, end):
        """Play the span of source audio from *start* to *end* (seconds)."""
        audio, _ = self.audio_and_length(start, end)
        play_buffer(audio, self.nchannels, self.sampwidth, self.framerate)

    def search(self):
        """Prompt for a phrase and start searching for it."""
        phrase = input("phrase (lower-case) to search for?")
        self.last_search = phrase
        self.searching_for = phrase

    def repeat_search(self):
        """Start searching again for the most recently entered phrase."""
        self.searching_for = self.last_search

    def process_audio(self, chunk_processor, new_wav_file):
        """Run *chunk_processor* over the tagged chunks, then save and exit.

        Args:
            chunk_processor: Callable invoked as
                ``chunk_processor(audio_tag, chunk_info)`` for each entry of
                the loaded JSON data that is not skipped by a search.
            new_wav_file: Path handed to ``save_and_quit`` after the loop.

        Raises:
            SystemExit: Always, via ``save_and_quit``.
        """
        for audio_tag, chunk_info in self.json_info.items():
            # When the AudioCutter is searching for a phrase, skip all audio
            # tags that don't match
            if self.searching_for is not None:
                if self.searching_for not in audio_tag:
                    continue
                # Found it: stop searching and process this chunk.
                self.searching_for = None
            chunk_processor(audio_tag, chunk_info)

        # A search still pending here ran off the end of the tags.
        if self.searching_for is not None:
            print(f"{self.searching_for} not found")
        self.save_and_quit(new_wav_file)