refactor hollywoo audio python scripts so looping through tagged audio is reusable
This commit is contained in:
@@ -1,19 +1,10 @@
|
|||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
# pip install -r requirements.txt
|
# pip install -r requirements.txt
|
||||||
usage = 'python cut-voice-track.py <?wav timestamp json> <?wav filename>'
|
usage = 'python cut-voice-track.py <wav timestamp json> <?wav filename>'
|
||||||
|
|
||||||
|
from imports import *
|
||||||
import util
|
import util
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from numpy import vstack
|
|
||||||
from scipy.io import wavfile
|
|
||||||
from simpleaudio import play_buffer
|
|
||||||
import wave
|
|
||||||
import string
|
import string
|
||||||
try:
|
|
||||||
from getch import getch
|
|
||||||
except:
|
|
||||||
from msvcrt import getwch as getch
|
|
||||||
from os.path import exists
|
from os.path import exists
|
||||||
from os import system
|
from os import system
|
||||||
system('color')
|
system('color')
|
||||||
@@ -22,48 +13,27 @@ json_filename = util.arg(1, usage)
|
|||||||
default_wav_name = json_filename.replace('_4000.json', '')
|
default_wav_name = json_filename.replace('_4000.json', '')
|
||||||
wav_filename = util.arg(2, usage, default_wav_name)
|
wav_filename = util.arg(2, usage, default_wav_name)
|
||||||
|
|
||||||
timestamps = {}
|
cutter = util.AudioCutter(wav_filename, json_filename)
|
||||||
with open(json_filename, 'r') as f:
|
|
||||||
timestamps = json.load(f)
|
|
||||||
|
|
||||||
wav = None
|
def new_wav_file():
|
||||||
with open(wav_filename, 'rb') as f:
|
|
||||||
wav = wave.open(f)
|
|
||||||
|
|
||||||
nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
|
|
||||||
|
|
||||||
_, data = wavfile.read(wav_filename)
|
|
||||||
|
|
||||||
new_data = data[0:1]
|
|
||||||
new_json = {}
|
|
||||||
|
|
||||||
def save():
|
|
||||||
suffix = "0"
|
suffix = "0"
|
||||||
new_wav = wav_filename.replace(".wav", f"-cut{suffix}.wav")
|
new_wav = wav_filename.replace(".wav", f"-cut{suffix}.wav")
|
||||||
while exists(new_wav):
|
while exists(new_wav):
|
||||||
new_suffix = str(int(suffix) + 1)
|
new_suffix = str(int(suffix) + 1)
|
||||||
new_wav = new_wav.replace(f"-cut{suffix}.wav", f"-cut{new_suffix}.wav")
|
new_wav = new_wav.replace(f"-cut{suffix}.wav", f"-cut{new_suffix}.wav")
|
||||||
suffix = new_suffix
|
suffix = new_suffix
|
||||||
wavfile.write(new_wav, framerate, new_data)
|
return new_wav
|
||||||
with open(new_wav.replace(".wav", ".json"), 'w') as f:
|
|
||||||
json.dump(new_json, f)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
current_sec = 0
|
def save():
|
||||||
searching_for = None
|
new_wav = new_wav_file()
|
||||||
last_search = None
|
cutter.save_and_quit(new_wav)
|
||||||
for (audio_guess, possible_sections) in timestamps.items():
|
|
||||||
if searching_for != None:
|
|
||||||
if searching_for in audio_guess:
|
|
||||||
searching_for = None
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
def process_chunk(audio_guess, possible_sections):
|
||||||
num_takes = len(possible_sections)
|
num_takes = len(possible_sections)
|
||||||
if num_takes > 36:
|
if num_takes > 36:
|
||||||
print('\033[31m' + audio_guess + '\033[0m')
|
print('\033[31m' + audio_guess + '\033[0m')
|
||||||
print('\033[31m' + f'Warning! {num_takes} is too many! Skipping' + '\033[0m')
|
print('\033[31m' + f'Warning! {num_takes} is too many! Skipping' + '\033[0m')
|
||||||
continue
|
return
|
||||||
assert num_takes <= 36, "I didn't plan for this many takes of any line"
|
assert num_takes <= 36, "I didn't plan for this many takes of any line"
|
||||||
alphabet_takes = 0
|
alphabet_takes = 0
|
||||||
if num_takes > 10:
|
if num_takes > 10:
|
||||||
@@ -73,7 +43,7 @@ for (audio_guess, possible_sections) in timestamps.items():
|
|||||||
if alphabet_takes > 0:
|
if alphabet_takes > 0:
|
||||||
takes += '/' + '/'.join(string.ascii_uppercase[:alphabet_takes])
|
takes += '/' + '/'.join(string.ascii_uppercase[:alphabet_takes])
|
||||||
|
|
||||||
def audio_and_length(choice):
|
def start_and_end(choice):
|
||||||
take_num = -1
|
take_num = -1
|
||||||
if choice in string.ascii_uppercase:
|
if choice in string.ascii_uppercase:
|
||||||
take_num = 10 + string.ascii_uppercase.index(choice)
|
take_num = 10 + string.ascii_uppercase.index(choice)
|
||||||
@@ -82,9 +52,7 @@ for (audio_guess, possible_sections) in timestamps.items():
|
|||||||
take_info = possible_sections[take_num]
|
take_info = possible_sections[take_num]
|
||||||
start = take_info['start']
|
start = take_info['start']
|
||||||
end = take_info['end']
|
end = take_info['end']
|
||||||
start_frame = int(start * framerate)
|
return start, end
|
||||||
end_frame = int(end * framerate)
|
|
||||||
return data[start_frame:end_frame], end - start
|
|
||||||
|
|
||||||
print('\033[31m' + audio_guess + '\033[0m')
|
print('\033[31m' + audio_guess + '\033[0m')
|
||||||
print(f'{takes}/u({takes}/*)/d/f/n/h/q')
|
print(f'{takes}/u({takes}/*)/d/f/n/h/q')
|
||||||
@@ -95,47 +63,44 @@ for (audio_guess, possible_sections) in timestamps.items():
|
|||||||
elif choice == 'd':
|
elif choice == 'd':
|
||||||
break
|
break
|
||||||
elif choice != '/' and choice in takes:
|
elif choice != '/' and choice in takes:
|
||||||
audio, _ = audio_and_length(choice)
|
start, end = start_and_end(choice)
|
||||||
play_buffer(audio, nchannels, sampwidth, framerate)
|
cutter.play_audio(start, end)
|
||||||
elif choice == 'f':
|
elif choice == 'f':
|
||||||
phrase = input("phrase (lower-case) to search for?")
|
cutter.search()
|
||||||
last_search = phrase
|
|
||||||
searching_for = phrase
|
|
||||||
break
|
break
|
||||||
elif choice == 'n':
|
elif choice == 'n':
|
||||||
searching_for = last_search
|
cutter.repeat_search()
|
||||||
break
|
break
|
||||||
elif choice == 'q':
|
elif choice == 'q':
|
||||||
save()
|
save()
|
||||||
elif choice == 'u':
|
elif choice == 'u':
|
||||||
choice = getch()
|
choice = getch()
|
||||||
|
choices = takes.split('/')
|
||||||
if choice == '*':
|
if choice == '*':
|
||||||
# use all the takes
|
# use all the takes
|
||||||
print('using all')
|
print('using all')
|
||||||
line_with_alts = {}
|
line_with_alts = {}
|
||||||
choices = takes.split('/')
|
start, end = start_and_end(choices[0])
|
||||||
audio, length = audio_and_length(choices[0])
|
length = end - start
|
||||||
new_data = vstack((new_data, audio))
|
line_with_alts['start'] = cutter.current_sec
|
||||||
line_with_alts['start'] = current_sec
|
line_with_alts['end'] = cutter.current_sec + length
|
||||||
line_with_alts['end'] = current_sec + length
|
cutter.take_audio(audio_guess, line_with_alts, start, end)
|
||||||
current_sec += length
|
|
||||||
alts = []
|
alts = []
|
||||||
for choice in choices[1:]:
|
for choice in choices[1:]:
|
||||||
audio, length = audio_and_length(choices[0])
|
start, end = start_and_end(choices[0])
|
||||||
alts.append({'start': current_sec, 'end': current_sec + length})
|
length = end - start
|
||||||
current_sec += length
|
alts.append({'start': cutter.current_sec, 'end': cutter.current_sec + length})
|
||||||
new_data = vstack((new_data, audio))
|
line_with_alts['alts'] = alts
|
||||||
line_with_alts['alts'] = alts
|
cutter.take_audio(audio_guess, line_with_alts, start, end)
|
||||||
new_json[audio_guess] = line_with_alts
|
|
||||||
break
|
break
|
||||||
elif choice != '/' and choice in takes:
|
elif choice != '/' and choice in takes:
|
||||||
audio, length = audio_and_length(choice)
|
start, end = start_and_end(choices[0])
|
||||||
new_json[audio_guess] = {
|
length = end - start
|
||||||
'start': current_sec,
|
info = {
|
||||||
'end': current_sec + length
|
'start': cutter.current_sec,
|
||||||
|
'end': cutter.current_sec + length
|
||||||
}
|
}
|
||||||
new_data = vstack((new_data, audio))
|
cutter.take_audio(audio_guess, info, start, end)
|
||||||
current_sec += length
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print(f'{choice} is not a valid take to use')
|
print(f'{choice} is not a valid take to use')
|
||||||
@@ -143,7 +108,4 @@ for (audio_guess, possible_sections) in timestamps.items():
|
|||||||
else:
|
else:
|
||||||
print(f'{choice} is not a valid option')
|
print(f'{choice} is not a valid option')
|
||||||
|
|
||||||
if searching_for != None:
|
cutter.process_audio(process_chunk, new_wav_file())
|
||||||
print(f"{searching_for} not found")
|
|
||||||
|
|
||||||
save()
|
|
||||||
11
projects/hollywoo/scripts/imports.py
Normal file
11
projects/hollywoo/scripts/imports.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import wave
|
||||||
|
from scipy.io import wavfile
|
||||||
|
from simpleaudio import play_buffer
|
||||||
|
from numpy import vstack
|
||||||
|
try:
|
||||||
|
from getch import getch
|
||||||
|
except:
|
||||||
|
from msvcrt import getwch as getch
|
||||||
|
__all__ = ['sys', 'json', 'wave', 'wavfile', 'play_buffer', 'vstack', 'getch']
|
||||||
@@ -6,13 +6,9 @@ usage = 'python transcribe-voice-track.py <wav filenames...> '
|
|||||||
# If you don't get results, try re-exporting as Signed 16-bit PCM
|
# If you don't get results, try re-exporting as Signed 16-bit PCM
|
||||||
|
|
||||||
import util
|
import util
|
||||||
import wave
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from scipy.io import wavfile
|
|
||||||
|
|
||||||
from vosk import Model, KaldiRecognizer
|
from vosk import Model, KaldiRecognizer
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import sys
|
from imports import *
|
||||||
|
|
||||||
def arg(num, usage, default=None):
|
def arg(num, usage, default=None):
|
||||||
val = ''
|
val = ''
|
||||||
@@ -19,3 +19,75 @@ def args(starting_num, usage, default=None):
|
|||||||
return default
|
return default
|
||||||
raise ValueError(usage)
|
raise ValueError(usage)
|
||||||
return l
|
return l
|
||||||
|
|
||||||
|
class AudioCutter:
    """Cut tagged chunks out of a wav file and collect them into a new wav/json pair.

    The json file maps an audio tag to chunk info; the wav file supplies the
    sound data. Selected chunks are appended to ``new_data`` and their info is
    recorded in ``new_json_info`` under the same tag. ``current_sec`` tracks
    the total length (in seconds) of the chunks taken so far.
    """

    def __init__(self, wav_file, json_file):
        """Load the tagged-chunk json and the wav's header and sample data.

        Args:
            wav_file: path of the wav file to cut from.
            json_file: path of the json file describing tagged chunks.
        """
        # Store a wav file's sound data and json data representing tagged chunks of audio in the wav
        with open(json_file, 'r') as f:
            self.json_info = json.load(f)

        with open(wav_file, 'rb') as f:
            self.wav = wave.open(f)
            # Read the header while the underlying file is still open.
            (self.nchannels, self.sampwidth, self.framerate,
             self.nframes, self.comptype, self.compname) = self.wav.getparams()
        _, self.data = wavfile.read(wav_file)

        # Accumulate new sound data cut from the original, along with new related json data
        # NOTE: the accumulator is seeded with the first frame of the original
        # audio, so the output begins with that one extra frame.
        self.new_data = self.data[0:1]
        self.new_json_info = {}

        # State of a search through the json/wav file:
        self.current_sec = 0
        self.searching_for = None
        self.last_search = None

    def save_and_quit(self, new_wav_file):
        """Write the accumulated audio and json, then exit the process.

        The json is written next to the wav, with the wav's extension swapped
        for ``.json``.

        Args:
            new_wav_file: path of the wav file to write.

        Raises:
            SystemExit: always, with exit code 0, after both files are written.
        """
        from os.path import splitext

        wavfile.write(new_wav_file, self.framerate, self.new_data)
        # Swap only the trailing extension. A plain str.replace(".wav", ...)
        # also rewrites ".wav" inside directory names and, when the name has
        # no ".wav" at all, makes the json overwrite the wav just written.
        json_file = splitext(new_wav_file)[0] + ".json"
        with open(json_file, 'w') as f:
            json.dump(self.new_json_info, f)
        sys.exit(0)

    def audio_and_length(self, start, end):
        """Return ``(samples, seconds)`` for the span ``start``..``end``.

        Args:
            start: start of the span, in seconds.
            end: end of the span, in seconds.

        Returns:
            A tuple of the slice of ``self.data`` between the two frame
            indices (seconds * framerate, truncated to int) and ``end - start``.
        """
        start_frame = int(start * self.framerate)
        end_frame = int(end * self.framerate)
        return self.data[start_frame:end_frame], end - start

    def take_audio(self, tag, info, start, end):
        """Append the span ``start``..``end`` to the output and record ``info`` under ``tag``.

        Args:
            tag: key under which ``info`` is stored in ``new_json_info``
                (an existing entry for the same tag is replaced).
            info: json-serializable description of the chunk in the new file.
            start: start of the span in the original audio, in seconds.
            end: end of the span in the original audio, in seconds.
        """
        from numpy import concatenate

        audio, length = self.audio_and_length(start, end)
        # Join along the frame axis. Unlike vstack, this also works for mono
        # wavs, whose sample data is one-dimensional.
        self.new_data = concatenate((self.new_data, audio), axis=0)
        self.current_sec += length
        self.new_json_info[tag] = info

    def play_audio(self, start, end):
        """Play the span ``start``..``end`` (seconds) of the original audio."""
        audio, _ = self.audio_and_length(start, end)
        play_buffer(audio, self.nchannels, self.sampwidth, self.framerate)

    def search(self):
        """Prompt for a phrase and start searching for it; remember it for ``repeat_search``."""
        phrase = input("phrase (lower-case) to search for?")
        self.last_search = phrase
        self.searching_for = phrase

    def repeat_search(self):
        """Search again for the phrase most recently given to ``search``."""
        self.searching_for = self.last_search

    def process_audio(self, chunk_processor, new_wav_file):
        """Run ``chunk_processor`` over the tagged chunks, then save and exit.

        Args:
            chunk_processor: callable invoked as
                ``chunk_processor(audio_tag, chunk_info)`` for each chunk that
                is not skipped by an active search.
            new_wav_file: path passed to ``save_and_quit`` once every chunk
                has been visited.

        Raises:
            SystemExit: always, via ``save_and_quit``.
        """
        for (audio_tag, chunk_info) in self.json_info.items():
            # When the AudioCutter is searching for a phrase, skip all audio tags that don't match
            if self.searching_for is not None:
                if self.searching_for in audio_tag:
                    self.searching_for = None
                else:
                    continue

            chunk_processor(audio_tag, chunk_info)

        # A search still pending here ran off the end without a match.
        if self.searching_for is not None:
            print(f"{self.searching_for} not found")

        self.save_and_quit(new_wav_file)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user