# Read WAV and MP3 files to array

from pydub import AudioSegment
import numpy as np
from scipy.io import wavfile
from plotly.offline import init_notebook_mode
import plotly.graph_objs as go
import plotly
import IPython
import librosa

# read a WAV file using scipy.io.wavfile
# (raw string, so the backslashes in the Windows path are not treated as escapes)
#fs_wav, data_wav = wavfile.read(r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo.wav")
#time_wav = np.arange(0, len(data_wav)) / fs_wav
#plotly.offline.iplot({"data": [go.Scatter(x=time_wav,
#                                          y=data_wav[:, 0],
#                                          name='left channel'),
#                               go.Scatter(x=time_wav,
#                                          y=data_wav[:, 1],
#                                          name='right channel')]})

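# Note: wavfile.read returns the sample rate and a numpy array shaped
# (n_samples,) for mono or (n_samples, n_channels) for multi-channel files,
# with a dtype matching the file's encoding (e.g. int16 for 16-bit PCM).
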
# Normalization (the file holds 16-bit signed samples, hence the 2**15 divisor)
#fs_wav, data_wav = wavfile.read("data/lost_highway_small.wav")
#data_wav_norm = data_wav / (2**15)
#time_wav = np.arange(0, len(data_wav)) / fs_wav
#plotly.offline.iplot({"data": [go.Scatter(x=time_wav,
#                                          y=data_wav_norm,
#                                          name='normalized audio signal')]})

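# A dtype-aware alternative to the fixed 2**15 divisor above (a minimal sketch;
# the scale factors for the common PCM WAV dtypes are exact, but the function
# name and the float pass-through are my own convention):
def normalize_wav(data):
    """Scale integer PCM samples to floats in [-1.0, 1.0]."""
    if data.dtype == np.int16:
        return data / 2**15
    if data.dtype == np.int32:
        return data / 2**31
    if data.dtype == np.uint8:  # 8-bit WAV data is unsigned, centered at 128
        return (data.astype(np.float64) - 128) / 128
    return data.astype(np.float64)  # already float (e.g. 32-bit float WAV)
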
# Trim (segment) the audio signal (keep seconds 2 to 4)
#data_wav_norm_crop = data_wav_norm[2 * fs_wav: 4 * fs_wav]
#time_wav_crop = np.arange(0, len(data_wav_norm_crop)) / fs_wav
#plotly.offline.iplot({"data": [go.Scatter(x=time_wav_crop,
#                                          y=data_wav_norm_crop,
#                                          name='cropped audio signal')]})

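# The slice above works in raw sample indices; a tiny helper for cropping by
# time instead (a sketch, the function name is my own):
def crop_seconds(signal, fs, start_s, end_s):
    """Return the samples between start_s and end_s (both in seconds)."""
    return signal[int(start_s * fs):int(end_s * fs)]
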
# Fixed-size segmentation (breaks a signal into non-overlapping segments)
#fs, signal = wavfile.read("data/obama.wav")
#signal = signal / (2**15)
#signal_len = len(signal)
#segment_size_t = 1  # segment size in seconds
#segment_size = segment_size_t * fs  # segment size in samples
## Break the signal into a list of segments in a single line of Python
#segments = np.array([signal[x:x + segment_size] for x in
#                     np.arange(0, signal_len, segment_size)])
## Save each segment to a separate file
#for iS, s in enumerate(segments):
#    wavfile.write("data/obama_segment_{0:d}_{1:d}.wav".format(segment_size_t * iS,
#                                                              segment_size_t * (iS + 1)), fs, s)

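# The one-liner above leaves a shorter trailing segment when the signal length
# is not an exact multiple of the segment size, which makes the rows of the
# resulting array unequal. A variant that drops the partial tail (a sketch;
# the function name is my own):
def segment_signal(signal, fs, segment_size_t):
    """Split a signal into equal, non-overlapping segments of segment_size_t seconds."""
    seg = int(segment_size_t * fs)
    n_full = len(signal) // seg  # number of complete segments
    return np.array([signal[i * seg:(i + 1) * seg] for i in range(n_full)])
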
## Remove pauses using an energy threshold = 50% of the median energy:
#energies = np.array([(s**2).sum() / len(s) for s in segments])
## (attention: an integer overflow would occur here without the normalization above!)
#thres = 0.5 * np.median(energies)
#index_of_segments_to_keep = np.where(energies > thres)[0]
## get the segments whose energy is higher than the threshold:
#segments2 = segments[index_of_segments_to_keep]
## concatenate the kept segments into a signal:
#new_signal = np.concatenate(segments2)
## and write it to a file:
#wavfile.write("data/obama_processed.wav", fs, new_signal)
#plotly.offline.iplot({"data": [go.Scatter(y=energies, name="energy"),
#                               go.Scatter(y=np.ones(len(energies)) * thres,
#                                          name="thres")]})

# play the initial and the generated files in the notebook:
#IPython.display.display(IPython.display.Audio("data/obama.wav"))
#IPython.display.display(IPython.display.Audio("data/obama_processed.wav"))

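# The same pause-removal recipe packaged as one function (a sketch; the 50%
# median threshold mirrors the steps above, the function name is my own):
def remove_pauses(segments, threshold_ratio=0.5):
    """Keep only segments whose mean energy exceeds threshold_ratio * median energy."""
    energies = np.array([(seg**2).sum() / len(seg) for seg in segments])
    thres = threshold_ratio * np.median(energies)
    return np.concatenate(segments[np.where(energies > thres)[0]])
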
# read an MP3 file using pydub
#audiofile = AudioSegment.from_file(r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo.mp3")
#data_mp3 = np.array(audiofile.get_array_of_samples())
#fs_mp3 = audiofile.frame_rate

# compare the MP3 and the WAV data (note: pydub returns interleaved samples,
# so a stereo signal needs data_mp3.reshape((-1, 2)) to match the wav array):
#print('Sq Error Between mp3 and wav data = {}'.format(((data_mp3 - data_wav)**2).sum()))
#print('Signal Duration = {} seconds'.format(data_wav.shape[0] / fs_wav))

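# A small helper for loading any pydub-readable file into a (samples, channels)
# float array (a sketch; the function name is mine, the pydub calls are standard):
def load_audio(path):
    """Decode an audio file with pydub; return (fs, float samples in [-1, 1])."""
    seg = AudioSegment.from_file(path)
    samples = np.array(seg.get_array_of_samples(), dtype=np.float64)
    if seg.channels > 1:
        samples = samples.reshape((-1, seg.channels))  # de-interleave
    samples /= 2 ** (8 * seg.sample_width - 1)  # scale by the sample width
    return seg.frame_rate, samples
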
# load the file and extract the tempo and the beats:
[Fs, s] = wavfile.read(r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo_mono.wav")
tempo, beats = librosa.beat.beat_track(y=s.astype('float'), sr=Fs, units="time")
beats -= 0.05  # shift the clicks slightly ahead of the detected beat positions
beats = np.maximum(beats, 0)  # the shift must not push the first beat before t=0

# add short 220 Hz tones on the 2nd channel of the song ON EACH BEAT
s = s.reshape(-1, 1)
s = np.concatenate((s, np.zeros(s.shape)), axis=1)
for ib, b in enumerate(beats):
    t = np.arange(0, 0.2, 1.0 / Fs)
    amp_mod = 0.2 / (np.sqrt(t) + 0.2) - 0.2  # quickly decaying envelope -> click-like tone
    amp_mod[amp_mod < 0] = 0
    x = s.max() * np.cos(2 * np.pi * t * 220) * amp_mod
    start = int(Fs * b)
    end = min(start + x.shape[0], s.shape[0])  # the last beat may run past the signal's end
    s[start:end, 1] = x[:end - start].astype('int16')

# write a wav file whose 2nd channel carries the estimated beats
# (under a new name, so that the original mono file is not overwritten):
out_path = r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo_mono_beats.wav"
wavfile.write(out_path, Fs, np.int16(s))

# play the generated file in the notebook:
IPython.display.display(IPython.display.Audio(out_path))