"""Beat-track a mono WAV file and add an audible 220 Hz click on every beat.

Reads a mono WAV, estimates beat positions with librosa, and writes a stereo
WAV whose second channel carries a short decaying 220 Hz tone at each beat.
Also keeps (disabled) reference snippets from earlier audio experiments.
"""
# Read WAV and MP3 files to array
from pydub import AudioSegment
import numpy as np
from scipy.io import wavfile
from plotly.offline import init_notebook_mode
import plotly.graph_objs as go
import plotly
import IPython
import librosa

# ---------------------------------------------------------------------------
# Reference snippets kept from earlier experiments (all intentionally disabled)
# ---------------------------------------------------------------------------

# read WAV file using scipy.io.wavfile
#fs_wav, data_wav = wavfile.read(r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo.wav")
#time_wav = np.arange(0, len(data_wav)) / fs_wav
#plotly.offline.iplot({"data": [go.Scatter(x=time_wav,
#                                          y=data_wav[:, 0],
#                                          name='left channel'),
#                               go.Scatter(x=time_wav,
#                                          y=data_wav[:, 1],
#                                          name='right channel')]})

# Normalization
#fs_wav, data_wav = wavfile.read("data/lost_highway_small.wav")
#data_wav_norm = data_wav / (2**15)
#time_wav = np.arange(0, len(data_wav)) / fs_wav
#plotly.offline.iplot({"data": [go.Scatter(x=time_wav,
#                                          y=data_wav_norm,
#                                          name='normalized audio signal')]})

# Trim (segment) audio signal (2 seconds)
#data_wav_norm_crop = data_wav_norm[2 * fs_wav: 4 * fs_wav]
#time_wav_crop = np.arange(0, len(data_wav)) / fs_wav
#plotly.offline.iplot({"data": [go.Scatter(x=time_wav_crop,
#                                          y=data_wav_norm_crop,
#                                          name='cropped audio signal')]})

# Fix-sized segmentation (breaks a signal into non-overlapping segments)
#fs, signal = wavfile.read("data/obama.wav")
#signal = signal / (2**15)
#signal_len = len(signal)
#segment_size_t = 1  # segment size in seconds
#segment_size = segment_size_t * fs  # segment size in samples
## Break signal into list of segments in a single-line Python code
#segments = np.array([signal[x:x + segment_size] for x in
#                     np.arange(0, signal_len, segment_size)])
## Save each segment in a separate filename
#for iS, s in enumerate(segments):
#    wavfile.write("data/obama_segment_{0:d}_{1:d}.wav".format(segment_size_t * iS,
#                  segment_size_t * (iS + 1)), fs, (s))
## Remove pauses using an energy threshold = 50% of the median energy:
#energies = [(s**2).sum() / len(s) for s in segments]
## (attention: integer overflow would occur without normalization here!)
#thres = 0.5 * np.median(energies)
#index_of_segments_to_keep = (np.where(energies > thres)[0])
## get segments that have energies higher than the threshold:
#segments2 = segments[index_of_segments_to_keep]
## concatenate segments to signal:
#new_signal = np.concatenate(segments2)
## and write to file:
#wavfile.write("data/obama_processed.wav", fs, new_signal)
#plotly.offline.iplot({"data": [go.Scatter(y=energies, name="energy"),
#                               go.Scatter(y=np.ones(len(energies)) * thres,
#                                          name="thres")]})
# play the initial and the generated files in notebook:
#IPython.display.display(IPython.display.Audio("data/obama.wav"))
#IPython.display.display(IPython.display.Audio("data/obama_processed.wav"))

# read MP3 file using pydub
#audiofile = AudioSegment.from_file(r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo.mp3")
#data_mp3 = np.array(audiofile.get_array_of_samples())
#fs_mp3 = audiofile.frame_rate
#print("juhu")
#print('Sq Error Between mp3 and wav data = {}'.format(((data_mp3 - data_wav)**2).sum()))
#print('Signal Duration = {} seconds'.format(data_wav.shape[0] / fs_wav))

# ---------------------------------------------------------------------------
# Live script: add beat clicks to a song
# ---------------------------------------------------------------------------

# Raw strings: the original non-raw literals contained invalid escape
# sequences ("\F", "\A", ...) which are a SyntaxWarning on modern Python.
IN_WAV = r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo_mono.wav"
# Distinct output path: the original wrote the stereo result back over its own
# mono input, destroying the source and breaking any second run (the file
# would no longer be mono, so reshape(-1, 1) below would fail).
OUT_WAV = r"F:\Festplatte\Alex\Dev\Code\Projekte\[py] MachineLearning\AudioSamples\FF8-Odeka Ke Chocobo_beats.wav"


def _click_waveform(fs, peak, freq=220.0, duration=0.2):
    """Return a short decaying *freq*-Hz tone sampled at *fs* Hz.

    The envelope starts at 0.8 and decays roughly as 1/sqrt(t), clipped at 0,
    so the click's peak amplitude is about 0.8 * peak.
    """
    t = np.arange(0.0, duration, 1.0 / fs)
    amp_mod = 0.2 / (np.sqrt(t) + 0.2) - 0.2
    amp_mod[amp_mod < 0] = 0.0
    return peak * np.cos(2 * np.pi * freq * t) * amp_mod


def add_beat_clicks(in_path, out_path, offset=-0.05):
    """Beat-track the mono WAV *in_path* and write a stereo WAV to *out_path*.

    The first channel is the original signal; the second channel carries a
    220 Hz click at every beat detected by librosa, shifted by *offset*
    seconds. Returns the estimated tempo (BPM).
    """
    fs, mono = wavfile.read(in_path)
    tempo, beats = librosa.beat.beat_track(y=mono.astype('float'), sr=fs,
                                           units="time")
    beats = beats + offset  # nudge clicks slightly ahead of the detected beat

    # Add a second, initially silent channel for the clicks.
    stereo = np.concatenate((mono.reshape(-1, 1), np.zeros((len(mono), 1))),
                            axis=1)

    # The click is identical for every beat: build and quantize it once
    # instead of recomputing it (and stereo.max()) inside the loop.
    click = _click_waveform(fs, stereo.max()).astype('int16')
    for b in beats:
        start = int(fs * b)
        # Clip at the end of the signal; the original assignment raised a
        # broadcast error when the last beat's click ran past the end.
        stop = min(start + len(click), len(stereo))
        if start < 0 or start >= stop:
            continue
        stereo[start:stop, 1] = click[:stop - start]

    wavfile.write(out_path, fs, np.int16(stereo))
    return tempo


if __name__ == "__main__":
    add_beat_clicks(IN_WAV, OUT_WAV)
    # play the generated (not the source) file in the notebook:
    IPython.display.display(IPython.display.Audio(OUT_WAV))