import tensorflow as tf
#from model import Model
import os
import shutil
from data import Data
#from preprocess import to_spectrogram, get_magnitude
#from utils import Diff
from config import TrainConfig
import librosa
import numpy as np
from tensorflow.contrib.rnn import GRUCell, MultiRNNCell
import soundfile as sf
from config import EvalConfig, ModelConfig
from mir_eval.separation import bss_eval_sources
import datetime
def get_phase(stft_maxtrixes):
    """Return the element-wise phase angle (radians) of the given STFT matrices."""
    phases = np.angle(stft_maxtrixes)
    return phases
def spec_to_batch(src, seq_len=128):
    """Split a 2-D spectrogram into fixed-length batched segments.

    Generalization: the original hard-coded the segment length as a local
    `ModelConfigSEQ_LEN=128` (apparently a typo for `ModelConfig.SEQ_LEN`);
    it is now a keyword parameter defaulting to 128, so callers are unchanged.

    Args:
        src: np.ndarray of shape (freq, n_frames).
        seq_len: segment length along the time axis (default 128).

    Returns:
        (batch, padded_src) where batch has shape
        (ceil(n_frames / seq_len), freq, seq_len, 1) and padded_src is
        src zero-padded along time to a multiple of seq_len.
    """
    freq, n_frames = src.shape
    # Zero-pad the time axis up to the next multiple of seq_len.
    pad_len = (-n_frames) % seq_len
    padded_src = np.pad(src, pad_width=((0, 0), (0, pad_len)),
                        mode='constant', constant_values=0)
    assert padded_src.shape[-1] % seq_len == 0
    # (freq, T) -> (T, freq, 1) -> (n_batches, seq_len, freq, 1) -> (n_batches, freq, seq_len, 1)
    batch = padded_src.transpose(1, 0)[:, :, np.newaxis]
    batch = np.reshape(batch, (-1, seq_len, freq, 1))
    batch = batch.transpose(0, 2, 1, 3)
    return batch, padded_src
def batch_to_spec(src):
    """Concatenate batched spectrogram segments back into one 2-D spectrogram.

    Generalization: the original hard-coded the frequency-bin count as 512;
    it is now taken from the input's shape, so behavior is identical for
    512-bin inputs and correct for any other bin count.

    Args:
        src: np.ndarray of shape (n_batches, freq, seq_len, 1).

    Returns:
        np.ndarray of shape (freq, n_batches * seq_len): the segments
        joined along the time axis.
    """
    _, freq, _, _ = src.shape
    # (n, freq, seq, 1) -> (n, seq, freq, 1) -> (n*seq, freq) -> (freq, n*seq)
    flat = np.reshape(src.transpose(0, 2, 1, 3), (-1, freq))
    return flat.transpose(1, 0)
def get_stft_matrix(magnitudes, phases):
    """Combine magnitudes and phases into complex STFT values (m * e^(j*phase))."""
    # Euler's formula: e^(j*p) = cos(p) + j*sin(p).
    unit_phasors = np.cos(phases) + 1.j * np.sin(phases)
    return magnitudes * unit_phasors
def griffin_lim(mag, len_frame=1024, len_hop=256, num_iters=50, phase_angle=None, length=None):
    """Reconstruct a waveform from a magnitude spectrogram via Griffin-Lim.

    Alternates between time and frequency domains: each round inverts the
    current complex spectrogram, re-analyzes the waveform, and keeps the
    target magnitude while adopting the re-estimated phase.

    Args:
        mag: target magnitude spectrogram.
        len_frame: STFT window / FFT size.
        len_hop: STFT hop length.
        num_iters: number of refinement iterations (must be > 0).
        phase_angle: optional initial phase; random in [0, pi) if None.
        length: optional output length forwarded to istft.

    Returns:
        The reconstructed time-domain signal from the final iteration.
    """
    assert num_iters > 0
    if phase_angle is None:
        phase_angle = np.pi * np.random.rand(*mag.shape)
    spec = get_stft_matrix(mag, phase_angle)
    for it in range(num_iters):
        wav = librosa.istft(spec, win_length=len_frame, hop_length=len_hop, length=length)
        if it == num_iters - 1:
            # Last inversion done; no further phase refinement needed.
            break
        analyzed = librosa.stft(wav, n_fft=len_frame, win_length=len_frame, hop_length=len_hop)
        _, phase = librosa.magphase(analyzed)
        # Keep the known magnitude, adopt the newly estimated phase.
        spec = get_stft_matrix(mag, np.angle(phase))
    return wav
def to_wav_mag_only(mag, init_phase, len_frame=ModelConfig.L_FRAME, len_hop=ModelConfig.L_HOP, num_iters=50):
    """Reconstruct waveforms from magnitude spectrograms via Griffin-Lim, one per batch item.

    BUG FIX: the original passed a single zipped iterable to a two-argument
    lambda (`map(lambda m, p: ..., list(zip(mag, init_phase)))`), which raises
    TypeError because map feeds each (m, p) tuple as ONE argument. The pairs
    are now unpacked explicitly.

    Args:
        mag: batch of magnitude spectrograms (iterable over the first axis).
        init_phase: batch of initial phase estimates, aligned with mag.
        len_frame, len_hop: STFT parameters forwarded to griffin_lim.
        num_iters: Griffin-Lim iterations per item.

    Returns:
        np.ndarray stacking one reconstructed waveform per (mag, phase) pair.
    """
    return np.array([griffin_lim(m, len_frame, len_hop, num_iters=num_iters, phase_angle=p)
                     for m, p in zip(mag, init_phase)])
def to_wav(mag, phase, len_hop=ModelConfig.L_HOP):
    """Invert a batch of (magnitude, phase) spectrograms to waveforms via ISTFT."""
    complex_specs = get_stft_matrix(mag, phase)
    wavs = [librosa.istft(spec, hop_length=len_hop) for spec in complex_specs]
    return np.array(wavs)
def fuyuan(mag, pha):
    """Rebuild a complex spectrogram from magnitude and phase (mag * e^(j*pha)).

    FIXES: the original used the `np.complex` builtin alias, which was removed
    in NumPy 1.24 and raises AttributeError on modern NumPy, and filled the
    array with an O(rows*cols) Python double loop. This vectorized form
    computes the same complex128 result in one pass.

    Args:
        mag: magnitude array.
        pha: phase array (radians), same shape as mag.

    Returns:
        np.ndarray of complex128: cos(pha)*mag + j*sin(pha)*mag, element-wise.
    """
    return (np.cos(pha) * mag + 1j * np.sin(pha) * mag).astype(np.complex128)
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    """Compute length-weighted global NSDR / SIR / SAR over EvalConfig.NUM_EVAL examples.

    BUG FIX: the original initialized the accumulators with
    `gnsdr = gsir = gsar = np.zeros(2)`, binding all three names to the SAME
    array object, so every in-place `+=` corrupted all three metrics. Each
    accumulator now gets its own array. The computed tuple is also returned
    (the original only printed it), which is backward-compatible.

    Args:
        mixed_wav: mixture signals, shape (n_examples, n_samples).
        src1_wav, src2_wav: ground-truth source signals, same layout.
        pred_src1_wav, pred_src2_wav: model estimates; their length is used
            to crop all reference signals.

    Returns:
        (gnsdr, gsir, gsar): length-2 arrays (one entry per source),
        averaged over the evaluated examples.
    """
    len_cropped = pred_src1_wav.shape[-1]
    # Crop references/mixture to the prediction length so bss_eval aligns.
    src1_wav = src1_wav[:, :len_cropped]
    src2_wav = src2_wav[:, :len_cropped]
    mixed_wav = mixed_wav[:, :len_cropped]
    gnsdr = np.zeros(2)
    gsir = np.zeros(2)
    gsar = np.zeros(2)
    total_len = 0
    for i in range(EvalConfig.NUM_EVAL):
        sdr, sir, sar, _ = bss_eval_sources(np.array([src1_wav[i], src2_wav[i]]),
                                            np.array([pred_src1_wav[i], pred_src2_wav[i]]), False)
        sdr_mixed, _, _, _ = bss_eval_sources(np.array([src1_wav[i], src2_wav[i]]),
                                              np.array([mixed_wav[i], mixed_wav[i]]), False)
        # NSDR: SDR improvement of the estimate over the raw mixture.
        nsdr = sdr - sdr_mixed
        gnsdr += len_cropped * nsdr
        gsir += len_cropped * sir
        gsar += len_cropped * sar
        total_len += len_cropped
    gnsdr = gnsdr / total_len
    gsir = gsir / total_len
    gsar = gsar / total_len
    print("gnsdr:", gnsdr, "gsir:", gsir, "gsar:", gsar)
    return gnsdr, gsir, gsar
def train():
global_step = tf.Variable(0, dtype=tf.int32, trainable=False)
is_train=tf.placeholder(tf.bool)
batchsize=tf.placeholder(tf.float32)
x_mixed = tf.placeholder(tf.float32, [None, 512, 128,1], name='x_mixed')
y_src1= tf.placeholder(tf.float32, [None, 512, 128,1], name='y_pre1')
y_src2= tf.placeholder(tf.float32, [None, 512, 128,1], name='y_pre2')
yconv1=tf.placeholder(tf.float32, [None, 256, 64,16])
yconv2=tf.placeholder(tf.float32, [None, 128, 32,32])
yconv3=tf.placeholder(tf.float32, [None, 64, 16,64])
yconv4=tf.placeholder(tf.float32, [None, 32, 8,128])
yconv5=tf.placeholder(tf.float32, [None, 16, 4,256])
ayconv1=tf.placeholder(tf.float32, [None, 256, 64,16])
ayconv2=tf.placeholder(tf.float32, [None, 128, 32,32])
ayconv3=tf.placeholder(tf.float32, [None, 64, 16,64])
ayconv4=tf.placeholder(tf.float32, [None, 32, 8,128])
ayconv5=tf.placeholder(tf.float32, [None, 16, 4,256])
#第1个卷积层
filter1=tf.Variable(tf.random_normal([5,5,1,16]))
conv_out1=tf.nn.conv2d(x_mixed, filter1, strides=[1, 2, 2, 1], padding='SAME')
batch_mean1, batch_var1 = tf.nn.moments(conv_out1, [0, 1, 2], keep_dims=True)
shift1 = tf.Variable(tf.zeros([16]))
scale1 = tf.Variable(tf.ones([16]))
epsilon = 1e-3
batnor1 = tf.nn.batch_normalization(conv_out1, batch_mean1, batch_var1, shift1, scale1, epsilon)
out1=tf.nn.leaky_relu(batnor1)
yconv1=out1
afilter1=tf.Variable(tf.random_normal([5,5,1,16]))
aconv_out1=tf.nn.conv2d(x_mixed, afilter1, strides=[1, 2, 2, 1], padding='SAME')
abatch_mean1,abatch_var1 = tf.nn.moments(aconv_out1, [0, 1, 2], keep_dims=True)
ashift1 = tf.Variable(tf.zeros([16]))
ascale1 = tf.Variable(tf.ones([16]))
epsilon = 1e-3
abatnor1 = tf.nn.batch_normalization(aconv_out1, abatch_mean1, abatch_var1, ashift1, ascale1, epsilon)
aout1=tf.nn.leaky_relu(abatnor1)
ayconv1=aout1
#第2个卷积层
filter2=tf.Variable(tf.random_normal([5,5,16,32]))
conv_out2=tf.nn.conv2d(out1, filter2, strides=[1, 2, 2, 1], padding='SAME')
batch_mean2, batch_var2 = tf.nn.moments(conv_out2, [0, 1, 2], keep_dims=True)
shift2 = tf.Variable(tf.zeros([32]))
scale2 = tf.Variable(tf.ones([32]))
batnor2 = tf.nn.batch_normalization(conv_out2, batch_mean2, batch_var2, shift2, scale2, epsilon)
out2=tf.nn.leaky_relu(batnor2)
yconv2=out2
afilter2=tf.Variable(tf.random_normal([5,5,16,32]))
aconv_out2=tf.nn.conv2d(aout1, afilter2, strides=[1, 2, 2, 1], padding='SAME')
abatch_mean2, abatch_var2 = tf.nn.moments(aconv_out2, [0, 1, 2], keep_dims=True)
ashift2 = tf.Variable(tf.zeros([32]))
ascale2 = tf.Variable(tf.ones([32]))
abatnor2 = tf.nn.batch_normalization(aconv_out2, abatch_mean2, abatch_var2, ashift2, ascale2, epsilon)
aout2=tf.nn.leaky_relu(abatnor2)
ayconv2=aout2
#第3个卷积层
filter3=tf.Variable(tf.random_normal([5,5,32,64]))
conv_out3=tf.nn.conv2d(out2, filter3, strides=[1, 2, 2, 1], padding='SAME')
batch_mean3, batch_var3 = tf.nn.moments(conv_out3, [0, 1, 2], keep_dims=True)
shift3 = tf.Variable(tf.zeros([64]))
scale3 = tf.Variable(tf.ones([64]))
batnor3 = tf.nn.batch_normalization(conv_out3, batch_mean3, batch_var3, shift3, scale3, epsilon)
out3=tf.nn.leaky_relu(batnor3)
yconv3=out3
afilter3=tf.Variable(tf.random_normal([5,5,32,64]))
aconv_out3=tf.nn.conv2d(aout2, afilter3, strides=[1, 2, 2, 1], padding='SAME')
abatch_mean3, abatch_var3 = tf.nn.moments(aconv_out3, [0, 1, 2], keep_dims=True)
ashift3 = tf.Variable(tf.zeros([64]))
ascale3 = tf.Variable(tf.ones([64]))
abatnor3 = tf.nn.batch_normalization(aconv_out3, abatch_mean3, abatch_var3, ashift3, ascale3, epsilon)
aout3=tf.nn.leaky_relu(abatnor3)
ayconv3=aout3
#第4个卷积层
filter4=tf.Variable(tf.random_normal([5,5,64,128]))
conv_out4=tf.nn.conv2d(out3, filter4, strides=[1, 2, 2, 1], padding='SAME')