#coding=utf-8
import numpy as np
from keras.models import load_model,model_from_yaml
import os
import matplotlib.pyplot as plt
from PIL import Image
from dcnn_train import get_model,cnn_model,load_dev_traindata
import random
from scipy import stats
from keras.callbacks import EarlyStopping,ModelCheckpoint
# from keras.models import load_weights
def get_label_dict():
    # scene label -> integer class index
    dicts = {
        'residential_area': 0,
        # workaround: the naive line split below sometimes yields the
        # truncated label 'residential_are', so map that spelling too
        'residential_are': 0,
        'city_center': 1,
        'beach': 2,
        'park': 3,
        'home': 4,
        'forest_path': 5,
        'bus': 6,
        'grocery_store': 7,
        'cafe/restaurant': 8,
        'car': 9,
        'train': 10,
        'metro_station': 11,
        'office': 12,
        'tram': 13,
        'library': 14,
    }
return dicts
def load_fold_data(data='Train', fold=1,
                   cfg_path='../TUT-acoustic-scenes-2016-development/evaluation_setup/',
                   data_path='./result64/', one_hot=True, Normalization=True):
if not os.path.exists(data_path):
        print 'please run feature_extract_demo.py first'
exit()
if data=='Test':
load_file = cfg_path+'fold'+str(fold)+'_evaluate.txt'
else:
load_file = cfg_path+'fold'+str(fold)+'_train.txt'
dicts = get_label_dict()
wav_paths = []
scene_y = []
    with open(load_file, 'rb') as f:
        for line in f:
            # each line: "<relative wav path> <scene label>"
            ls = line.strip('\r\n').split(' ')
            # print ls[0], ls[1], dicts.get(ls[1])
            scene_y.append(dicts.get(ls[1]))
            # spectrogram images were saved as <data_path><wav name>.jpg
            wav_paths.append(data_path + ls[0].split('audio/')[1] + '.jpg')
    # each scene is one 64 x 2584 grayscale spectrogram (mel bands x time frames)
    scene_x = np.empty((len(scene_y), 1, 64, 2584), dtype='float32')
    scene_y = np.array(scene_y, dtype='uint8')
    for i in range(scene_x.shape[0]):
        scene_x[i, 0, :, :] = np.asarray(Image.open(wav_paths[i], 'r').convert('L'), dtype='float32')
#
# strip = 64
# win_size = 128
# step = int(2584 / 64) - 1
strip = 64
win_size = 64
step = int(2584 / strip) - 1
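    # slice each 64x2584 spectrogram into step = int(2584/64) - 1 = 39
    # non-overlapping 64x64 frames (stride == window size); each frame
    # inherits its scene's label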
frame_x = np.empty((scene_x.shape[0] * step, 1, 64, win_size), dtype='float32')
frame_y = []
for i in range(scene_x.shape[0]):
for j in range(step):
frame_x[i*step+j,:,:,:]=scene_x[i,0,:,j*strip:j*strip+win_size]
frame_y.append(scene_y[i])
# plt.imshow(frame_x[i*step+j,0,:,:],cmap='gray')
# plt.figure()
# plt.imshow(scene_x[i,0,:,:],cmap='gray')
# plt.show()
frame_y = np.array(frame_y, dtype='uint8')
if one_hot is True:
from keras.utils import np_utils
frame_y = np_utils.to_categorical(frame_y, 15) # label one-hot
if Normalization is True:
frame_x = frame_x.astype('float32') / 255
scene_x = scene_x.astype('float32') /255
    print 'load_' + data + '_data ok!'
    print frame_x.shape[0], 'samples'
return frame_x,scene_x,frame_y,scene_y
def get_acc(_model,fold=1):
# frame_x, scene_x, frame_y, scene_y = load_fold_data(data='Train',fold=fold,Normalization=True)
frame_x_test, scene_x_test, frame_y_test, scene_y_test = load_fold_data(data='Test', fold=fold,Normalization=True)
# # print frame_x.shape,scene_x.shape
# index = [i for i in range(len(frame_x))]
# random.shuffle(index)
# frame_x = frame_x[index]
# frame_y = frame_y[index]
strip = 64
step = int(2584 / strip) - 1
# filepath = "best_12_06_02.nn"+str(1)
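    # per-fold weights file, presumably written by a ModelCheckpoint callback
    # in the training script (the callback is imported at the top of this file)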
filepath = "best_12_06_02.nn" + str(fold)
_model.load_weights(filepath)
    frame_acc = _model.evaluate(frame_x_test, frame_y_test, verbose=2)  # loss plus any compiled metrics
    print 'fold_' + str(fold) + '_frame_acc:_' + str(frame_acc)
    lists = []  # misclassified scenes, kept for the confusion matrix
    f = open('./error_' + str(fold) + '.txt', 'w')  # misclassification log
    for i in range(scene_x_test.shape[0]):
        # frame_predicts = np.empty((39,),dtype='uint8')
        # for j in range(39):#step of frames in a scene sample
        # frame_predicts = np.argmax(_model.predict(frame_x_test[i*39:i*39+39,:,:,:]),axis=-1)
        # a majority vote over a scene's frames decides its scene label
        frame_predicts = _model.predict_classes(frame_x_test[i * step:i * step + step, :, :, :], verbose=0)
        scene_predict = stats.mode(frame_predicts).mode[0]
        if scene_predict != scene_y_test[i]:
            f.write(str(scene_y_test[i]) + ' misclassified as -> ' + str(scene_predict) + '\n')
            lists.append([scene_y_test[i], scene_predict])
    f.close()
    scene_acc = float(scene_y_test.shape[0] - len(lists)) / scene_y_test.shape[0]
print 'fold_'+str(fold)+'_scene_acc_:'+str(scene_acc)
return frame_acc,scene_acc
def get_all_result():
    result = []
    for fold in range(1, 5):
        _model = get_model()
        result.append(get_acc(_model, fold=fold))
    print result
    # mean scene accuracy over the four folds
    print sum(float(r[1]) for r in result) / 4.0
def load_evaluate_data(cfg_path='../TUT-acoustic-scenes-2016-evaluation/evaluation_setup/',
                       data_path='./evaluate/', one_hot=True, Normalization=True):
if not os.path.exists(data_path):
        print 'please run feature_extract_demo.py first'
exit()
load_file = cfg_path+'evaluate.txt'
dicts = get_label_dict()
wav_paths = []
scene_y = []
with open(load_file,'rb') as f:
for line in f:
ls = line.strip('\r\n').split(' ')
# print ls[0],ls[1],dicts.get(ls[1])
scene_y.append(dicts.get(ls[1]))
wav_paths.append(data_path + ls[0].split('audio/')[1] + '.jpg')
scene_x = np.empty((len(scene_y),1,64,2584),dtype='float32')
scene_y = np.array(scene_y,dtype='uint8')
for i in range(scene_x.shape[0]):
scene_x[i,0,:, :] = np.asarray(Image.open(wav_paths[i], 'r').convert('L'), dtype='float32')
    strip = 64
    win_size = 64
    step = int((2584 - win_size) / strip) - 1
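    # note: int((2584 - 64) / 64) = 39, minus 1 = 38 frames per scene here,
    # one frame fewer per scene than load_fold_data extracts (39)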
frame_x = np.empty((scene_x.shape[0] * step, 1, 64, win_size), dtype='float32')
frame_y = []
for i in range(scene_x.shape[0]):
for j in range(step):
frame_x[i*step+j,:,:,:]=scene_x[i,0,:,j*strip:j*strip+win_size]
# print i*step+j,j*strip+win_size,scene_x[i,0,:,j*strip:j*strip+win_size].shape
frame_y.append(scene_y[i])
# plt.imshow(frame_x[i*step+j,0,:,:],cmap='gray')
# plt.figure()
# plt.imshow(scene_x[i,0,:,:],cmap='gray')
# plt.show()
frame_y = np.array(frame_y, dtype='uint8')
if one_hot is True:
from keras.utils import np_utils
frame_y = np_utils.to_categorical(frame_y, 15) # label one-hot
if Normalization is True:
frame_x = frame_x.astype('float32') / 255
scene_x = scene_x.astype('float32') /255
    print 'load_evaluate_data ok!'
    print frame_x.shape[0], 'frame samples'
    print scene_x.shape[0], 'scene samples'
return frame_x,scene_x,frame_y,scene_y
def get_evaluate_acc(_model,replace_softmax=False):
# frame_x, scene_x, frame_y, scene_y = load_evaluate_data()
# print frame_x.shape,scene_x.shape
# index = [i for i in range(len(frame_x))]
# random.shuffle(index)
# frame_x = frame_x[index]
# frame_y = frame_y[index]
    # EarlyStopping strategy to guard against overfitting
# early_stopping = EarlyStopping(monitor='val_acc', patience=5)
# _model.fit(frame_x,frame_y,batch_size=24,nb_epoch=nb_epoch,callbacks=[early_stopping],validation_split=0.2,shuffle=True)
# _model.fit(frame_x, frame_y, batch_size=24, nb_epoch=nb_epoch,
# shuffle=True)
    frame_x_test, scene_x_test, frame_y_test, scene_y_test = load_evaluate_data()
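    # --- The source file is truncated at this point and the rest of this
    # function is lost. What follows is a hedged reconstruction, not the
    # author's code: it mirrors the frame-level majority vote in get_acc.
    # The weights filename "best_12_06_02.nn_full" is hypothetical (get_acc
    # loads per-fold files named "best_12_06_02.nn<fold>").
    _model.load_weights("best_12_06_02.nn_full")  # hypothetical filename
    strip = 64
    step = int((2584 - 64) / strip) - 1  # 38 frames per scene, as in load_evaluate_data
    frame_acc = _model.evaluate(frame_x_test, frame_y_test, verbose=2)
    print 'evaluate_frame_acc:_' + str(frame_acc)
    errors = 0
    for i in range(scene_x_test.shape[0]):
        frame_predicts = _model.predict_classes(frame_x_test[i * step:i * step + step, :, :, :], verbose=0)
        if stats.mode(frame_predicts).mode[0] != scene_y_test[i]:
            errors += 1
    scene_acc = 1.0 - float(errors) / scene_y_test.shape[0]
    print 'evaluate_scene_acc:_' + str(scene_acc)
    return frame_acc, scene_acc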
# ---------------------------------------------------------------------------
# Project notes (translated):
#
# Sound scene recognition with a CNN (graduation project).
# Task description and dataset download:
#   http://www.cs.tut.fi/sgn/arg/dcase2016/task-acoustic-scene-classification
# Dependencies: numpy, scipy, librosa, keras, tensorflow or theano.
#
# The experiment uses Mel energy spectrograms + CNN + random forest. The
# feature maps a trained CNN learns are in fact very sparse, but on noisy
# recordings (scene audio mixed with other sound events such as speech or
# wind), even with anti-overfitting measures -- dropout, cross-validation,
# early stopping, weight decay, regularization -- some weights settle at
# small non-zero values instead of zero; many units never push their weights
# toward zero. The main idea of this experiment is to try to use...
#
# Package contents: 项目说明.md, plot_confusion_matrix.py, best_result.py,
# dcnn_train.py, evaluate_predict.py, 4_fold_fulltraining.py,
# 4_fold_dcnn_train.py, feature_extract_demo.py, images/错分矩阵.png
# ---------------------------------------------------------------------------
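# The notes above mention a Mel spectrogram + CNN + random forest pipeline,
# and get_evaluate_acc takes an unused replace_softmax flag. The original
# implementation is lost to the truncation; the sketch below is only a guess
# at that idea, not the author's code. It treats the activations of the
# layer before the softmax as features for a scikit-learn random forest.
# Assumptions: Keras 1.x Model(input=..., output=...) signature, and that
# _model.layers[-2] is a flat Dense layer (2-D activations).
def rf_on_cnn_features(_model, frame_x_train, y_train, frame_x_test):
    from keras.models import Model
    from sklearn.ensemble import RandomForestClassifier
    # feature extractor: the trained CNN up to its penultimate layer
    feat_extractor = Model(input=_model.input, output=_model.layers[-2].output)
    train_feats = feat_extractor.predict(frame_x_train, verbose=0)
    test_feats = feat_extractor.predict(frame_x_test, verbose=0)
    rf = RandomForestClassifier(n_estimators=200)
    rf.fit(train_feats, y_train)  # y_train: integer labels, not one-hot
    # frame-level predictions; scene labels would follow by majority vote,
    # exactly as in get_acc
    return rf.predict(test_feats)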