#coding=utf-8
import numpy as np
from keras.models import load_model,model_from_yaml
import os
import matplotlib.pyplot as plt
from PIL import Image
from dcnn_train import get_model,cnn_model,load_dev_traindata
import random
from scipy import stats
from keras.callbacks import EarlyStopping,ModelCheckpoint
# from keras.models import load_weights
def get_label_dict():
    # scene label -> integer class index
    dicts = {
        'residential_area': 0,
        # workaround: the naive line split below sometimes yields the
        # truncated label 'residential_are', so map that spelling too
        'residential_are': 0,
        'city_center': 1,
        'beach': 2,
        'park': 3,
        'home': 4,
        'forest_path': 5,
        'bus': 6,
        'grocery_store': 7,
        'cafe/restaurant': 8,
        'car': 9,
        'train': 10,
        'metro_station': 11,
        'office': 12,
        'tram': 13,
        'library': 14,
    }
return dicts
def load_fold_data(data='Train', fold=1,
                   cfg_path='../TUT-acoustic-scenes-2016-development/evaluation_setup/',
                   data_path='./result64/', one_hot=True, Normalization=True):
if not os.path.exists(data_path):
        print 'please run feature_extract_demo.py first'
exit()
if data=='Test':
load_file = cfg_path+'fold'+str(fold)+'_evaluate.txt'
else:
load_file = cfg_path+'fold'+str(fold)+'_train.txt'
dicts = get_label_dict()
wav_paths = []
scene_y = []
    with open(load_file, 'rb') as f:
        for line in f:
            # each line: "<relative wav path> <scene label>"
            ls = line.strip('\r\n').split(' ')
            # print ls[0], ls[1], dicts.get(ls[1])
            scene_y.append(dicts.get(ls[1]))
            # spectrogram images were saved as <data_path><wav name>.jpg
            wav_paths.append(data_path + ls[0].split('audio/')[1] + '.jpg')
    # each scene is one 64 x 2584 grayscale spectrogram (mel bands x time frames)
    scene_x = np.empty((len(scene_y), 1, 64, 2584), dtype='float32')
    scene_y = np.array(scene_y, dtype='uint8')
    for i in range(scene_x.shape[0]):
        scene_x[i, 0, :, :] = np.asarray(Image.open(wav_paths[i], 'r').convert('L'), dtype='float32')
#
# strip = 64
# win_size = 128
# step = int(2584 / 64) - 1
strip = 64
win_size = 64
step = int(2584 / strip) - 1
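    # slice each 64x2584 spectrogram into step = int(2584/64) - 1 = 39
    # non-overlapping 64x64 frames (stride == window size); each frame
    # inherits its scene's label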
frame_x = np.empty((scene_x.shape[0] * step, 1, 64, win_size), dtype='float32')
frame_y = []
for i in range(scene_x.shape[0]):
for j in range(step):
frame_x[i*step+j,:,:,:]=scene_x[i,0,:,j*strip:j*strip+win_size]
frame_y.append(scene_y[i])
# plt.imshow(frame_x[i*step+j,0,:,:],cmap='gray')
# plt.figure()
# plt.imshow(scene_x[i,0,:,:],cmap='gray')
# plt.show()
frame_y = np.array(frame_y, dtype='uint8')
if one_hot is True:
from keras.utils import np_utils
frame_y = np_utils.to_categorical(frame_y, 15) # label one-hot
if Normalization is True:
frame_x = frame_x.astype('float32') / 255
scene_x = scene_x.astype('float32') /255
    print 'load_' + data + '_data ok!'
    print frame_x.shape[0], 'samples'
return frame_x,scene_x,frame_y,scene_y
def get_acc(_model,fold=1):
# frame_x, scene_x, frame_y, scene_y = load_fold_data(data='Train',fold=fold,Normalization=True)
frame_x_test, scene_x_test, frame_y_test, scene_y_test = load_fold_data(data='Test', fold=fold,Normalization=True)
# # print frame_x.shape,scene_x.shape
# index = [i for i in range(len(frame_x))]
# random.shuffle(index)
# frame_x = frame_x[index]
# frame_y = frame_y[index]
strip = 64
step = int(2584 / strip) - 1
# filepath = "best_12_06_02.nn"+str(1)
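    # per-fold weights file, presumably written by a ModelCheckpoint callback
    # in the training script (the callback is imported at the top of this file)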
filepath = "best_12_06_02.nn" + str(fold)
_model.load_weights(filepath)
    frame_acc = _model.evaluate(frame_x_test, frame_y_test, verbose=2)  # loss plus any compiled metrics
    print 'fold_' + str(fold) + '_frame_acc:_' + str(frame_acc)
    lists = []  # misclassified scenes, kept for the confusion matrix
    f = open('./error_' + str(fold) + '.txt', 'w')  # misclassification log
    for i in range(scene_x_test.shape[0]):
        # frame_predicts = np.empty((39,),dtype='uint8')
        # for j in range(39):#step of frames in a scene sample
        # frame_predicts = np.argmax(_model.predict(frame_x_test[i*39:i*39+39,:,:,:]),axis=-1)
        # a majority vote over a scene's frames decides its scene label
        frame_predicts = _model.predict_classes(frame_x_test[i * step:i * step + step, :, :, :], verbose=0)
        scene_predict = stats.mode(frame_predicts).mode[0]
        if scene_predict != scene_y_test[i]:
            f.write(str(scene_y_test[i]) + ' misclassified as -> ' + str(scene_predict) + '\n')
            lists.append([scene_y_test[i], scene_predict])
    f.close()
    scene_acc = float(scene_y_test.shape[0] - len(lists)) / scene_y_test.shape[0]
print 'fold_'+str(fold)+'_scene_acc_:'+str(scene_acc)
return frame_acc,scene_acc
def get_all_result():
    result = []
    for fold in range(1, 5):
        _model = get_model()
        result.append(get_acc(_model, fold=fold))
    print result
    # mean scene accuracy over the four folds
    print sum(float(r[1]) for r in result) / 4.0
def load_evaluate_data(cfg_path='../TUT-acoustic-scenes-2016-evaluation/evaluation_setup/',
                       data_path='./evaluate/', one_hot=True, Normalization=True):
if not os.path.exists(data_path):
        print 'please run feature_extract_demo.py first'
exit()
load_file = cfg_path+'evaluate.txt'
dicts = get_label_dict()
wav_paths = []
scene_y = []
with open(load_file,'rb') as f:
for line in f:
ls = line.strip('\r\n').split(' ')
# print ls[0],ls[1],dicts.get(ls[1])
scene_y.append(dicts.get(ls[1]))
wav_paths.append(data_path + ls[0].split('audio/')[1] + '.jpg')
scene_x = np.empty((len(scene_y),1,64,2584),dtype='float32')
scene_y = np.array(scene_y,dtype='uint8')
for i in range(scene_x.shape[0]):
scene_x[i,0,:, :] = np.asarray(Image.open(wav_paths[i], 'r').convert('L'), dtype='float32')
    strip = 64
    win_size = 64
    step = int((2584 - win_size) / strip) - 1
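    # note: int((2584 - 64) / 64) = 39, minus 1 = 38 frames per scene here,
    # one frame fewer per scene than load_fold_data extracts (39)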
frame_x = np.empty((scene_x.shape[0] * step, 1, 64, win_size), dtype='float32')
frame_y = []
for i in range(scene_x.shape[0]):
for j in range(step):
frame_x[i*step+j,:,:,:]=scene_x[i,0,:,j*strip:j*strip+win_size]
# print i*step+j,j*strip+win_size,scene_x[i,0,:,j*strip:j*strip+win_size].shape
frame_y.append(scene_y[i])
# plt.imshow(frame_x[i*step+j,0,:,:],cmap='gray')
# plt.figure()
# plt.imshow(scene_x[i,0,:,:],cmap='gray')
# plt.show()
frame_y = np.array(frame_y, dtype='uint8')
if one_hot is True:
from keras.utils import np_utils
frame_y = np_utils.to_categorical(frame_y, 15) # label one-hot
if Normalization is True:
frame_x = frame_x.astype('float32') / 255
scene_x = scene_x.astype('float32') /255
    print 'load_evaluate_data ok!'
    print frame_x.shape[0], 'frame samples'
    print scene_x.shape[0], 'scene samples'
return frame_x,scene_x,frame_y,scene_y
def get_evaluate_acc(_model,replace_softmax=False):
# frame_x, scene_x, frame_y, scene_y = load_evaluate_data()
# print frame_x.shape,scene_x.shape
# index = [i for i in range(len(frame_x))]
# random.shuffle(index)
# frame_x = frame_x[index]
# frame_y = frame_y[index]
    # EarlyStopping strategy to guard against overfitting
# early_stopping = EarlyStopping(monitor='val_acc', patience=5)
# _model.fit(frame_x,frame_y,batch_size=24,nb_epoch=nb_epoch,callbacks=[early_stopping],validation_split=0.2,shuffle=True)
# _model.fit(frame_x, frame_y, batch_size=24, nb_epoch=nb_epoch,
# shuffle=True)
    frame_x_test, scene_x_test, frame_y_test, scene_y_test = load_evaluate_data()
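    # --- The source file is truncated at this point and the rest of this
    # function is lost. What follows is a hedged reconstruction, not the
    # author's code: it mirrors the frame-level majority vote in get_acc.
    # The weights filename "best_12_06_02.nn_full" is hypothetical (get_acc
    # loads per-fold files named "best_12_06_02.nn<fold>").
    _model.load_weights("best_12_06_02.nn_full")  # hypothetical filename
    strip = 64
    step = int((2584 - 64) / strip) - 1  # 38 frames per scene, as in load_evaluate_data
    frame_acc = _model.evaluate(frame_x_test, frame_y_test, verbose=2)
    print 'evaluate_frame_acc:_' + str(frame_acc)
    errors = 0
    for i in range(scene_x_test.shape[0]):
        frame_predicts = _model.predict_classes(frame_x_test[i * step:i * step + step, :, :, :], verbose=0)
        if stats.mode(frame_predicts).mode[0] != scene_y_test[i]:
            errors += 1
    scene_acc = 1.0 - float(errors) / scene_y_test.shape[0]
    print 'evaluate_scene_acc:_' + str(scene_acc)
    return frame_acc, scene_acc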
# ---------------------------------------------------------------------------
# Project notes (translated):
#
# Sound scene recognition with a CNN (graduation project).
# Task description and dataset download:
#   http://www.cs.tut.fi/sgn/arg/dcase2016/task-acoustic-scene-classification
# Dependencies: numpy, scipy, librosa, keras, tensorflow or theano.
#
# The experiment uses Mel energy spectrograms + CNN + random forest. The
# feature maps a trained CNN learns are in fact very sparse, but on noisy
# recordings (scene audio mixed with other sound events such as speech or
# wind), even with anti-overfitting measures -- dropout, cross-validation,
# early stopping, weight decay, regularization -- some weights settle at
# small non-zero values instead of zero; many units never push their weights
# toward zero. The main idea of this experiment is to try to use...
#
# Package contents: 项目说明.md, plot_confusion_matrix.py, best_result.py,
# dcnn_train.py, evaluate_predict.py, 4_fold_fulltraining.py,
# 4_fold_dcnn_train.py, feature_extract_demo.py, images/错分矩阵.png
# ---------------------------------------------------------------------------
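# The notes above mention a Mel spectrogram + CNN + random forest pipeline,
# and get_evaluate_acc takes an unused replace_softmax flag. The original
# implementation is lost to the truncation; the sketch below is only a guess
# at that idea, not the author's code. It treats the activations of the
# layer before the softmax as features for a scikit-learn random forest.
# Assumptions: Keras 1.x Model(input=..., output=...) signature, and that
# _model.layers[-2] is a flat Dense layer (2-D activations).
def rf_on_cnn_features(_model, frame_x_train, y_train, frame_x_test):
    from keras.models import Model
    from sklearn.ensemble import RandomForestClassifier
    # feature extractor: the trained CNN up to its penultimate layer
    feat_extractor = Model(input=_model.input, output=_model.layers[-2].output)
    train_feats = feat_extractor.predict(frame_x_train, verbose=0)
    test_feats = feat_extractor.predict(frame_x_test, verbose=0)
    rf = RandomForestClassifier(n_estimators=200)
    rf.fit(train_feats, y_train)  # y_train: integer labels, not one-hot
    # frame-level predictions; scene labels would follow by majority vote,
    # exactly as in get_acc
    return rf.predict(test_feats)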