#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
import sys
from random import shuffle
import librosa
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import pickle
# Raw dataset (directory of wav recordings)
dataset_dir = './recordings/'
# Cached preprocessed dataset (pickled features/labels)
dataset_pickle = "./dataset.pickle"
def read_files(files):
    """Load wav files and return MFCC features with one-hot labels.

    The first character of each filename encodes the spoken digit,
    e.g. "7_theo_36.wav" -> label 7.

    Args:
        files: iterable of wav filenames relative to ``dataset_dir``.

    Returns:
        Tuple ``(features, labels)`` where ``features`` is an array of
        MFCC matrices padded along the time axis to 100 frames and
        ``labels`` is an array of length-10 one-hot vectors.
    """
    labels = []
    features = []
    for file in files:
        # Leading filename character is the digit label.
        ans = int(file[0])
        wave, sr = librosa.load(os.path.join(dataset_dir, file), mono=True)
        label = keras.utils.to_categorical(ans, 10)
        labels.append(label)
        # Keyword arguments required: positional y/sr were removed in
        # librosa >= 0.10 (keywords work on older versions too).
        mfcc = librosa.feature.mfcc(y=wave, sr=sr)
        # Pad the time axis to a fixed width of 100 frames
        # (assumes utterances are at most 100 frames long).
        mfcc = np.pad(mfcc, ((0, 0), (0, 100 - len(mfcc[0]))),
                      mode='constant', constant_values=0)
        features.append(np.array(mfcc))
    return np.array(features), np.array(labels)
def load_files():
    """List wav files in ``dataset_dir`` and split into
    train / validation / test sets (70% / 20% / remainder).

    Returns:
        Tuple ``(train_files, valid_files, test_files)`` of filename lists.
    """
    wav_files = [name for name in os.listdir(dataset_dir)
                 if name.endswith(".wav")]
    if not wav_files:
        print("未找到数据集")
    # Shuffle so each split contains roughly all digit classes.
    shuffle(wav_files)
    n_total = len(wav_files)
    n_train = int(n_total * 0.7)
    n_valid = int(n_total * 0.2)
    train_files = wav_files[:n_train]
    valid_files = wav_files[n_train:n_train + n_valid]
    test_files = wav_files[n_train + n_valid:]
    return train_files, valid_files, test_files
def mean_normalize(features):
    """Standardize ``features`` to zero mean and unit variance.

    Args:
        features: numpy array of feature values.

    Returns:
        ``(features - mean) / std`` computed over the whole array.
    """
    mu = features.mean()
    sigma = features.std()
    return (features - mu) / sigma
class CNNConfig():
    """Hyperparameters for the ASR CNN model and its training loop."""
    # Network structure
    filter_sizes = [2, 3, 4, 5]  # one parallel conv1d branch per kernel width
    num_filters = 64             # filters per conv branch
    hidden_dim = 256             # units in the fully-connected layer
    # Training process
    learning_rate = 0.001
    num_epochs = 100
    batch_size = 256
    dropout_keep_prob = 0.5      # keep probability used during training
    print_per_batch = 100  # print training info every 100 batch iterations
    save_tb_per_batch = 200  # write TensorBoard summaries every 200 batches
class ASRCNN(object):
    """TF1 graph for spoken-digit classification from MFCC feature maps.

    Parallel 1-D convolutions with several kernel widths slide over the
    time axis; each branch is max-pooled over time, the pooled outputs
    are concatenated and passed through one hidden dense layer into a
    softmax classifier. Builds loss, Adam optimizer and accuracy ops.
    """
    def __init__(self, config, width, height, num_classes):  # width=20 MFCC coeffs, height=100 frames
        self.config = config
        # Inputs: each utterance has been turned into one feature map.
        self.input_x = tf.placeholder(tf.float32, [None, width, height], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        # input_x = tf.reshape(self.input_x, [-1, height, width])
        # Transpose width*height -> height*width so conv1d convolves over time.
        input_x = tf.transpose(self.input_x, [0, 2, 1])
        pooled_outputs = []
        # One convolution branch per filter size.
        for i, filter_size in enumerate(self.config.filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                print("conv-maxpool-%s" % filter_size)
                conv = tf.layers.conv1d(input_x, self.config.num_filters, filter_size, activation=tf.nn.relu)
                print(conv.shape)
                # Global max-pool over the time dimension.
                pooled = tf.reduce_max(conv, reduction_indices=[1])
                print(pooled.shape)
                pooled_outputs.append(pooled)
        num_filters_total = self.config.num_filters * len(self.config.filter_sizes)  # 64*4
        pooled_reshape = tf.reshape(tf.concat(pooled_outputs, 1), [-1, num_filters_total])
        fc = tf.layers.dense(pooled_reshape, self.config.hidden_dim, activation=tf.nn.relu, name='fc1')
        fc = tf.contrib.layers.dropout(fc, self.keep_prob)
        # Classifier head
        self.logits = tf.layers.dense(fc, num_classes, name='fc2')
        self.y_pred_cls = tf.argmax(tf.nn.softmax(self.logits), 1, name="pred")  # predicted class
        # Loss: softmax cross-entropy against the one-hot labels.
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
        self.loss = tf.reduce_mean(cross_entropy)
        # Optimizer
        self.optim = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate).minimize(self.loss)
        # Accuracy
        correct_pred = tf.equal(tf.argmax(self.input_y, 1), self.y_pred_cls)
        self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
def preprocess():
    """Return the normalized dataset, computing and caching it on first use.

    Extracting MFCC features is slow, so on the first run the processed
    splits are pickled to ``dataset_pickle``; subsequent runs load the
    cache instead of re-reading the audio.

    Returns:
        Tuple ``(train_features, train_labels, valid_features,
        valid_labels, test_features, test_labels)``.
    """
    if not os.path.isfile(dataset_pickle):
        train_files, valid_files, test_files = load_files()
        train_features, train_labels = read_files(train_files)
        train_features = mean_normalize(train_features)
        print('read train files down')
        valid_features, valid_labels = read_files(valid_files)
        valid_features = mean_normalize(valid_features)
        print('read valid files down')
        test_features, test_labels = read_files(test_files)
        test_features = mean_normalize(test_features)
        print('read test files down')
        print('预处理数据集写入%s' % dataset_pickle)
        # Context manager guarantees the file is closed even on error.
        with open(dataset_pickle, "wb") as pickle_out:
            pickle.dump((train_features, train_labels,
                         valid_features, valid_labels,
                         test_features, test_labels), pickle_out)
    else:
        print('从%s中加载预处理数据集' % dataset_pickle)
        with open(dataset_pickle, "rb") as pickle_in:
            (train_features, train_labels, valid_features, valid_labels,
             test_features, test_labels) = pickle.load(pickle_in)
    return train_features, train_labels, \
        valid_features, valid_labels, test_features, test_labels
def batch_iter(features, labels, batch_size):
    """Yield successive ``(features, labels)`` mini-batches.

    A trailing partial batch (fewer than ``batch_size`` samples) is
    dropped, matching the original behaviour.

    Args:
        features: indexable sequence of samples.
        labels: indexable sequence of labels, same length as ``features``.
        batch_size: number of samples per batch.

    Yields:
        Tuples ``(feature_batch, label_batch)`` of length ``batch_size``.

    Raises:
        ValueError: if ``features`` and ``labels`` differ in length.
    """
    # Explicit raise instead of assert: asserts are stripped under -O.
    if len(features) != len(labels):
        raise ValueError("feature and label size do not match!")
    for i in range(len(features) // batch_size):
        begin = i * batch_size
        end = begin + batch_size
        yield features[begin:end], labels[begin:end]
def train(argv=None):
# 预处理数据集从数据集中提取特征太过费时
# 因此我们将提取好的数据集特征存储在文件中
train_features, train_labels, \
valid_features, valid_labels, \
test_features, test_labels = preprocess()
width = 20 # mfcc features
height = 100 # (max) length of utterance
classes = 10 # digits
config = CNNConfig
cnn = ASRCNN(config, width, height, classes)
session = tf.Session()
session.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.global_variables())
checkpoint_path = os.path.join('cnn_model', 'model.ckpt')
tensorboard_train_dir = 'tensorboard/train'
tensorboard_valid_dir = 'tensorboard/valid'
if not os.path.exists(tensorboard_train_dir):
os.makedirs(tensorboard_train_dir)
if not os.path.exists(tensorboard_valid_dir):
os.makedirs(tensorboard_valid_dir)
tf.summary.scalar("loss", cnn.loss)
tf.summary.scalar("accuracy", cnn.acc)
merged_summary = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(tensorboard_train_dir)
valid_writer = tf.summary.FileWriter(tensorboard_valid_dir)
total_batch = 0
for epoch in range(config.num_epochs):
print('Epoch:', epoch + 1)
batch_train = batch_iter(train_features, train_labels, config.batch_size)
for x_batch, y_batch in batch_train:
total_batch += 1
# 训练一个batch
train_feed_dict = {
cnn.input_x: x_batch,
cnn.input_y: y_batch,
cnn.keep_prob: config.dropout_keep_prob
}
session.run(cnn.optim, feed_dict=train_feed_dict)
# 检查loss以及acc
if total_batch % config.print_per_batch == 0:
# 检查训练的batch的loss以及acc
train_loss, train_accuracy = session.run(
[cnn.loss, cnn.a
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
深度学习课程实验——语音数字识别-内含源码和说明书.zip (2000个子文件)
numberRec.py 11KB
9_theo_16.wav 36KB
7_theo_36.wav 34KB
9_theo_28.wav 16KB
9_theo_27.wav 14KB
6_jackson_0.wav 13KB
6_jackson_23.wav 13KB
6_jackson_27.wav 13KB
6_jackson_30.wav 12KB
6_jackson_21.wav 12KB
6_jackson_25.wav 12KB
2_jackson_21.wav 12KB
6_jackson_17.wav 12KB
6_jackson_16.wav 11KB
5_Tom_100.wav 11KB
6_Tom_100.wav 11KB
6_Daniel_100.wav 11KB
6_jackson_26.wav 11KB
0_jackson_40.wav 11KB
6_jackson_13.wav 11KB
6_jackson_37.wav 11KB
5_Daniel_120.wav 11KB
6_jackson_19.wav 11KB
6_jackson_20.wav 11KB
6_jackson_49.wav 11KB
0_jackson_10.wav 11KB
9_jackson_34.wav 11KB
6_jackson_5.wav 11KB
2_Steffi_100.wav 11KB
3_Tom_100.wav 11KB
4_Tom_100.wav 11KB
7_theo_15.wav 10KB
6_jackson_39.wav 10KB
0_jackson_38.wav 10KB
9_jackson_36.wav 10KB
9_Tom_100.wav 10KB
2_jackson_45.wav 10KB
9_jackson_38.wav 10KB
6_jackson_28.wav 10KB
0_jackson_27.wav 10KB
6_jackson_1.wav 10KB
0_jackson_28.wav 10KB
0_jackson_24.wav 10KB
0_Tom_100.wav 10KB
0_jackson_30.wav 10KB
6_jackson_35.wav 10KB
9_jackson_30.wav 10KB
6_jackson_33.wav 10KB
9_jackson_32.wav 10KB
8_theo_48.wav 10KB
0_jackson_9.wav 10KB
0_jackson_6.wav 10KB
9_Steffi_100.wav 10KB
3_Daniel_120.wav 10KB
0_jackson_35.wav 10KB
7_Tom_100.wav 10KB
9_jackson_37.wav 10KB
0_jackson_14.wav 10KB
0_jackson_37.wav 10KB
9_jackson_14.wav 10KB
0_jackson_20.wav 10KB
2_jackson_44.wav 10KB
9_jackson_43.wav 10KB
9_jackson_11.wav 10KB
9_theo_32.wav 10KB
9_jackson_47.wav 10KB
9_jackson_28.wav 10KB
9_jackson_48.wav 10KB
9_jackson_39.wav 9KB
9_jackson_0.wav 9KB
5_Daniel_140.wav 9KB
0_jackson_22.wav 9KB
6_theo_28.wav 9KB
4_theo_27.wav 9KB
0_jackson_21.wav 9KB
9_theo_31.wav 9KB
6_Daniel_120.wav 9KB
6_theo_17.wav 9KB
9_jackson_13.wav 9KB
9_jackson_20.wav 9KB
1_Tom_100.wav 9KB
0_jackson_23.wav 9KB
0_jackson_13.wav 9KB
9_jackson_15.wav 9KB
1_jackson_48.wav 9KB
6_jackson_40.wav 9KB
1_jackson_44.wav 9KB
6_jackson_36.wav 9KB
9_jackson_42.wav 9KB
0_jackson_39.wav 9KB
9_jackson_2.wav 9KB
9_jackson_12.wav 9KB
1_Steffi_120.wav 9KB
1_theo_22.wav 9KB
6_jackson_34.wav 9KB
1_jackson_43.wav 9KB
8_jackson_41.wav 9KB
1_theo_28.wav 9KB
0_theo_22.wav 9KB
0_nicolas_18.wav 9KB
共 2000 条
- 1
- 2
- 3
- 4
- 5
- 6
- 20
资源评论
小码蚁.
- 粉丝: 2533
- 资源: 4146
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功