#!/usr/bin/env python
# coding: utf-8
# In[1]:
# 加载函数库
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation, Dropout
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.datasets import mnist, cifar10
#from keras.datasets import cifar10
from keras.utils import np_utils,generic_utils
from keras.preprocessing.image import ImageDataGenerator
import keras
from keras.optimizers import SGD, RMSprop, Adam
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.utils.vis_utils import plot_model
# In[2]:
# Model hyperparameters.
# Fix the RNG seed so experiments are reproducible.
np.random.seed(666)
# Number of passes over the training set.
NB_EPOCH = 5
# Samples per gradient update.
BATCH_SIZE = 256
# Total neuron budget across the four layers (not referenced in this script).
N_ADDITION = 1024
# Logging verbosity for fit():
#   0 = silent, 1 = progress bar, 2 = one line per epoch.
VERBOSE = 1
# Number of target classes (CIFAR-10 has ten labels).
NB_CLASSES = 10
#OPTIMIZER = SGD()
# Neurons in the first hidden layer.
N_HIDDEN = 128
# Fraction of the training data held out as a validation set when
# no explicit validation data is passed to fit().
VALIDATION_SPLIT = 0.2
# In[3]:
# Augmentation pipeline: random rotations (up to 90 degrees), horizontal and
# vertical shifts, zooms, and horizontal flips; pixels exposed by a transform
# are filled with the nearest valid value.
data_generator = ImageDataGenerator(
    rotation_range=90,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest',
)
# In[4]:
# Load CIFAR-10 and prepare it for the convolutional network.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# Convolutional layers consume the images as-is (32x32 RGB),
# so no flattening to a 1-D vector is needed here.
# Cast to float32 and scale pixel intensities from [0, 255] into [0, 1].
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
# One-hot encode the integer class labels.
y_train = np_utils.to_categorical(y_train, NB_CLASSES)
y_test = np_utils.to_categorical(y_test, NB_CLASSES)
# Shape of a single input image: 32x32 pixels, 3 color channels.
input_shape = (32, 32, 3)
print(X_train.shape)
# In[5]:
# Build the convolutional model:
#   Conv(20, 5x5) -> ReLU -> MaxPool -> Dropout(0.15)
#   Conv(50, 3x3, L2-regularized) -> ReLU -> MaxPool
#   Conv(20, 5x5) -> ReLU -> MaxPool
#   Flatten -> Dense(500) -> ReLU -> Dense(10) -> softmax
model = Sequential()
model.add(Conv2D(20, kernel_size=5, padding="same", input_shape=input_shape))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
# Dropout right after the first pooling layer; placing it after the first
# pool (rather than the second or last) empirically worked best here.
model.add(Dropout(0.15))
# L2 weight regularization constrains the second convolutional layer.
model.add(Conv2D(50, kernel_size=3, padding="same", kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
# Third conv/pool stage.
# FIX: the original also passed input_shape=input_shape here; Keras ignores
# input_shape on non-input layers, and the value (32x32x3) was misleading
# since the actual input to this layer is the pooled feature map.
model.add(Conv2D(20, kernel_size=5, padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
# Classifier head.
model.add(Flatten())
model.add(Dense(500))
model.add(Activation("relu"))
model.add(Dense(10))
model.add(Activation("softmax"))
# In[6]:
# Compile with plain SGD and categorical cross-entropy, tracking accuracy.
model.compile(loss='categorical_crossentropy', optimizer='sgd',
              metrics=['accuracy'])
# Callback that writes per-epoch metrics to a CSV log file.
# NOTE(review): assumes the 'csv/' directory already exists — confirm.
CSV_log = keras.callbacks.CSVLogger(filename='csv/cnn_CIFAR10_image.log',
                                    separator=',', append=False)
# Train, holding out VALIDATION_SPLIT of the training data for validation.
model.fit(X_train, y_train,
          batch_size=BATCH_SIZE,
          epochs=NB_EPOCH,
          verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT,
          callbacks=[CSV_log])
# In[ ]:
# In[ ]:
# NOTE(review): the triple-quoted string below is disabled (commented-out)
# code for manually training on batches produced by data_generator. If ever
# re-enabled it contains latent bugs: the loop variable is `ybatch` but the
# body uses `y_batch` (NameError), and `progbar.add(..., values[...])` should
# be the keyword argument `values=[...]`. Kept byte-identical here.
'''
# 编译模型
#model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
print("Training on Data Agumentation...")
# 手动定义训练模型的训练,首先定义
for e in range(NB_EPOCH):
print('EPOCH:{0}/{1}'.format(e+1, NB_EPOCH))
print('Training...')
# 用于呈现每一轮的训练速度
progbar = generic_utils.Progbar(X_train.shape[0])
batch_size = 0
# 通过数据发生器以原始数据X_train,y_train为原型生成批评批量大小BATCH_SIZE数据
for x_batch,ybatch in data_generator.flow(X_train,y_train,batch_size=BATCH_SIZE):
loss, train_acc = model.train_on_batch(x_batch,y_batch)
batch_size += x_batch.shape[0]
if batch_size > X_train.shape[0]:
break
# 每跑完一个批量,probar便累计
progbar.add(x_batch.shape[0], values[('train loss', loss), ('train acc', train_acc)])
'''
# In[ ]:
# NOTE(review): disabled (commented-out) code — evaluates on the test set,
# then retrains while logging to a second CSV file ('csv/cnn_CIFAR10_param.log').
# Kept byte-identical; it is a no-op string expression at runtime.
'''
# 用原始数据中的测试集验证模型
loss, acc = model.evaluate(X_test, y_test, batch_size=32)
print('Val Loss:', loss)
print('Val Accuracy:', acc)
# 初始化回调函数
CSV_log = keras.callbacks.CSVLogger(filename='csv/cnn_CIFAR10_param.log', separator=',', append=False)
# 训练模型
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH
,verbose=VERBOSE, validation_split=VALIDATION_SPLIT, callbacks=[CSV_log])
'''
# In[7]:
# Evaluate the trained model on the test set; score is [loss, accuracy].
score = model.evaluate(X_test, y_test, verbose=VERBOSE)
print("test loss : ", score[0])
print("test accuracy : ", score[1])
# summary() prints a per-layer description of the architecture.
model.summary()
# plot_model renders the network architecture:
# takes the model to draw, writes the image to the given PNG file,
# and show_shapes=True annotates each layer with its output shape.
plot_model(model, to_file='model_convolution_CIFAR10_image.png',show_shapes=True)
# In[8]:
# 对日志进行抽取并显示
from pandas import DataFrame
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# In[10]:
# Read the training log produced by the CSVLogger above and plot the curves.
# FIX: the path now uses a forward slash so it matches the file the logger
# wrote ('csv/cnn_CIFAR10_image.log'); the original backslash form
# ('csv\\cnn_CIFAR10_image.log') only resolved correctly on Windows.
path = 'csv/cnn_CIFAR10_image.log'
data = pd.read_csv(path, sep=',')
# Single subplot for the accuracy curves.
plt.subplot()
# Plot training vs. validation accuracy per epoch.
# NOTE(review): column names 'accuracy'/'val_accuracy' match newer Keras;
# older versions log 'acc'/'val_acc' — confirm against the log header.
sns.lineplot(data=data[['accuracy', 'val_accuracy']])
# Title reports the best training/validation accuracy reached.
plt.title('model CNN_CIFAR10_image:'
          '\nbest acc is {0:.4}'
          '\nbest val_acc is {1:.4}'.format(data[['accuracy']].max().values[0],
                                           data[['val_accuracy']].max().values[0])
          )
plt.tight_layout()
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]: