# -*- coding: utf-8 -*-
# Dependency note: install the Excel engine first, e.g. `pip install openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple`
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_squared_error # 评价指标
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
from keras import optimizers
import keras
import tensorflow as tf
# Report the framework versions so a run can be matched to its environment.
print(keras.__version__)
print(tf.__version__)

# Options noted by the author -- metrics: mse, rmse, mae, rmape; optimizers: adam, sgd.

# Plot labels in this script are Chinese: select the SimHei font and keep the
# minus sign renderable with it.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

import warnings

# Suppress all warnings; per the original note they do not affect the run.
warnings.filterwarnings(action="ignore")
# Load the raw observations from the workbook next to this script.
df = pd.read_excel("原始数据.xlsx")
print(df.columns)
print(df.head())

# Columns used as model inputs. Column 0 ('出库含沙量') is also the value the
# windowing step later picks as the prediction target.
feature_columns = ['出库含沙量', '出库流量', '水位', '入库流量', '入库含沙量']

# float32 instead of float16: half precision keeps only ~3 significant digits
# and overflows to inf above 65504, which silently corrupts large readings.
data_x = np.array(df[feature_columns].values, dtype=np.float32)
print(data_x)
# Number of consecutive rows that form one input window.
int_sequence_len = 5
# Number of features carried by each row of a window.
int_a = 5

# Slide a window over the rows: every sample is `int_sequence_len` consecutive
# rows, and its label is column 0 of the row immediately after the window.
train_x = []
train_y = []
window_count = len(data_x) - int_sequence_len
for start in range(window_count):
    stop = start + int_sequence_len
    train_x.append(data_x[start:stop])
    train_y.append(data_x[stop][0])
print(len(train_x), len(train_y))
# Hold out 20% of the windows for evaluation, with a fixed seed.
# NOTE(review): train_test_split shuffles by default, so overlapping windows
# from the same period can end up on both sides of the split -- confirm this
# is acceptable for a time-series evaluation.
samples = np.array(train_x)
labels = np.array(train_y)
x_train, x_test, y_train, y_test = train_test_split(
    samples, labels, test_size=0.2, random_state=1
)
print(x_train.shape)
# The original author noted "1243 311" here, presumably the sizes for their dataset.
print(len(x_train), len(x_test))

# Make the layout explicit: inputs are (samples, window length, features),
# targets are a single column.
train_input_shape = (len(x_train), int_sequence_len, int_a)
x_train = x_train.reshape(train_input_shape)
y_train = y_train.reshape((len(x_train), 1))
print(x_train.shape)
print(y_train.shape)

test_input_shape = (len(x_test), int_sequence_len, int_a)
x_test = x_test.reshape(test_input_shape)
y_test = y_test.reshape((len(x_test), 1))
print(x_test.shape)
print(y_test.shape)
def create_model_1():
    """Build and compile the LSTM regression network.

    The network is a 100-unit LSTM followed by a 32-unit dense layer and a
    single-unit output layer. It is compiled with mean absolute error as the
    loss and the 'Adagrad' optimizer.

    Returns:
        The compiled ``keras`` Sequential model.
    """
    # input_shape must match the last two axes of the 3-D training input:
    # (window length, features per row).
    recurrent = keras.layers.LSTM(
        100, activation='sigmoid', input_shape=(int_sequence_len, int_a)
    )
    # Fully connected hidden layer.
    hidden = keras.layers.Dense(32, activation='sigmoid')
    # Single output unit for the regression target.
    output = keras.layers.Dense(1, activation='relu')

    model = keras.models.Sequential([recurrent, hidden, output])
    # Regression loss and optimizer.
    model.compile(optimizer='Adagrad', loss='mean_absolute_error')
    return model
# RMSprop instance with an explicit learning rate. It is not passed to the
# model (create_model_1() compiles with 'Adagrad'); it is kept so the name
# `op` stays available. `learning_rate` replaces the deprecated `lr` keyword,
# which newer Keras releases reject.
op = optimizers.RMSprop(learning_rate=0.01)

model1 = create_model_1()
model1.summary()

# Validate on the held-out split rather than on the training data itself;
# otherwise `val_loss` merely repeats the training loss and the curve plotted
# as "Test Loss" says nothing about generalisation.
history = model1.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=200,      # number of training epochs
    batch_size=32,
    verbose=2,
    shuffle=True,
)

# Persist the trained weights so the model can be rebuilt and reloaded later.
# NOTE(review): Keras 3 may require a '.weights.h5' suffix here -- confirm
# against the installed version before renaming (the load call must match).
model1.save_weights('lstmmoxing_天气')
import matplotlib.pyplot as plt
# Per-epoch losses recorded by fit().
loss_curves = history.history
training_loss = loss_curves['loss']
test_loss = loss_curves['val_loss']

# Epoch numbers for the x axis, starting at 1.
epoch_count = range(1, len(training_loss) + 1)

# Draw both loss curves: training as a red dashed line, validation as a
# solid blue line.
for series, line_style in ((training_loss, 'r--'), (test_loss, 'b-')):
    plt.plot(epoch_count, series, line_style)
plt.legend(['Training Loss', 'Test Loss'])
plt.title("天气预测Epoch——loss")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()
plt.clf()
from sklearn.metrics import mean_squared_error # 均方误差
from sklearn.metrics import mean_absolute_error # 平方绝对误差
from sklearn.metrics import r2_score # R square
# Rebuild the same architecture and restore the weights saved after training.
model_jiazai_1 = create_model_1()
model_jiazai_1.load_weights('lstmmoxing_天气')

# Predict on the held-out inputs and collect (actual, predicted) pairs for the
# export step, printing each pair as it goes.
result = []
y1_pred_lstm = model_jiazai_1.predict(x_test)
for actual_row, predicted_row in zip(y_test, y1_pred_lstm):
    print("真实:", actual_row)
    print("预测:", predicted_row)
    result.append([actual_row[0], predicted_row[0]])
    print("-----------------------")

# Summary metrics in order: mean squared error, mean absolute error, R^2.
for metric in (mean_squared_error, mean_absolute_error, r2_score):
    print(metric(y_test, y1_pred_lstm))
# Plot actual values against predictions, one x position per test sample.
len_ = list(range(len(y_test)))
plt.xlabel('标签', fontsize=8)
plt.ylabel('均值', fontsize=8)
plt.plot(len_, y_test, label='y_test', color="blue")
plt.plot(len_, y1_pred_lstm, label='y1_pred_lstm', color="yellow")
# The label= arguments above are only rendered once a legend is requested;
# without this call the two series cannot be told apart in the figure.
plt.legend()
plt.title("天气预测走势图")
plt.show()
plt.clf()

# Export the (actual, predicted) pairs to an Excel workbook.
name = ['真实值', '预测值']
test = pd.DataFrame(columns=name, data=result)
test.to_excel('result_天气.xlsx')