import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error as mse
plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # display the minus sign correctly
import seaborn as sns
data = pd.read_csv('黄金价格.csv')
data = data.fillna(0)
print(data.head(5))
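# Optional inspection sketch (not in the original): confirm the column types that the later
# numeric steps (corr, StandardScaler) rely on. Note that fillna(0) above has already replaced
# any gaps; forward-filling (data.ffill()) would be a gentler alternative for a price series.
print(data.dtypes)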
# Parse the dates and use them as the index
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)
# Restore chronological order
data.sort_index(inplace=True)
plt.figure(figsize=(12, 6))
data['Close/Last'].plot()
plt.title("黄金价格走势图")
plt.show()
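# Optional sketch (not in the original analysis): overlay a rolling mean to make the
# long-term trend easier to read; the 30-day window is an arbitrary illustrative choice.
plt.figure(figsize=(12, 6))
data['Close/Last'].plot(label='Close/Last')
data['Close/Last'].rolling(window=30).mean().plot(label='30-day rolling mean')
plt.legend()
plt.show()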
# Compute the correlation coefficients
corr = data.corr()
print(corr['Close/Last'].sort_values(ascending=False))
fig, ax = plt.subplots(figsize=(20, 16))  # enlarge the figure
sns.heatmap(corr, vmax=.8, square=True, annot=True, ax=ax)  # annot=True shows the coefficients
# Enlarge the tick labels
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.show()
X = data.drop('Close/Last', axis=1)
y = data['Close/Last']
# Standardize the features (note: the scaler is fitted on the full dataset here;
# a leakage-free variant is sketched after the split below)
scaler = StandardScaler()
X = scaler.fit_transform(X)
# Split into training and test sets (shuffle=False keeps the chronological order)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, train_size=0.8)
print(X_train)
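# Hedged alternative sketch (not how the original proceeds): StandardScaler above is fitted
# on the full feature matrix, so test-period statistics leak into the training inputs.
# A leakage-free variant fits on the training slice only; X_raw, split and the *_nl names
# are illustrative and assume the same 80/20 chronological split.
X_raw = data.drop('Close/Last', axis=1)
split = int(len(X_raw) * 0.8)
scaler_nl = StandardScaler()
X_train_nl = scaler_nl.fit_transform(X_raw.iloc[:split])
X_test_nl = scaler_nl.transform(X_raw.iloc[split:])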
# Linear regression: ---------
lr = LinearRegression()
lr.fit(X_train, y_train)
train_pred = lr.predict(X_train)
test_pred = lr.predict(X_test)
print(f'train MSE: {mse(y_train, train_pred):.5f}')
print(f'test MSE: {mse(y_test, test_pred):.5f}')
index = y_test.index
plt.figure(figsize=(12, 6))
data['Close/Last'].plot(label='actual')
pd.Series(test_pred, index=index).plot(color='red',label='pred')
plt.title("线性回归预测结果对比图")
plt.legend()
plt.savefig("线性回归预测结果对比图.png")
plt.show()
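# Follow-up sketch (not in the original): map the fitted coefficients back to the feature
# names so the linear model can be interpreted. The coefficients apply to the standardized
# features; feature_names is re-derived from the columns used to build X above.
feature_names = data.drop('Close/Last', axis=1).columns
print(pd.Series(lr.coef_, index=feature_names).sort_values(ascending=False))
print(f'intercept: {lr.intercept_:.5f}')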
# XGBoost: ---------
from xgboost import XGBRegressor
xgb_model = XGBRegressor()
xgb_model.fit(X_train, y_train)
train_pred = xgb_model.predict(X_train)
test_pred = xgb_model.predict(X_test)
print(f'train MSE: {mse(y_train, train_pred):.5f}')
print(f'test MSE: {mse(y_test, test_pred):.5f}')
index = y_test.index
plt.figure(figsize=(12, 6))
data['Close/Last'].plot(label='actual')
pd.Series(test_pred, index=index).plot(color='red',label='pred')
plt.title("xgboost预测结果对比图")
plt.legend()
plt.savefig("xgboost预测结果对比图.png")
plt.show()
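# Optional sketch (not in the original): inspect which inputs the gradient-boosted model
# relies on. feature_importances_ is a standard attribute of a fitted XGBRegressor.
feature_names = data.drop('Close/Last', axis=1).columns
print(pd.Series(xgb_model.feature_importances_, index=feature_names).sort_values(ascending=False))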
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
# Reload the raw series for the LSTM model; restore chronological order as was done
# for the earlier models, then fill missing values
df = pd.read_csv('黄金价格.csv')
df = df.fillna(0)
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date')
# Scale the closing price to the range [-1, 1]
scaler = MinMaxScaler(feature_range=(-1, 1))
data = scaler.fit_transform(df['Close/Last'].values.reshape(-1, 1))  # 'data' is now the scaled ndarray
# Chronological train/test split (no shuffling)
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)
# Convert to tensors
train_data = torch.FloatTensor(train_data)
test_data = torch.FloatTensor(test_data)
# Define the LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=50, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        # (h_0, c_0) state; reset externally before each sequence
        self.hidden_cell = (torch.zeros(1, 1, self.hidden_layer_size),
                            torch.zeros(1, 1, self.hidden_layer_size))

    def forward(self, input_seq):
        # reshape to (seq_len, batch=1, input_size), as nn.LSTM expects by default
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        # return only the prediction for the last time step
        return predictions[-1]
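# Quick sanity-check sketch (not part of the original script): run an untrained LSTM
# on an arbitrary 5-step dummy sequence to confirm the forward pass returns a single
# value for the last time step. _check and _dummy are throwaway illustration names.
_check = LSTM()
_dummy = torch.FloatTensor([0.1, 0.2, 0.3, 0.4, 0.5])
with torch.no_grad():
    print(_check(_dummy).shape)  # torch.Size([1])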
# Train the model
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100
losses = []
for i in range(epochs):
    for seq in train_data:
        optimizer.zero_grad()
        # reset the hidden state before each sequence
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))
        y_pred = model(seq)
        single_loss = loss_function(y_pred, seq)
        single_loss.backward()
        optimizer.step()
    # record the last sample's loss once per epoch
    losses.append(single_loss.item())
    if i % 25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')
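# The losses list collected above is never visualized in the original script; this is a
# short sketch of a training-loss curve (one recorded value per epoch, per the loop above).
plt.figure(figsize=(10, 4))
plt.plot(losses)
plt.xlabel('epoch')
plt.ylabel('MSE loss (scaled data)')
plt.title('LSTM training loss')
plt.show()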
import numpy as np
# Use the trained model to predict over the test period
model.eval()
test_inputs = train_data[-100:].tolist()
test_outputs = []
for i in range(len(test_data)):
    # the input sequence is the most recent 100 known values
    seq = torch.FloatTensor(test_inputs[-100:])
    with torch.no_grad():
        # reset the hidden state (the attribute used by forward() is hidden_cell)
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))
        # seq was built above, so this step's prediction does not see test_data[i];
        # appending the observed value lets later steps condition on real data
        test_inputs.append(test_data[i].tolist())
        test_outputs.append(model(seq).item())
# Invert the min-max scaling back to price units
predicted_price = scaler.inverse_transform(np.array(test_outputs).reshape(-1, 1))
# Plot the predictions
plt.figure(figsize=(12, 6))
plt.plot(range(len(train_data), len(train_data) + len(predicted_price)), predicted_price, color='r', label='Predicted Price')
plt.plot(range(len(data)), scaler.inverse_transform(data), color='b', label='Actual Price')
plt.legend()
plt.savefig("LSTM预测结果对比图.png")
plt.show()
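# Follow-up sketch (not in the original): report the LSTM's test MSE in price units so it
# can be compared, at least roughly, with the linear-regression and XGBoost results printed
# earlier (the splits are both chronological 80/20, but the feature sets differ).
actual_price = scaler.inverse_transform(test_data.numpy().reshape(-1, 1))
print(f'LSTM test MSE: {mse(actual_price, predicted_price):.5f}')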