【免费】时间序列预测模型实战案例(Xgboost)(Python)(机器学习)包括时间序列预测和时间序列分类，点击即可运行！

共3个文件

py：2个

csv：1个

python

机器学习

需积分：0 · 170 浏览量 · 2023-09-25 13:38:59 上传 · 评论 · 6 收藏 · 407KB ZIP · 举报

资源推荐

资源详情

资源评论

收起资源包目录

Xgboost-ETTh1.zip （3个子文件）

ETTh1.csv 2.47MB

xgboostforecast.py 5KB

shareXgboost.py 3KB

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor


def timeseries_train_test_split(X, y, test_size):
    """Split X and y into train/test parts without shuffling.

    The first ``int(len(X) * (1 - test_size))`` rows become the training
    set and the remaining rows become the test set, so row order (time
    order for a time series) is preserved.

    Args:
        X: feature table supporting ``.iloc`` slicing.
        y: target supporting ``.iloc`` slicing, row-aligned with X.
        test_size: fraction of rows (0..1) placed in the test set.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # position of the first row that belongs to the test set
    test_index = int(len(X) * (1 - test_size))

    X_train = X.iloc[:test_index]
    y_train = y.iloc[:test_index]
    X_test = X.iloc[test_index:]
    y_test = y.iloc[test_index:]
    return X_train, X_test, y_train, y_test


def code_mean(data, cat_feature, real_feature):
    """Return ``{category: mean of real_feature}`` for each cat_feature value.

    Args:
        data: DataFrame containing both columns.
        cat_feature: name of the categorical column (e.g. day of week).
        real_feature: name of the numeric column to average (the target).
    """
    return dict(data.groupby(cat_feature)[real_feature].mean())


def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are compared position by position. Observations whose
    true value is exactly zero are skipped because the percentage error
    is undefined there (the division would produce inf/nan).

    Args:
        y_true: actual values.
        y_pred: predicted values, same length as y_true.

    Returns:
        MAPE as a float percentage, or NaN when no true value is non-zero.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    nonzero = y_true != 0
    if not nonzero.any():
        return float("nan")

    relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_error)) * 100


def plotModelResults(model, X_train, X_test, plot_intervals=False,
                     plot_anomalies=False, scale=1.96,
                     y_train=None, y_test=None, cv=None):
    """Plot predictions against actual values, with optional bands/anomalies.

    Args:
        model: fitted estimator exposing ``predict``.
        X_train: training features, used only for cross-validation when
            ``plot_intervals`` is True.
        X_test: test features to predict on.
        plot_intervals: if True, draw lower/upper bands around the prediction.
        plot_anomalies: if True (and ``plot_intervals`` is True), mark actual
            values that fall outside the bands.
        scale: multiplier applied to the deviation to get the band width.
        y_train: training target for cross-validation.
        y_test: actual values matching ``X_test``.
        cv: cross-validation splitter passed to ``cross_val_score``.

    Raises:
        ValueError: if a required target is neither passed nor available
            as a module-level variable.
    """
    # Earlier versions read y_train / y_test / tscv from module globals;
    # keep that as a fallback so existing callers continue to work.
    module_globals = globals()
    if y_test is None:
        y_test = module_globals.get("y_test")
    if y_test is None:
        raise ValueError("y_test is required to plot the actual values")

    prediction = model.predict(X_test)
    actual = np.asarray(y_test, dtype=float)

    plt.figure(figsize=(15, 7))
    plt.plot(prediction, "g", label="prediction", linewidth=2.0)
    plt.plot(actual, label="actual", linewidth=2.0)

    if plot_intervals:
        if y_train is None:
            y_train = module_globals.get("y_train")
        if y_train is None:
            raise ValueError("y_train is required when plot_intervals=True")
        if cv is None:
            cv = module_globals.get("tscv")
        if cv is None:
            cv = TimeSeriesSplit(n_splits=5)

        scores = cross_val_score(model, X_train, y_train,
                                 cv=cv,
                                 scoring="neg_mean_squared_error")
        # Band width: square root of the spread of the per-fold MSE scores.
        deviation = np.sqrt(scores.std())

        lower = prediction - (scale * deviation)
        upper = prediction + (scale * deviation)

        plt.plot(lower, "r--", label="upper bond / lower bond", alpha=0.5)
        plt.plot(upper, "r--", alpha=0.5)

        if plot_anomalies:
            # NaN entries are not drawn, so only out-of-band points show up.
            anomalies = np.full(len(actual), np.nan)
            below = actual < lower
            above = actual > upper
            anomalies[below] = actual[below]
            anomalies[above] = actual[above]
            plt.plot(anomalies, "o", markersize=10, label="Anomalies")

    # actual values first: the metric's signature is (y_true, y_pred)
    error = mean_absolute_percentage_error(actual, prediction)
    plt.title("Mean absolute percentage error {0:.2f}%".format(error))
    plt.legend(loc="best")
    plt.tight_layout()
    plt.grid(True)
    plt.show()


def prepareData(series, lag_start, lag_end, test_size, target_encoding=False,
                target_col="OT"):
    """Build lag and calendar features and split them chronologically.

    Args:
        series: DataFrame holding the time series; its index must be
            parseable by ``pd.to_datetime``.
        lag_start: first lag (in rows) to include. ``lag_start=1`` lets the
            model see the previous observation.
        lag_end: exclusive upper bound of the lags; features are created
            for ``lag_start .. lag_end - 1``.
        test_size: fraction of the rows (0..1) held out for testing.
        target_encoding: if True, replace the ``weekday`` column with the
            per-weekday mean of the target.
        target_col: name of the target column in ``series``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # work on a copy holding only the target column, renamed to "y"
    data = pd.DataFrame(series.copy()).loc[:, [target_col]]
    data.columns = ["y"]

    # lagged copies of the target
    for i in range(lag_start, lag_end):
        data["lag_{}".format(i)] = data.y.shift(i)

    # calendar features derived from the datetime index
    data.index = pd.to_datetime(data.index)
    data["hour"] = data.index.hour
    data["weekday"] = data.index.weekday
    data["is_weekend"] = data.weekday.isin([5, 6]) * 1

    if target_encoding:
        # Averages are computed on the leading rows only, so that test
        # rows do not contribute to the encoding.
        test_index = int(len(data.dropna()) * (1 - test_size))
        weekday_means = code_mean(data[:test_index], "weekday", "y")
        data["weekday_average"] = data.weekday.map(weekday_means)

        # drop the variable that has just been encoded
        data.drop(["weekday"], axis=1, inplace=True)

    # rows with incomplete lag history (or a missing encoding) are dropped
    complete = data.dropna()
    y = complete.y
    X = complete.drop(["y"], axis=1)
    X = pd.get_dummies(X)

    return timeseries_train_test_split(X, y, test_size=test_size)


def main():
    """Train XGBoost on the OT column of ETTh1.csv and plot the forecast."""
    df = pd.read_csv("ETTh1.csv")
    # assign back instead of chained in-place fillna, which pandas deprecates
    df["OT"] = df["OT"].fillna(0)
    df.set_index("date", inplace=True)
    hp_raw = df[["OT"]]

    tscv = TimeSeriesSplit(n_splits=5)

    # reserve the last 10% of the data for testing
    X_train, X_test, y_train, y_test = prepareData(
        hp_raw, lag_start=1, lag_end=28, test_size=0.1, target_encoding=True)

    # the scaler is fitted on the training part only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    xgb = XGBRegressor()
    xgb.fit(X_train_scaled, y_train)

    plotModelResults(xgb,
                     X_train=X_train_scaled,
                     X_test=X_test_scaled,
                     plot_intervals=True,
                     plot_anomalies=True,
                     y_train=y_train,
                     y_test=y_test,
                     cv=tscv)


if __name__ == "__main__":
    main()

评论收藏

内容反馈