python学习打卡day55

DAY 55 序列预测任务介绍

知识点回顾

序列预测介绍
1. 单步预测
2. 多步预测的2种方式
序列数据的处理：滑动窗口
多输入多输出任务的思路
经典机器学习在序列任务上的劣势；以随机森林为例

作业：手动构造类似的数据集（如 cos(x) 数据），观察不同机器学习模型之间的差异

使用 LightGBM 效果同样非常差：树模型只能输出训练集中见过的数值范围，无法外推序列中持续上升的趋势。

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import lightgbm as lgb# =============================================================
# ===== Step 1: data preparation (identical to the previous example) =====
# =============================================================
# Synthetic series: a cosine wave on a linear upward trend plus Gaussian noise.
x = np.linspace(0, 100, num=1000)
y = np.cos(x) + 0.1 * x + np.random.normal(loc=0, scale=0.5, size=x.size)

# Experiment parameters: chronological 80/20 split, 30-step look-back window.
train_size = int(0.8 * len(y))
seq_length = 30

# Fit the scaler on the training portion only so that no statistics from the
# test range leak into preprocessing, then transform the whole series with it.
train_data_raw = y[:train_size]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data_raw[:, np.newaxis])
scaled_y = scaler.transform(y[:, np.newaxis]).flatten()

# Sliding-window dataset builder
def create_sequences(data, seq_length):
    """Slice a series into sliding-window samples for one-step-ahead prediction.

    Args:
        data: Sequence supporting ``len`` and slicing (e.g. a 1-D NumPy array).
        seq_length: Number of consecutive values that make up one input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        There are ``len(data) - seq_length`` samples; both arrays are empty
        when the series is not longer than the window.
    """
    # range() of a non-positive count is empty, so short inputs yield no samples.
    n_samples = len(data) - seq_length
    windows = [data[i:i + seq_length] for i in range(n_samples)]
    targets = [data[i + seq_length] for i in range(n_samples)]
    return np.array(windows), np.array(targets)


# Apply the sliding window to the complete series
all_X, all_y = create_sequences(scaled_y, seq_length)

# Split the windowed samples chronologically. Window i predicts the value at
# position i + seq_length, so the first (train_size - seq_length) windows are
# exactly the ones whose targets fall inside the training portion.
split_idx = train_size - seq_length
X_train_np, X_test_np = all_X[:split_idx], all_X[split_idx:]
y_train_np, y_test_np = all_y[:split_idx], all_y[split_idx:]
# =========================================================================
# ===== Step 2: prepare the data for the LightGBM model =====
# =========================================================================
# The estimator takes 2-D input [n_samples, n_features]; each window is
# flattened so that every time step in the window becomes one feature.
n_samples_train = X_train_np.shape[0]
n_samples_test = X_test_np.shape[0]

X_train_lgb = X_train_np.reshape(n_samples_train, -1)
X_test_lgb = X_test_np.reshape(n_samples_test, -1)

print("为LightGBM准备的 X_train 形状:", X_train_lgb.shape)  # (770, 30)
print("为LightGBM准备的 X_test 形状:", X_test_lgb.shape)   # (200, 30)
# =============================================================
# ===== Step 3: create, train and evaluate the LightGBM model =====
# =============================================================
# The scikit-learn style wrapper consumes NumPy arrays directly, so no
# lgb.Dataset objects have to be built.
lgb_model = lgb.LGBMRegressor(
    n_estimators=100,
    num_leaves=31,
    learning_rate=0.05,
    # `colsample_bytree` is the wrapper's own name for the core parameter
    # `feature_fraction`; using it avoids passing the same setting under an
    # alias next to the wrapper's default.
    colsample_bytree=0.9,
    random_state=42,
    n_jobs=-1,
)

# Train the model
print("\n开始训练LightGBM模型...")
lgb_model.fit(X_train_lgb, y_train_np)
print("模型训练完成！")

# Predict on both splits
train_predict = lgb_model.predict(X_train_lgb)
test_predict = lgb_model.predict(X_test_lgb)

# Map predictions back to the original scale (the scaler works on 2-D columns)
train_predict = scaler.inverse_transform(train_predict.reshape(-1, 1))
test_predict = scaler.inverse_transform(test_predict.reshape(-1, 1))

# The ground-truth labels were scaled too, so undo the scaling on them as well
y_train_orig = scaler.inverse_transform(y_train_np.reshape(-1, 1))
y_test_orig = scaler.inverse_transform(y_test_np.reshape(-1, 1))

# Root mean squared error (RMSE) in the original units
train_rmse = np.sqrt(mean_squared_error(y_train_orig, train_predict))
test_rmse = np.sqrt(mean_squared_error(y_test_orig, test_predict))

print(f"\n训练集 RMSE: {train_rmse:.4f}")
print(f"测试集 RMSE: {test_rmse:.4f}")
# =============================================================
# ===== Step 4: visualise the results =====
# =============================================================
# NOTE(review): the labels below are Chinese; if they render as empty boxes,
# a CJK-capable font has to be configured for matplotlib — confirm locally.
plt.figure(figsize=(15, 7))
plt.plot(y, label='原始数据', color='gray', alpha=0.5)

# Training predictions: the first window's target sits at index seq_length,
# so the curve is shifted right by seq_length; all other positions stay NaN
# and are therefore not drawn.
train_predict_plot = np.full_like(y, np.nan)
train_predict_plot[seq_length:seq_length + len(train_predict)] = train_predict.flatten()
plt.plot(train_predict_plot, label='训练集预测值 (LightGBM)', color='blue')

# Test predictions start right after the last training target.
test_predict_plot = np.full_like(y, np.nan)
test_predict_plot[len(train_predict) + seq_length:len(y)] = test_predict.flatten()
# The legend label used to read "(RF)", a leftover from the random-forest
# version of this script; the curve shows LightGBM predictions.
plt.plot(test_predict_plot, label='测试集预测值 (LightGBM)', color='red')

plt.title('时间序列预测结果对比 (LightGBM)')
plt.xlabel('时间步')
plt.ylabel('值')
plt.legend()
plt.grid(True)
plt.show()

# Feature importance, ranked by total split gain
lgb.plot_importance(lgb_model, height=0.8, title='特征重要性', importance_type='gain')
plt.show()

@浙大疏锦行

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。
如若转载，请注明出处：http://www.pswp.cn/web/84763.shtml
繁体地址，请注明出处：http://hk.pswp.cn/web/84763.shtml

如若内容造成侵权/违法违规/事实不符，请联系多彩编程网进行投诉反馈email:809451989@qq.com，一经查实，立即删除！