import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense,Layer,Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau
import os
plt.rcParams['font.sans-serif'] = ['SimHei'] # 使用黑体
plt.rcParams['axes.unicode_minus'] = False
class DamSeepageModel:
def __init__(self, root):
self.root = root
self.root.title("大坝渗流预测模型")
self.root.geometry("1200x800")
# 初始化数据
self.train_df = None
self.test_df = None
self.model = None
self.scaler = MinMaxScaler(feature_range=(0, 1))
# 创建主界面
self.create_widgets()
def create_widgets(self):
# 创建主框架
main_frame = ttk.Frame(self.root, padding=10)
main_frame.pack(fill=tk.BOTH, expand=True)
# 左侧控制面板
control_frame = ttk.LabelFrame(main_frame, text="模型控制", padding=10)
control_frame.pack(side=tk.LEFT, fill=tk.Y, padx=5, pady=5)
# 文件选择部分
file_frame = ttk.LabelFrame(control_frame, text="数据文件", padding=10)
file_frame.pack(fill=tk.X, pady=5)
# 训练集选择
ttk.Label(file_frame, text="训练集:").grid(row=0, column=0, sticky=tk.W, pady=5)
self.train_file_var = tk.StringVar()
ttk.Entry(file_frame, textvariable=self.train_file_var, width=30, state='readonly').grid(row=0, column=1,
padx=5)
ttk.Button(file_frame, text="选择文件", command=lambda: self.select_file("train")).grid(row=0, column=2)
# 测试集选择
ttk.Label(file_frame, text="测试集:").grid(row=1, column=0, sticky=tk.W, pady=5)
self.test_file_var = tk.StringVar()
ttk.Entry(file_frame, textvariable=self.test_file_var, width=30, state='readonly').grid(row=1, column=1, padx=5)
ttk.Button(file_frame, text="选择文件", command=lambda: self.select_file("test")).grid(row=1, column=2)
# 参数设置部分
param_frame = ttk.LabelFrame(control_frame, text="模型参数", padding=10)
param_frame.pack(fill=tk.X, pady=10)
ttk.Label(param_frame, text="Dropout率:").grid(row=5, column=0, sticky=tk.W, pady=5)
self.dropout_rate_var = tk.DoubleVar(value=0.2)
ttk.Spinbox(param_frame, from_=0.0, to=0.5, increment=0.05, format="%.2f",
textvariable=self.dropout_rate_var, width=10).grid(row=5, column=1, padx=5)
# 时间窗口大小
ttk.Label(param_frame, text="时间窗口大小:").grid(row=0, column=0, sticky=tk.W, pady=5)
self.window_size_var = tk.IntVar(value=60)
ttk.Spinbox(param_frame, from_=10, to=200, increment=5,
textvariable=self.window_size_var, width=10).grid(row=0, column=1, padx=5)
# LSTM单元数量
ttk.Label(param_frame, text="LSTM单元数:").grid(row=1, column=0, sticky=tk.W, pady=5)
self.lstm_units_var = tk.IntVar(value=50)
ttk.Spinbox(param_frame, from_=10, to=200, increment=10,
textvariable=self.lstm_units_var, width=10).grid(row=1, column=1, padx=5)
# 训练轮次
ttk.Label(param_frame, text="训练轮次:").grid(row=2, column=0, sticky=tk.W, pady=5)
self.epochs_var = tk.IntVar(value=100)
ttk.Spinbox(param_frame, from_=10, to=500, increment=10,
textvariable=self.epochs_var, width=10).grid(row=2, column=1, padx=5)
# 批处理大小
ttk.Label(param_frame, text="批处理大小:").grid(row=3, column=0, sticky=tk.W, pady=5)
self.batch_size_var = tk.IntVar(value=32)
ttk.Spinbox(param_frame, from_=16, to=128, increment=16,
textvariable=self.batch_size_var, width=10).grid(row=3, column=1, padx=5)
# 控制按钮
btn_frame = ttk.Frame(control_frame)
btn_frame.pack(fill=tk.X, pady=10)
ttk.Button(btn_frame, text="训练模型", command=self.train_model).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="预测结果", command=self.predict).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="保存结果", command=self.save_results).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="重置", command=self.reset).pack(side=tk.RIGHT, padx=5)
# 状态栏
self.status_var = tk.StringVar(value="就绪")
status_bar = ttk.Label(control_frame, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W)
status_bar.pack(fill=tk.X, side=tk.BOTTOM)
# 右侧结果显示区域
result_frame = ttk.Frame(main_frame)
result_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=5, pady=5)
# 创建标签页
self.notebook = ttk.Notebook(result_frame)
self.notebook.pack(fill=tk.BOTH, expand=True)
# 损失曲线标签页
self.loss_frame = ttk.Frame(self.notebook)
self.notebook.add(self.loss_frame, text="训练损失")
# 预测结果标签页
self.prediction_frame = ttk.Frame(self.notebook)
self.notebook.add(self.prediction_frame, text="预测结果")
# 初始化绘图区域
self.fig, self.ax = plt.subplots(figsize=(10, 6))
self.canvas = FigureCanvasTkAgg(self.fig, master=self.prediction_frame)
self.canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
self.loss_fig, self.loss_ax = plt.subplots(figsize=(10, 4))
self.loss_canvas = FigureCanvasTkAgg(self.loss_fig, master=self.loss_frame)
self.loss_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
# 文件选择
def select_file(self, file_type):
"""选择Excel文件"""
file_path = filedialog.askopenfilename(
title=f"选择{file_type}集Excel文件",
filetypes=[("Excel文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
)
if file_path:
try:
# 读取Excel文件
df = pd.read_excel(file_path)
# 时间特征列
time_features = ['year', 'month', 'day']
missing_time_features = [feat for feat in time_features if feat not in df.columns]
if '水位' not in df.columns:
messagebox.showerror("列名错误", "Excel文件必须包含'水位'列")
return
if missing_time_features:
messagebox.showerror("列名错误",
f"Excel文件缺少预处理后的时间特征列: {', '.join(missing_time_features)}\n"
"请确保已使用预处理功能添加这些列")
return
# 创建完整的时间戳列
# 处理可能缺失的小时、分钟、秒数据
if 'hour' in df.columns and 'minute' in df.columns and 'second' in df.columns:
df['datetime'] = pd.to_datetime(
df[['year', 'month', 'day', 'hour', 'minute', 'second']]
)
elif 'hour' in df.columns and 'minute' in df.columns:
df['datetime'] = pd.to_datetime(
df[['year', 'month', 'day', 'hour', 'minute']].assign(second=0)
)
else:
df['datetime'] = pd.to_datetime(df[['year', 'month', 'day']])
# 设置时间索引
df = df.set_index('datetime')
# 保存数据
if file_type == "train":
self.train_df = df
self.train_file_var.set(os.path.basename(file_path))
self.status_var.set(f"已加载训练集: {len(self.train_df)}条数据")
else:
self.test_df = df
self.test_file_var.set(os.path.basename(file_path))
self.status_var.set(f"已加载测试集: {len(self.test_df)}条数据")
except Exception as e:
messagebox.showerror("文件错误", f"读取文件失败: {str(e)}")
def create_dataset(self, data, window_size):
"""创建时间窗口数据集"""
X, y = [], []
for i in range(len(data) - window_size):
X.append(data[i:(i + window_size), 0])
y.append(data[i + window_size, 0])
return np.array(X), np.array(y)
def train_model(self):
"""训练LSTM模型"""
if self.train_df is None:
messagebox.showwarning("警告", "请先选择训练集文件")
return
try:
self.status_var.set("正在预处理数据...")
self.root.update()
# 数据预处理
train_scaled = self.scaler.fit_transform(self.train_df[['水位']])
# 创建时间窗口数据集
window_size = self.window_size_var.get()
X_train, y_train = self.create_dataset(train_scaled, window_size)
# 调整LSTM输入格式
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
# 构建LSTM模型
self.model = Sequential()
self.model.add(LSTM(
self.lstm_units_var.get(),
return_sequences=True,
input_shape=(window_size, 1)
))
self.model.add(Dropout(self.dropout_rate_var.get())) # 添加Dropout
# 添加额外的LSTM层
for _ in range(self.lstm_layers_var.get() - 1):
self.model.add(LSTM(self.lstm_units_var.get()))
self.model.add(Dropout(self.dropout_rate_var.get())) # 添加Dropout
self.model.add(Dense(1))
self.model.compile(
optimizer=Adam(learning_rate=0.001), # 优化器
loss='binary_crossentropy', # 损失函数
metrics=['accuracy'] # 评估指标
)
# 添加更完善的回调机制
early_stopping = EarlyStopping(
monitor='val_loss',
patience=30, # 增加耐心值
min_delta=0.0001,
restore_best_weights=True,
verbose=1
)
model_checkpoint = ModelCheckpoint(
'best_model.h5',
monitor='val_loss',
save_best_only=True,
verbose=1
)
reduce_lr = ReduceLROnPlateau(
monitor='val_loss',
factor=0.4, # 更激进的学习率衰减
patience=20,
min_lr=1e-6,
verbose=1
)
# 训练模型(添加更多回调)
history = self.model.fit(
X_train,
y_train,
epochs=self.epochs_var.get(),
batch_size=self.batch_size_var.get(),
validation_split=0.2,
callbacks=[early_stopping, model_checkpoint, reduce_lr],
verbose=0
)
# 绘制损失曲线
self.loss_ax.clear()
self.loss_ax.plot(history.history['loss'], label='训练损失')
self.loss_ax.plot(history.history['val_loss'], label='验证损失')
self.loss_ax.set_title('模型训练损失')
self.loss_ax.set_xlabel('轮次')
self.loss_ax.set_ylabel('损失',rotation=0)
self.loss_ax.legend()
self.loss_ax.grid(True)
self.loss_canvas.draw()
# 根据早停情况更新状态信息
if early_stopping.stopped_epoch > 0:
stopped_epoch = early_stopping.stopped_epoch
best_epoch = early_stopping.best_epoch
final_loss = history.history['loss'][-1]
best_loss = min(history.history['val_loss'])
self.status_var.set(
f"训练在{stopped_epoch + 1}轮提前终止 | "
f"最佳模型在第{best_epoch + 1}轮 | "
f"最终损失: {final_loss:.6f} | "
f"最佳验证损失: {best_loss:.6f}"
)
messagebox.showinfo(
"训练完成",
f"模型训练提前终止!\n"
f"最佳模型在第{best_epoch + 1}轮\n"
f"最佳验证损失: {best_loss:.6f}"
)
else:
final_loss = history.history['loss'][-1]
self.status_var.set(f"模型训练完成 | 最终损失: {final_loss:.6f}")
messagebox.showinfo("训练完成", "模型训练成功完成!")
except Exception as e:
messagebox.showerror("训练错误", f"模型训练失败:\n{str(e)}")
self.status_var.set("训练失败")
def predict(self):
"""使用模型进行预测"""
if self.model is None:
messagebox.showwarning("警告", "请先训练模型")
return
if self.test_df is None:
messagebox.showwarning("警告", "请先选择测试集文件")
return
try:
self.status_var.set("正在生成预测...")
self.root.update()
# 预处理测试数据
test_scaled = self.scaler.transform(self.test_df[['水位']])
# 创建测试集时间窗口
window_size = self.window_size_var.get()
X_test, y_test = self.create_dataset(test_scaled, window_size)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# 进行预测
test_predict = self.model.predict(X_test)
# 反归一化
test_predict = self.scaler.inverse_transform(test_predict)
y_test_orig = self.scaler.inverse_transform([y_test]).T
# 创建时间索引
test_time = self.test_df.index[window_size:window_size + len(test_predict)]
# 绘制结果
self.ax.clear()
self.ax.plot(self.train_df.index, self.train_df['水位'], 'b-', label='训练集数据')
self.ax.plot(test_time, self.test_df['水位'][window_size:window_size + len(test_predict)],
'g-', label='测试集数据')
self.ax.plot(test_time, test_predict, 'r--', label='模型预测')
# 添加分隔线
split_point = test_time[0]
self.ax.axvline(x=split_point, color='k', linestyle='--', alpha=0.5)
self.ax.text(split_point, self.ax.get_ylim()[0] * 0.9, ' 训练/测试分界', rotation=90)
self.ax.set_title('大坝渗流水位预测结果')
self.ax.set_xlabel('时间')
self.ax.set_ylabel('测压管水位',rotation=0)
self.ax.legend()
self.ax.grid(True)
self.ax.tick_params(axis='x', rotation=45)
self.fig.tight_layout()
self.canvas.draw()
self.status_var.set("预测完成,结果已显示")
except Exception as e:
messagebox.showerror("预测错误", f"预测失败:\n{str(e)}")
self.status_var.set("预测失败")
def save_results(self):
"""保存预测结果"""
if not hasattr(self, 'test_predict') or self.test_predict is None:
messagebox.showwarning("警告", "请先生成预测结果")
return
save_path = filedialog.asksaveasfilename(
defaultextension=".xlsx",
filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")]
)
if not save_path:
return
try:
# 创建包含预测结果的DataFrame
window_size = self.window_size_var.get()
test_time = self.test_df.index[window_size:window_size + len(self.test_predict)]
result_df = pd.DataFrame({
'时间': test_time,
'实际水位': self.test_df['水位'][window_size:window_size + len(self.test_predict)].values,
'预测水位': self.test_predict.flatten()
})
# 保存到Excel
result_df.to_excel(save_path, index=False)
# 保存图表
chart_path = os.path.splitext(save_path)[0] + "_chart.png"
self.fig.savefig(chart_path, dpi=300)
self.status_var.set(f"结果已保存至: {os.path.basename(save_path)}")
messagebox.showinfo("保存成功", f"预测结果和图表已保存至:\n{save_path}\n{chart_path}")
except Exception as e:
messagebox.showerror("保存错误", f"保存结果失败:\n{str(e)}")
def reset(self):
"""重置程序状态"""
self.train_df = None
self.test_df = None
self.model = None
self.train_file_var.set("")
self.test_file_var.set("")
self.ax.clear()
self.loss_ax.clear()
self.canvas.draw()
self.loss_canvas.draw()
self.data_text.delete(1.0, tk.END)
self.status_var.set("已重置,请选择新数据")
messagebox.showinfo("重置", "程序已重置,可以开始新的分析")
if __name__ == "__main__":
root = tk.Tk()
app = DamSeepageModel(root)
root.mainloop()
这个代码怎么改进来降低训练和验证损失
最新发布