# pip install openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple/
# pip install optuna -i https://pypi.tuna.tsinghua.edu.cn/simple/
import math
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.data as Data
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese (CJK) labels correctly
plt.rcParams['axes.unicode_minus'] = False  # display minus signs correctly
# Fix all random seeds so the experiment is reproducible
SEED = 1234
import random
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)  # for single-GPU training
torch.cuda.manual_seed_all(SEED)  # for multi-GPU training
from torch.backends import cudnn
cudnn.benchmark = False
cudnn.deterministic = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data=pd.read_csv("dwv_order_make_haikou_1.csv",encoding='gbk')
data=data[['district', 'county',
'combo_type', 'traffic_type', 'passenger_count',
'start_dest_distance','pre_total_fee', 'product_1level', 'dest_lng',
'dest_lat', 'starting_lng', 'starting_lat']]
data=data.values[:1000]
print(data.shape)
# print(data[0])
# print(len(data_y))
# Sliding window: 3 consecutive rows form the input sequence; the last feature
# of the following (4th) row is the prediction target.
sqe_len = 3    # input sequence length
len_int = 12   # number of features per row (the 12 selected columns)
data_2_x = []
data_2_y = []
for i in range(0, len(data) - 5, 1):  # stride 1; the upper bound leaves room for index i + 3
temp_x = []
temp_x.append(data[i][:])
temp_x.append(data[i+1][:])
temp_x.append(data[i+2][:])
# temp_x.append(data[i + 3][:-1])
data_2_x.append(temp_x)
data_2_y.append(data[i+3][-1])
print(len(data_2_x), len(data_2_y))
print(data_2_x[0])
data_2_x = np.array(data_2_x)  # optionally: dtype=float
print("data_2_x.shape",data_2_x.shape)
class DataSet(Data.Dataset):
def __init__(self, data_inputs, data_targets):
self.inputs = torch.FloatTensor(data_inputs)
self.label = torch.FloatTensor(data_targets)
def __getitem__(self, index):
return self.inputs[index], self.label[index]
def __len__(self):
return len(self.inputs)
Batch_Size = 128
dataset = DataSet(np.array(data_2_x), list(data_2_y))
train_size = int(len(data_2_y) * 0.8)  # 80/20 train/test split
test_size = len(data_2_y) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
TrainDataLoader = Data.DataLoader(train_dataset, batch_size=Batch_Size, shuffle=False, drop_last=True)
TestDataLoader = Data.DataLoader(test_dataset, batch_size=Batch_Size, shuffle=False, drop_last=True)
print("number of batches in TestDataLoader", len(TestDataLoader))
print("number of batches in TrainDataLoader", len(TrainDataLoader))
print("np.array(data_2_x).shape",np.array(data_2_x).shape)
class PositionalEncoding(nn.Module):
def __init__(self, d_model, max_len=5000):
super(PositionalEncoding, self).__init__()
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
# pe.requires_grad = False
self.register_buffer('pe', pe)
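    # Note: forward() below splits the input feature-wise (x.chunk along dim=2) and adds
    # the full d_model-wide sinusoidal encoding to each single-feature chunk, so an input
    # of shape [seq_len, batch, n_features] comes out as [seq_len, batch, n_features * d_model].
    # This class is not referenced by the Transformer model further down, which uses
    # learned nn.Embedding position embeddings instead.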
def forward(self, x: torch.Tensor):
chunk = x.chunk(x.size(-1), dim=2)
out = torch.Tensor([]).to(x.device)
for i in range(len(chunk)):
out = torch.cat((out, chunk[i] + self.pe[:chunk[i].size(0), ...]), dim=2)
return out
def transformer_generate_tgt_mask(length, device):
mask = torch.tril(torch.ones(length, length, device=device)) == 1
mask = (
mask.float()
.masked_fill(mask == 0, float("-inf"))
.masked_fill(mask == 1, float(0.0))
)
return mask
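# For reference, transformer_generate_tgt_mask(3, device) produces the causal mask
#   tensor([[0., -inf, -inf],
#           [0.,   0., -inf],
#           [0.,   0.,   0.]])
# so decoder position i can only attend to positions <= i.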
class Transformer(nn.Module):
    """Standard Transformer encoder-decoder model with an LSTM front-end."""
    def __init__(self, n_encoder_inputs, n_decoder_inputs, Sequence_length, d_model=512, dropout=0.1, num_layer=8):
        """
        :param n_encoder_inputs: feature dimension of the encoder input
        :param n_decoder_inputs: feature dimension of the decoder input (here equal to the encoder input dimension)
        :param Sequence_length: length of the input sequence fed to the Transformer
        :param d_model: embedding dimension
        :param dropout: dropout rate
        :param num_layer: number of attention heads (passed as nhead to the encoder/decoder layers)
        """
        super(Transformer, self).__init__()
        self.input_pos_embedding = torch.nn.Embedding(500, embedding_dim=d_model)
        self.target_pos_embedding = torch.nn.Embedding(500, embedding_dim=d_model)
        encoder_layer = torch.nn.TransformerEncoderLayer(d_model=d_model, nhead=num_layer, dropout=dropout,
                                                         dim_feedforward=4 * d_model)
        decoder_layer = torch.nn.TransformerDecoderLayer(d_model=d_model, nhead=num_layer, dropout=dropout,
                                                         dim_feedforward=4 * d_model)
        self.encoder = torch.nn.TransformerEncoder(encoder_layer, num_layers=2)
        self.decoder = torch.nn.TransformerDecoder(decoder_layer, num_layers=4)
        # Single-layer unidirectional LSTM over the raw features; batch_first=True because the
        # inputs arrive as [batch, seq_len, features] (set bidirectional=True for a BiLSTM).
        self.lstm = nn.LSTM(len_int, len_int, num_layers=1, bidirectional=False, batch_first=True)
        # Convolutional branch (defined but not used in forward);
        # expected input format: [batch, channel=1, sequence_length, embedding_size].
        self.conv = nn.Sequential(nn.Conv2d(1, 1, (1, 2)),  # 1x2 kernel
                                  nn.ReLU(),
                                  nn.MaxPool2d((2, 1)))
        self.input_projection = torch.nn.Linear(n_encoder_inputs, d_model)
        self.output_projection = torch.nn.Linear(n_decoder_inputs, d_model)
        self.linear = torch.nn.Linear(d_model, 1)
        self.ziji_add_linear = torch.nn.Linear(Sequence_length, 1)
def encode_in(self, src):
src_start = self.input_projection(src).permute(1, 0, 2)
in_sequence_len, batch_size = src_start.size(0), src_start.size(1)
pos_encoder = (torch.arange(0, in_sequence_len, device=src.device).unsqueeze(0).repeat(batch_size, 1))
pos_encoder = self.input_pos_embedding(pos_encoder).permute(1, 0, 2)
src = src_start + pos_encoder
src = self.encoder(src) + src_start
return src
def decode_out(self, tgt, memory):
tgt_start = self.output_projection(tgt).permute(1, 0, 2)
out_sequence_len, batch_size = tgt_start.size(0), tgt_start.size(1)
pos_decoder = (torch.arange(0, out_sequence_len, device=tgt.device).unsqueeze(0).repeat(batch_size, 1))
pos_decoder = self.target_pos_embedding(pos_decoder).permute(1, 0, 2)
tgt = tgt_start + pos_decoder
tgt_mask = transformer_generate_tgt_mask(out_sequence_len, tgt.device)
out = self.decoder(tgt=tgt, memory=memory, tgt_mask=tgt_mask) + tgt_start
out = out.permute(1, 0, 2) # [batch_size, seq_len, d_model]
out = self.linear(out)
return out
    def forward(self, src, target_in):
        # src: [batch_size, seq_len, n_features]
        lstm_out, (h_n, c_n) = self.lstm(src)
        src = self.encode_in(lstm_out)
        out = self.decode_out(tgt=target_in, memory=src)  # [batch_size, seq_len, 1]
        # Likely intended ending (the original listing is cut off here): collapse the
        # sequence dimension to a single predicted value per sample.
        out = out.squeeze(2)             # [batch_size, seq_len]
        out = self.ziji_add_linear(out)  # [batch_size, 1]
        return out
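# --- Minimal smoke test (a sketch, not in the original listing) ---
# Assumes 12 input features and a sequence length of 3, matching the sliding window
# built above; the decoder input is random here purely to verify shapes, since the
# training loop lies outside this section.
smoke_model = Transformer(n_encoder_inputs=12, n_decoder_inputs=12, Sequence_length=3).to(device)
smoke_src = torch.rand(Batch_Size, 3, 12, device=device)
smoke_tgt = torch.rand(Batch_Size, 3, 12, device=device)
print(smoke_model(smoke_src, smoke_tgt).shape)  # expected: torch.Size([128, 1])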