PyTorch搭建基于图神经网络(GCN)的天气推荐系统(附源码和数据集)
创始人
2024-01-31 03:41:26
0

需要源码和数据集请点赞关注收藏后评论区留言~~~

一、背景

极端天气情况一直困扰着人们的工作和生活。部分企业或者工种对极端天气的要求不同,但是目前主流的天气推荐系统是直接将天气信息推送给全部用户。这意味着重要的天气信息在用户手上得不到筛选,降低用户的满意度,甚至导致用户的经济损失。我们计划开发一个基于图神经网络的天气靶向模型,根据用户的历史交互行为,判断不同天气对他的利害程度。如果有必要,则将该极端天气情况推送给该用户,让其有时间做好应对准备。该模型能够减少不必要的信息传递,提高用户的体验感。

二、模型介绍

数据集介绍

(一)数据集共有三个txt文件,分别是user.txt,weather.txt,rating.txt。这些文件一共包含900名用户,1600个天气状况,95964条用户的历史交互记录。

  1. user.txt

用户的信息记录在user.txt中。格式如下:

用户ID\t年龄\t性别\t职业\t地理位置

  2. weather.txt

天气的信息记录在weather.txt中。格式如下:

天气ID\t天气类型\t温度\t湿度\t风速 

  3. rating.txt

用户的历史交互记录在rating.txt中。格式如下:

用户ID\t天气ID\t评分

三、项目结构

如下图 data里面存放了数据集

四、运行结果

开始训练  可以看到第一行显示了一些训练的基本配置内容 包括用的设备cpu 训练批次 学习率等等

 可以看出随着训练次数的增加 损失率在不断降低

最后会自动选出一个最佳的测试和训练集的损失值

 结果可视化如下

 五、代码

部分源码如下

train类

import pandas as pd
import time
from utils import fix_seed_torch, draw_loss_pic
import argparse
from model import GCN
from Logger import Logger
from mydataset import MyDataset
import torch
from torch.nn import MSELoss
from torch.optim import Adam
from torch.utils.data import DataLoader, random_split
import sys
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen with some torch/MKL installs -- confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

# Fix the random seed so that the train/test split and the parameter
# initialisation are reproducible between runs.
fix_seed_torch(seed=2021)

# Training hyper-parameters (all overridable from the command line).
parser = argparse.ArgumentParser()
parser.add_argument('--gcn_layers', type=int, default=2, help='the number of gcn layers')
parser.add_argument('--n_epochs', type=int, default=20, help='the number of epochs')
parser.add_argument('--embedSize', type=int, default=64, help='dimension of user and entity embeddings')
parser.add_argument('--batch_size', type=int, default=1024, help='batch size')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--ratio', type=float, default=0.8, help='size of training dataset')
args = parser.parse_args()

# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
args.device = device

# Load the user features, the weather (item) features and the ratings.
user_feature = pd.read_csv('./data/user.txt', encoding='utf-8', sep='\t')
item_feature = pd.read_csv('./data/weather.txt', encoding='utf-8', sep='\t')
rating = pd.read_csv('./data/rating.txt', encoding='utf-8', sep='\t')

# Build the dataset and split it into a training and a test part.
dataset = MyDataset(rating)
trainLen = int(args.ratio * len(dataset))
train, test = random_split(dataset, [trainLen, len(dataset) - trainLen])
train_loader = DataLoader(train, batch_size=args.batch_size, shuffle=True, pin_memory=True)
# The whole test split is evaluated as one single batch.
test_loader = DataLoader(test, batch_size=len(test))

# The log file and the checkpoints are written into these directories;
# create them up front so a fresh checkout does not fail with
# FileNotFoundError.
os.makedirs('./log', exist_ok=True)
os.makedirs('./model', exist_ok=True)

# Record the hyper-parameters of this run in the log.
start_time = '{}'.format(time.strftime("%m-%d-%H-%M", time.localtime()))
logger = Logger('./log/log-{}.txt'.format(start_time))
logger.info(' '.join('%s: %s' % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))

# Model.
model = GCN(args, user_feature, item_feature, rating)
model.to(device)

# Optimizer.
optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=0.001)

# Loss function (the training loop takes its square root, i.e. uses RMSE).
loss_function = MSELoss()
train_result = []
test_result = []

# Lowest test loss seen so far; starts at the largest float so that the
# first epoch always counts as an improvement.
best_loss = sys.float_info.max
# Train for args.n_epochs epochs; after every epoch evaluate on the test split
# and keep the parameters of the epoch with the lowest test RMSE.
os.makedirs('./model', exist_ok=True)  # torch.save does not create directories
for i in range(args.n_epochs):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        prediction = model(batch[0].to(device), batch[1].to(device))
        # RMSE; MSELoss is symmetric, arguments are given as (input, target).
        train_loss = torch.sqrt(loss_function(prediction, batch[2].float().to(device)))
        train_loss.backward()
        optimizer.step()
    # NOTE(review): the collapsed original does not show whether this append
    # sat inside the batch loop. It is placed at epoch level (loss of the last
    # batch) so that train_result and test_result have one entry per epoch --
    # confirm against draw_loss_pic.
    train_loss = train_loss.item()
    train_result.append(train_loss)

    model.eval()
    # No gradients are needed for evaluation; skipping the autograd graph
    # matters because the test loader yields the whole test split at once.
    with torch.no_grad():
        for data in test_loader:
            prediction = model(data[0].to(device), data[1].to(device))
            test_loss = torch.sqrt(loss_function(prediction, data[2].float().to(device)))
            test_loss = test_loss.item()
            if best_loss > test_loss:
                best_loss = test_loss
                torch.save(model.state_dict(), './model/bestModeParms-{}.pth'.format(start_time))
            test_result.append(test_loss)
            logger.info("Epoch{:d}:trainLoss{:.4f},testLoss{:.4f}".format(i, train_loss, test_loss))
else:model.load_state_dict(torch.load("./model/bestModeParms-11-18-19-47.pth"))user_id=input("请输入用户id")item_num=rating['itemId'].max()+1u=torch.tensor([int(user_id)for i in range(item_num)],dtype=float)气ID".format(user_id))print(i[0]for i in result)# 画图
draw_loss_pic(train_result, test_result)

Logger类

import sys
import os
import logging


class Logger(object):
    """Write log messages to a file and to standard output at the same time.

    Args:
        filename: Path of the log file. It is also used as the name of the
            underlying ``logging`` logger, so two ``Logger`` objects created
            with the same path share one logger.
    """

    def __init__(self, filename):
        # logging.FileHandler does not create missing directories.
        log_dir = os.path.dirname(filename)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        self.logger = logging.getLogger(filename)
        self.logger.setLevel(logging.DEBUG)

        # logging.getLogger returns the same object for the same name; only
        # attach handlers the first time, otherwise every message would be
        # emitted once per Logger instance.
        if not self.logger.handlers:
            formatter = logging.Formatter('%(asctime)s: %(message)s', datefmt='%Y-%m-%d %H-%M-%S')

            # write into file
            fh = logging.FileHandler(filename)
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(formatter)

            # show on console
            ch = logging.StreamHandler(sys.stdout)
            ch.setLevel(logging.DEBUG)
            ch.setFormatter(formatter)

            # add to Handler
            self.logger.addHandler(fh)
            self.logger.addHandler(ch)

    def _flush(self):
        """Flush every handler so the message is visible immediately."""
        for handler in self.logger.handlers:
            handler.flush()

    def info(self, message):
        """Log ``message`` at INFO level and flush all handlers."""
        self.logger.info(message)
        self._flush()

model类

import numpy as np
import torch.nn
import torch.nn as nn
from utils import *
from torch.nn import Module
import scipy.sparse as sp


class GCN_Layer(Module):
    """One graph-convolution layer.

    Computes ``LeakyReLU(W1((A + I) X) + W2((A X) * X))`` where ``A`` is the
    sparse graph, ``I`` the sparse self-loop matrix, ``X`` the dense node
    features and ``*`` the element-wise product.

    Args:
        inF: Width of the incoming node features.
        outF: Width of the produced node features.
    """

    def __init__(self, inF, outF):
        super(GCN_Layer, self).__init__()
        self.W1 = torch.nn.Linear(in_features=inF, out_features=outF)
        self.W2 = torch.nn.Linear(in_features=inF, out_features=outF)
        # Parameter-free, so it adds nothing to the state_dict.
        self.activation = nn.LeakyReLU()

    def forward(self, graph, selfLoop, features):
        """Apply the layer.

        Args:
            graph: Sparse square matrix over all nodes.
            selfLoop: Sparse matrix of the same size that is added to ``graph``.
            features: Dense node-feature matrix, one row per node.

        Returns:
            Dense matrix with the new node features.
        """
        part1 = self.W1(torch.sparse.mm(graph + selfLoop, features))
        part2 = self.W2(torch.mul(torch.sparse.mm(graph, features), features))
        return self.activation(part1 + part2)


class GCN(Module):
    """Rating predictor built from feature embeddings and stacked GCN layers.

    Users and items (weather conditions) are the nodes of a bipartite graph
    whose edges are weighted by the ratings. A prediction is the dot product
    of the transformed user and item representations.

    NOTE(review): ``forward`` splits the node features into
    ``rating['user_id'].max() + 1`` user rows and ``rating['item_id'].max() + 1``
    item rows, and indexes them by id. This presumably requires
    ``user_feature`` / ``item_feature`` to hold exactly one row per id, sorted
    by id -- confirm against the data files.

    Args:
        args: Namespace providing ``device``, ``gcn_layers`` and ``embedSize``.
        user_feature: DataFrame with columns ``id``, ``age``, ``gender``,
            ``occupation`` and ``location``.
        item_feature: DataFrame with columns ``id``, ``type``, ``temperature``,
            ``humidity`` and ``windSpeed``.
        rating: DataFrame with columns ``user_id``, ``item_id`` and ``rating``.

    Raises:
        ValueError: If ``args.embedSize`` differs from the width of the
            concatenated feature embeddings (64).
    """

    def __init__(self, args, user_feature, item_feature, rating):
        super(GCN, self).__init__()
        self.args = args
        self.device = args.device
        self.user_feature = user_feature
        self.item_feature = item_feature
        self.rating = rating
        self.num_user = int(rating['user_id'].max()) + 1
        self.num_item = int(rating['item_id'].max()) + 1

        # user embedding (32 + 4 + 2 + 8 + 18 = 64 columns once concatenated)
        self.user_id_embedding = nn.Embedding(self._vocab_size(user_feature['id']), 32)
        self.user_age_embedding = nn.Embedding(self._vocab_size(user_feature['age']), 4)
        self.user_gender_embedding = nn.Embedding(self._vocab_size(user_feature['gender']), 2)
        self.user_occupation_embedding = nn.Embedding(self._vocab_size(user_feature['occupation']), 8)
        self.user_location_embedding = nn.Embedding(self._vocab_size(user_feature['location']), 18)

        # item embedding (32 + 8 + 8 + 8 + 8 = 64 columns once concatenated)
        self.item_id_embedding = nn.Embedding(self._vocab_size(item_feature['id']), 32)
        self.item_type_embedding = nn.Embedding(self._vocab_size(item_feature['type']), 8)
        self.item_temperature_embedding = nn.Embedding(self._vocab_size(item_feature['temperature']), 8)
        self.item_humidity_embedding = nn.Embedding(self._vocab_size(item_feature['humidity']), 8)
        self.item_windSpeed_embedding = nn.Embedding(self._vocab_size(item_feature['windSpeed']), 8)

        # The GCN layers consume the concatenated embeddings directly, so their
        # width must equal embedSize; fail early with a clear message instead
        # of a shape error deep inside the first forward pass.
        user_dim = sum(emb.embedding_dim for emb in (
            self.user_id_embedding, self.user_age_embedding, self.user_gender_embedding,
            self.user_occupation_embedding, self.user_location_embedding))
        item_dim = sum(emb.embedding_dim for emb in (
            self.item_id_embedding, self.item_type_embedding, self.item_temperature_embedding,
            self.item_humidity_embedding, self.item_windSpeed_embedding))
        if user_dim != self.args.embedSize or item_dim != self.args.embedSize:
            raise ValueError(
                'embedSize must be {} (user) / {} (item) to match the feature embeddings, got {}'.format(
                    user_dim, item_dim, self.args.embedSize))

        # self loop
        self.selfLoop = self.getSelfLoop(self.num_user + self.num_item)

        # stacked GCN layers
        self.GCN_Layers = torch.nn.ModuleList()
        for _ in range(self.args.gcn_layers):
            self.GCN_Layers.append(GCN_Layer(self.args.embedSize, self.args.embedSize))

        self.graph = self.buildGraph()

        # Maps the concatenation of the input features and every layer output
        # back to embedSize columns.
        self.transForm = nn.Linear(in_features=self.args.embedSize * (self.args.gcn_layers + 1),
                                   out_features=self.args.embedSize)

    @staticmethod
    def _vocab_size(column):
        """Return the number of embedding rows needed for the ids in ``column``."""
        return int(column.max()) + 1

    def _embed(self, embedding, column):
        """Look up ``embedding`` for every value of the DataFrame ``column``."""
        index = torch.as_tensor(column.to_numpy(), dtype=torch.long, device=self.device)
        return embedding(index)

    def getSelfLoop(self, num):
        """Return a sparse ``num`` x ``num`` identity matrix on ``self.device``."""
        num = int(num)
        diagonal = torch.arange(num, dtype=torch.long)
        indices = torch.stack((diagonal, diagonal))
        values = torch.ones(num, dtype=torch.float32)
        return torch.sparse_coo_tensor(indices, values, (num, num)).to(self.device)

    def buildGraph(self):
        """Build the normalised bipartite user-item graph as a sparse tensor.

        The rating matrix ``R`` (users x items) is embedded into the square
        adjacency ``[[0, R], [R^T, 0]]``, which is then scaled from the left
        by ``1 / sqrt(row sum)`` and from the right by ``1 / sqrt(column sum)``.

        Returns:
            Sparse float tensor of shape
            ``(num_user + num_item, num_user + num_item)`` on ``self.device``.
        """
        user_ids = self.rating['user_id'].to_numpy()
        item_ids = self.rating['item_id'].to_numpy()
        scores = self.rating['rating'].to_numpy()

        graph = sp.coo_matrix((scores, (user_ids, item_ids)),
                              shape=(self.num_user, self.num_item)).tocsr()
        graph = sp.bmat([[sp.csr_matrix((graph.shape[0], graph.shape[0])), graph],
                         [graph.T, sp.csr_matrix((graph.shape[1], graph.shape[1]))]])

        # The small epsilon keeps isolated nodes (zero sum) from dividing by
        # zero; their rows and columns are all zero, so the result stays zero.
        row_sum_sqrt = sp.diags(1 / (np.sqrt(np.asarray(graph.sum(axis=1)).ravel()) + 1e-8))
        col_sum_sqrt = sp.diags(1 / (np.sqrt(np.asarray(graph.sum(axis=0)).ravel()) + 1e-8))
        graph = (row_sum_sqrt @ graph @ col_sum_sqrt).tocoo()

        indices = torch.as_tensor(np.vstack((graph.row, graph.col)), dtype=torch.long)
        values = torch.as_tensor(graph.data, dtype=torch.float32)
        graph = torch.sparse_coo_tensor(indices, values, torch.Size(graph.shape))
        return graph.to(self.device)

    def getFeature(self):
        """Return the initial node features: user rows first, then item rows."""
        # embeddings of the user features
        user_emb = torch.cat((
            self._embed(self.user_id_embedding, self.user_feature['id']),
            self._embed(self.user_age_embedding, self.user_feature['age']),
            self._embed(self.user_gender_embedding, self.user_feature['gender']),
            self._embed(self.user_occupation_embedding, self.user_feature['occupation']),
            self._embed(self.user_location_embedding, self.user_feature['location']),
        ), dim=1)

        # embeddings of the weather (item) features
        item_emb = torch.cat((
            self._embed(self.item_id_embedding, self.item_feature['id']),
            self._embed(self.item_type_embedding, self.item_feature['type']),
            self._embed(self.item_temperature_embedding, self.item_feature['temperature']),
            self._embed(self.item_humidity_embedding, self.item_feature['humidity']),
            self._embed(self.item_windSpeed_embedding, self.item_feature['windSpeed']),
        ), dim=1)

        # stack users on top of items
        concat_emb = torch.cat([user_emb, item_emb], dim=0)
        return concat_emb.to(self.device)

    def forward(self, users, items):
        """Predict one score per ``(users[k], items[k])`` pair.

        Args:
            users: Index tensor of user ids.
            items: Index tensor of item ids, same length as ``users``.

        Returns:
            1-D tensor with one predicted score per pair.
        """
        features = self.getFeature()
        # Keep the input features and the output of every layer, then
        # concatenate them column-wise.
        layer_outputs = [features]
        for layer in self.GCN_Layers:
            features = layer(self.graph, self.selfLoop, features)
            layer_outputs.append(features)
        final_emb = torch.cat(layer_outputs, dim=1)

        user_emb, item_emb = torch.split(final_emb, [self.num_user, self.num_item])
        user_emb = self.transForm(user_emb[users])
        item_emb = self.transForm(item_emb[items])
        prediction = torch.mul(user_emb, item_emb).sum(1)
        return prediction

mydataset类

from torch.utils.data import Dataset
import pandas as pd


class MyDataset(Dataset):
    """Dataset of ``(user id, weather id, rating)`` triples.

    Args:
        rating: DataFrame with the columns ``user_id``, ``item_id`` and
            ``rating``; one row per interaction.
    """

    def __init__(self, rating):
        super().__init__()
        # Store plain arrays so that __getitem__ is positional; indexing a
        # pandas Series with an int is label-based and breaks as soon as the
        # frame does not carry the default 0..n-1 index.
        self.user = rating['user_id'].to_numpy()
        self.weather = rating['item_id'].to_numpy()
        self.rating = rating['rating'].to_numpy()

    def __len__(self):
        """Return the number of interactions."""
        return len(self.rating)

    def __getitem__(self, item):
        """Return the ``(user id, weather id, rating)`` triple at position ``item``."""
        return self.user[item], self.weather[item], self.rating[item]

utils类(注:原文此处贴出的代码与上文 mydataset 类完全相同,应为误贴;train 中引用的 fix_seed_torch、draw_loss_pic 并未在文中给出)

from torch.utils.data import Dataset
import pandas as pd


class MyDataset(Dataset):
    """Dataset of ``(user id, weather id, rating)`` triples.

    Args:
        rating: DataFrame with the columns ``user_id``, ``item_id`` and
            ``rating``; one row per interaction.
    """

    def __init__(self, rating):
        super().__init__()
        # Store plain arrays so that __getitem__ is positional; indexing a
        # pandas Series with an int is label-based and breaks as soon as the
        # frame does not carry the default 0..n-1 index.
        self.user = rating['user_id'].to_numpy()
        self.weather = rating['item_id'].to_numpy()
        self.rating = rating['rating'].to_numpy()

    def __len__(self):
        """Return the number of interactions."""
        return len(self.rating)

    def __getitem__(self, item):
        """Return the ``(user id, weather id, rating)`` triple at position ``item``."""
        return self.user[item], self.weather[item], self.rating[item]

创作不易 觉得有帮助请点赞关注收藏~~~

相关内容

热门资讯

金华市修订完善知识产权强市建设... 近期,《金华市人民政府关于深入实施知识产权强市建设的若干意见》(以下简称《意见》)修订完成。12月2...
“地质瑰宝”披上法治“防护衣”... 华声在线12月25日讯(全媒体记者 杨元崇)今天,《湘西世界地质公园保护条例》正式公布。 湘西土家族...
120亿元!安徽真金白银推进学... 记者近日从安徽省财政厅获悉,2025年,省财政共统筹资金120亿元落实各项学生资助政策。财政资金的持...
国家出台首个支持西部陆海新通道... 12月25日,“金融支持加快西部陆海新通道建设有关情况”新闻发布会在北京举行。中国人民银行党委委员、...
广东华企通法律咨询有限公司成立... 天眼查显示,近日,广东华企通法律咨询有限公司成立,法定代表人为彭俊翔,注册资本500万人民币,由盈合...
低空经济持续获政策支持 业绩有... 人民财讯12月26日电,低空经济作为国家战略性新兴产业,持续受到政策支持。 作为万亿级产业,低空经济...
【深圳特区报】从“制度设计图”... 近日,深圳市财政局召开“数据资产全过程管理试点工作培训暨经验交流会”(以下简称“培训交流会”),围绕...
原创 重... 在情感的漩涡中,我们常常看到悲剧的发生。近日,一则令人心痛的消息震动了整个网络——重庆一男子因感情纠...
极氪回应明年车辆质保权益等问题... 【CNMO科技消息】近日,极氪汽车发布《极氪零距离 | 你问我答》公告,就用户关注的车辆质保、免费充...
关于海南育儿补贴制度实施热点问... 户籍刚迁入海南的婴幼儿是否能享受育儿补贴;如何快速了解补贴申领流程……12月25日,海南省新闻办公室...