diff --git a/docs/index.md b/docs/index.md
index d18b98744..8fa9cca2d 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -151,6 +151,7 @@
 | Air pollutants | [UNet pollutant diffusion](https://aistudio.baidu.com/projectdetail/5663515?channel=0&channelType=0&sUid=438690&shared=1&ts=1698221963752) | Data-driven | UNet | Supervised learning | [Data](https://aistudio.baidu.com/datasetdetail/198102) | - |
 | Weather forecasting | [DGMR weather forecasting](./zh/examples/dgmr.md) | Data-driven | DGMR | Supervised learning | [UK dataset](https://huggingface.co/datasets/openclimatefix/nimrod-uk-1km) | [Paper](https://arxiv.org/pdf/2104.00954.pdf) |
 | Seismic waveform inversion | [VelocityGAN seismic waveform inversion](./zh/examples/velocity_gan.md) | Data-driven | VelocityGAN | Supervised learning | [OpenFWI](https://openfwi-lanl.github.io/docs/data.html#vel) | [Paper](https://arxiv.org/abs/1809.10262v6) |
+| Traffic prediction | [TGCN traffic flow prediction](./zh/examples/tgcn.md) | Data-driven | GCN & CNN | Supervised learning | [PEMSD4 & PEMSD8](https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip) | - |

 ## 🚀 Quick installation
diff --git a/docs/zh/examples/tgcn.md b/docs/zh/examples/tgcn.md
index 014c4788f..99d0a3cc4 100644
--- a/docs/zh/examples/tgcn.md
+++ b/docs/zh/examples/tgcn.md
@@ -1,25 +1,33 @@
 # TGCN

-Before training or evaluation, download the dataset: [PEMSD4 & PEMSD8](https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip). Place the extracted dataset folder in the same directory as the `PaddleScience` folder.
-
-Before evaluation, download (or produce by training) a pretrained model: [PEMSD4](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD4_pretrained_model.pdparams) & [PEMSD8](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD8_pretrained_model.pdparams). Place the pretrained model file in the same directory as the `PaddleScience` folder.
-
 === "Model training command"

     ``` sh
     # Train
-    python PaddleScience/examples/tgcn/run.py data_name=PEMSD8
-    # python PaddleScience/examples/tgcn/run.py data_name=PEMSD4
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip
+    unzip tgcn_data.zip
+    python run.py data_name=PEMSD8
+    # python run.py data_name=PEMSD4
     ```

 === "Model evaluation command"

     ``` sh
     # Eval
-    python PaddleScience/examples/tgcn/run.py data_name=PEMSD8 mode=eval
-    # python PaddleScience/examples/tgcn/run.py data_name=PEMSD4 mode=eval
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip
+    unzip tgcn_data.zip
+    wget https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD8_pretrained_model.pdparams
+    python run.py data_name=PEMSD8 mode=eval
+    # wget https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD4_pretrained_model.pdparams
+    # python run.py data_name=PEMSD4 mode=eval
     ```

+| Pretrained model | Metrics |
+| ------------------------------------------------------------ | ----------------------- |
+| [PEMSD4_pretrained_model.pdparams](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD4_pretrained_model.pdparams) | MAE: 21.48; RMSE: 34.06 |
+| [PEMSD8_pretrained_model.pdparams](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD8_pretrained_model.pdparams) | MAE: 15.57; RMSE: 24.52 |
+
 ## 1. Background

 Traffic prediction aims to forecast future traffic time series (e.g., traffic flow or traffic speed) by analyzing historical observations such as sensor records collected on a road network. As a key component of intelligent transportation systems (ITS), traffic prediction is a core building block of smart cities, supporting proactive dynamic traffic control and intelligent route guidance; it helps reduce road-safety risks and improves the operating efficiency of urban transportation systems.

@@ -292,4 +300,3 @@
 | :----- | :---- | :---- |
 | PEMSD4 | 21.48 | 34.06 |
 | PEMSD8 | 15.57 | 24.52 |
-
diff --git a/examples/tgcn/conf/run.yaml b/examples/tgcn/conf/run.yaml
index f7efe1b72..00deacddb 100644
--- a/examples/tgcn/conf/run.yaml
+++ b/examples/tgcn/conf/run.yaml
@@ -1,22 +1,27 @@
+defaults:
+  - ppsci_default
+  - TRAIN: train_default
+  - TRAIN/ema: ema_default
+  - TRAIN/swa: swa_default
+  - EVAL: eval_default
+  - INFER: infer_default
+  - _self_
+
 hydra:
   run:
     # dynamic output directory according to running time and override name
-    dir: __exp__/${data_name}/${now:%Y_%m_%d_%H_%M_%S}
+    dir: outputs_tgcn/${now:%Y-%m-%d}/${now:%H-%M-%S}
   job:
     name: ${mode} # name of logfile
     chdir: false # keep current working directory unchanged
-    config:
-      override_dirname:
-        exclude_keys:
-          - mode
-          - output_dir
-          - log_freq
+  callbacks:
+    init_callback:
+      _target_: ppsci.utils.callbacks.InitCallback
   sweep:
     # output directory for multirun
     dir: ${hydra.run.dir}
     subdir: ./
-
 # general settings
 device: gpu
 mode: train
@@ -34,9 +39,8 @@ reduce: mean

 # model settings
 MODEL:
-  afno:
-    input_keys: ['input']
-    label_keys: ['label']
+  input_keys: ['input']
+  label_keys: ['label']

 seed: 3407
 batch_size: 64
@@ -60,5 +64,5 @@ TRAIN:

 # evaluation settings
 EVAL:
-  pretrained_model_path: ${data_name}_best.pdparams
+  pretrained_model_path: null
   batch_size: ${batch_size}
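A note on the new `defaults` list: Hydra composes `run.yaml` with the referenced base configs (`ppsci_default`, `train_default`, ...) before `run.py` ever sees it, and the registered `InitCallback` takes over the seeding and logger setup that the old script did by hand (which is why `run.py` below drops that code). A minimal sketch of how the composed config can be inspected without launching a run; it assumes `hydra-core` and PaddleScience are installed (so the `ppsci_default` bases resolve) and that the working directory is `examples/tgcn`:

``` py
# Sketch only (not part of this patch): build the same config object that
# hydra.main() passes to run.py, then read a few of the keys used above.
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="run.yaml", overrides=["data_name=PEMSD4", "mode=eval"])
    print(cfg.MODEL.input_keys)            # ['input']
    print(cfg.EVAL.pretrained_model_path)  # None (null in YAML)
```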
diff --git a/examples/tgcn/run.py b/examples/tgcn/run.py
index bd6db243e..d9fc06eca 100644
--- a/examples/tgcn/run.py
+++ b/examples/tgcn/run.py
@@ -1,83 +1,75 @@
-import hydra
 import os
-import ppsci

+import hydra
 from omegaconf import DictConfig
-from ppsci.utils import logger
+
+import ppsci
 from ppsci.arch.tgcn import TGCN
 from ppsci.data.dataset.pems_dataset import get_edge_index


 def train(cfg: DictConfig):
-    # set random seed for reproducibility
-    ppsci.utils.misc.set_random_seed(cfg.seed)
-
-    # initialize logger
-    logger.init_logger('ppsci', os.path.join(cfg.output_dir, 'train.log'), 'info')
-    logger.message(cfg)
-
     # set train dataloader config
     train_dataloader_cfg = {
-        'dataset': {
-            'name': 'PEMSDataset',
-            'file_path': cfg.data_path,
-            'split': 'train',
-            'input_keys': cfg.MODEL.afno.input_keys,
-            'label_keys': cfg.MODEL.afno.label_keys,
-            'norm_input': cfg.norm_input,
-            'norm_label': cfg.norm_label,
-            'input_len': cfg.input_len,
-            'label_len': cfg.label_len
+        "dataset": {
+            "name": "PEMSDataset",
+            "file_path": cfg.data_path,
+            "split": "train",
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.label_keys,
+            "norm_input": cfg.norm_input,
+            "norm_label": cfg.norm_label,
+            "input_len": cfg.input_len,
+            "label_len": cfg.label_len,
         },
-        'sampler': {
-            'name': 'BatchSampler',
-            'drop_last': True,
-            'shuffle': True,
+        "sampler": {
+            "name": "BatchSampler",
+            "drop_last": True,
+            "shuffle": True,
         },
-        'batch_size': cfg.TRAIN.batch_size
+        "batch_size": cfg.TRAIN.batch_size,
     }

     # set constraint
-    sup_constraint = ppsci.constraint.SupervisedConstraint(train_dataloader_cfg, ppsci.loss.L1Loss(), name='train')
+    sup_constraint = ppsci.constraint.SupervisedConstraint(
+        train_dataloader_cfg, ppsci.loss.L1Loss(), name="train"
+    )
     constraint = {sup_constraint.name: sup_constraint}

     # set eval dataloader config
     eval_dataloader_cfg = {
-        'dataset': {
-            'name': 'PEMSDataset',
-            'file_path': cfg.data_path,
-            'split': 'val',
-            'input_keys': cfg.MODEL.afno.input_keys,
-            'label_keys': cfg.MODEL.afno.label_keys,
-            'norm_input': cfg.norm_input,
-            'norm_label': cfg.norm_label,
-            'input_len': cfg.input_len,
-            'label_len': cfg.label_len
+        "dataset": {
+            "name": "PEMSDataset",
+            "file_path": cfg.data_path,
+            "split": "val",
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.label_keys,
+            "norm_input": cfg.norm_input,
+            "norm_label": cfg.norm_label,
+            "input_len": cfg.input_len,
+            "label_len": cfg.label_len,
         },
-        'sampler': {
-            'name': 'BatchSampler',
-            'drop_last': False,
-            'shuffle': False,
+        "sampler": {
+            "name": "BatchSampler",
+            "drop_last": False,
+            "shuffle": False,
         },
-        'batch_size': cfg.EVAL.batch_size
+        "batch_size": cfg.EVAL.batch_size,
     }

     # set validator
     sup_validator = ppsci.validate.SupervisedValidator(
         eval_dataloader_cfg,
         ppsci.loss.L1Loss(),
-        metric={
-            'MAE': ppsci.metric.MAE(),
-            'RMSE': ppsci.metric.RMSE()
-        },
-        name='val'
+        metric={"MAE": ppsci.metric.MAE(), "RMSE": ppsci.metric.RMSE()},
+        name="val",
     )
     validator = {sup_validator.name: sup_validator}

     # get adj
-    edge_index, edge_attr, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
+    _, _, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
     # set model
-    model = TGCN(edge_index=edge_index, edge_attr=edge_attr, adj=adj, cfg=cfg)
+    model = TGCN(adj=adj, cfg=cfg)

     # init optimizer
     optimizer = ppsci.optimizer.Adam(learning_rate=cfg.TRAIN.learning_rate)(model)
     # set iters_per_epoch by dataloader length
@@ -103,7 +95,7 @@ def train(cfg: DictConfig):
         validator=validator,
         pretrained_model_path=cfg.TRAIN.pretrained_model_path,
         # compute_metric_by_batch=False,
-        eval_with_no_grad=True
+        eval_with_no_grad=True,
     )
     # train model
     solver.train()
@@ -112,55 +104,47 @@


 def eval(cfg: DictConfig):
-    # set random seed for reproducibility
-    ppsci.utils.misc.set_random_seed(cfg.seed)
-
-    # initialize logger
-    logger.init_logger('ppsci', os.path.join(cfg.output_dir, 'test.log'), 'info')
-    logger.message(cfg)
-
     # set eval dataloader config
     test_dataloader_cfg = {
-        'dataset': {
-            'name': 'PEMSDataset',
-            'file_path': cfg.data_path,
-            'split': 'test',
-            'input_keys': cfg.MODEL.afno.input_keys,
-            'label_keys': cfg.MODEL.afno.label_keys,
-            'norm_input': cfg.norm_input,
-            'norm_label': cfg.norm_label,
-            'input_len': cfg.input_len,
-            'label_len': cfg.label_len
+        "dataset": {
+            "name": "PEMSDataset",
+            "file_path": cfg.data_path,
+            "split": "test",
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.label_keys,
+            "norm_input": cfg.norm_input,
+            "norm_label": cfg.norm_label,
+            "input_len": cfg.input_len,
+            "label_len": cfg.label_len,
        },
-        'sampler': {
-            'name': 'BatchSampler',
-            'drop_last': False,
-            'shuffle': False,
+        "sampler": {
+            "name": "BatchSampler",
+            "drop_last": False,
+            "shuffle": False,
         },
-        'batch_size': cfg.EVAL.batch_size
+        "batch_size": cfg.EVAL.batch_size,
     }

     # set validator
     sup_validator = ppsci.validate.SupervisedValidator(
         test_dataloader_cfg,
         ppsci.loss.L1Loss(),
-        metric={
-            'MAE': ppsci.metric.MAE(),
-            'RMSE': ppsci.metric.RMSE()
-        },
-        name='test'
+        metric={"MAE": ppsci.metric.MAE(), "RMSE": ppsci.metric.RMSE()},
+        name="test",
     )
     validator = {sup_validator.name: sup_validator}

     # get adj
-    edge_index, edge_attr, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
+    _, _, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
     # set model
-    model = TGCN(edge_index=edge_index, edge_attr=edge_attr, adj=adj, cfg=cfg)
+    model = TGCN(adj=adj, cfg=cfg)

     # best model
-    if cfg.mode == 'train':
-        best_model_path = os.path.join(cfg.output_dir, 'checkpoints', 'best_model.pdparams') # call in train()
+    if cfg.mode == "train":
+        best_model_path = os.path.join(
+            cfg.output_dir, "checkpoints", "best_model.pdparams"
+        )  # reached when eval() runs right after train()
     else:
-        best_model_path = cfg.EVAL.pretrained_model_path
+        best_model_path = cfg.data_name + "_pretrained_model.pdparams"

     # initialize solver
     solver = ppsci.solver.Solver(
@@ -182,23 +166,23 @@ def eval(cfg: DictConfig):
         validator=validator,
         pretrained_model_path=best_model_path,
         # compute_metric_by_batch=False,
-        eval_with_no_grad=True
+        eval_with_no_grad=True,
     )
     # evaluate
     solver.eval()


-@hydra.main(
-    version_base=None, config_path='./conf', config_name='run.yaml'
-)
+@hydra.main(version_base=None, config_path="./conf", config_name="run.yaml")
 def main(cfg: DictConfig):
-    if cfg.mode == 'train':
+    if cfg.mode == "train":
         train(cfg)
-    elif cfg.mode == 'eval':
+    elif cfg.mode == "eval":
         eval(cfg)
     else:
-        raise ValueError('cfg.mode should in [train, eval], but got {}'.format(cfg.mode))
+        raise ValueError(
+            "cfg.mode should be in ['train', 'eval'], but got '{}'".format(cfg.mode)
+        )


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
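One behavioral detail of the new `eval()` worth flagging: in standalone eval mode the weights path is now derived from `data_name` and resolved against the current working directory; `cfg.EVAL.pretrained_model_path` (now `null` in `run.yaml`) is no longer consulted. A small sketch of that lookup, written out under the assumption `data_name=PEMSD8`:

``` py
# Sketch of the checkpoint resolution performed by eval() after this patch.
# The .pdparams file must sit in the directory run.py is launched from
# (hence the wget commands in the eval tab of tgcn.md above).
import os

data_name = "PEMSD8"  # stands in for cfg.data_name
best_model_path = data_name + "_pretrained_model.pdparams"
if not os.path.exists(best_model_path):
    raise FileNotFoundError(f"{best_model_path} not found; download it next to run.py")
```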
diff --git a/mkdocs.yml b/mkdocs.yml
index 7bcf39ce0..ede4b2959 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -96,6 +96,7 @@ nav:
           - EarthFormer: zh/examples/earthformer.md
           - GraphCast: zh/examples/graphcast.md
           - VelocityGAN: zh/examples/velocity_gan.md
+          - TGCN: zh/examples/tgcn.md
       - Chemical science (AI for Chemistry):
           - Moflow: zh/examples/moflow.md
diff --git a/ppsci/arch/tgcn.py b/ppsci/arch/tgcn.py
index da846d4a9..957cf5522 100644
--- a/ppsci/arch/tgcn.py
+++ b/ppsci/arch/tgcn.py
@@ -1,15 +1,20 @@
 import paddle as pp
 import paddle.nn.functional as F
-
 from paddle import nn
-from ppsci.arch.base import Arch
 from paddle.nn.initializer import KaimingNormal

+from ppsci.arch.base import Arch
+

 class graph_conv(nn.Layer):
     def __init__(self, in_dim, out_dim, dropout, num_layer=2):
         super(graph_conv, self).__init__()

-        self.mlp = nn.Conv2D((num_layer + 1) * in_dim, out_dim, kernel_size=(1, 1), weight_attr=KaimingNormal())
+        self.mlp = nn.Conv2D(
+            (num_layer + 1) * in_dim,
+            out_dim,
+            kernel_size=(1, 1),
+            weight_attr=KaimingNormal(),
+        )

         self.dropout = dropout
         self.num_layer = num_layer
@@ -35,11 +40,23 @@ def __init__(self, in_dim, out_dim, hidden, num_layer=3, k_s=3, alpha=0.1):
         self.num_layer = num_layer
         for i in range(num_layer):
             in_channels = in_dim if i == 0 else hidden
-            self.tc_convs.append(nn.Conv2D(in_channels=in_channels, out_channels=hidden,
-                                           kernel_size=(1, k_s), padding=(0, i + 1), dilation=i + 1, weight_attr=KaimingNormal()))
-
-        self.mlp = nn.Conv2D(in_channels=in_dim + hidden * num_layer, out_channels=out_dim,
-                             kernel_size=(1, 1), weight_attr=KaimingNormal())
+            self.tc_convs.append(
+                nn.Conv2D(
+                    in_channels=in_channels,
+                    out_channels=hidden,
+                    kernel_size=(1, k_s),
+                    padding=(0, i + 1),
+                    dilation=i + 1,
+                    weight_attr=KaimingNormal(),
+                )
+            )
+
+        self.mlp = nn.Conv2D(
+            in_channels=in_dim + hidden * num_layer,
+            out_channels=out_dim,
+            kernel_size=(1, 1),
+            weight_attr=KaimingNormal(),
+        )

     def forward(self, x):
         # B C N T
@@ -52,7 +69,7 @@


 class TGCN(Arch):
-    def __init__(self, cfg, edge_index, edge_attr, adj):
+    def __init__(self, cfg, adj):
         super(TGCN, self).__init__()
         # para
         in_dim = cfg.input_dim
@@ -64,27 +81,42 @@ def __init__(self, cfg, edge_index, edge_attr, adj):
         dropout = cfg.dropout
         alpha = cfg.leakyrelu_alpha

-        self.input_keys = cfg.MODEL.afno.input_keys
-        self.output_keys = cfg.MODEL.afno.label_keys
+        self.input_keys = cfg.MODEL.input_keys
+        self.output_keys = cfg.MODEL.label_keys

-        self.edge_index = pp.to_tensor(data=edge_index, place=cfg.device)
-        self.edge_attr = pp.to_tensor(data=edge_attr, place=cfg.device)
-        self.adj = pp.to_tensor(data=adj, place=cfg.device)
+        self.register_buffer("adj", pp.to_tensor(data=adj))

-        self.emb_conv = nn.Conv2D(in_channels=in_dim, out_channels=emb_dim, kernel_size=(1, 1), weight_attr=KaimingNormal())
+        self.emb_conv = nn.Conv2D(
+            in_channels=in_dim,
+            out_channels=emb_dim,
+            kernel_size=(1, 1),
+            weight_attr=KaimingNormal(),
+        )

-        self.tc1_conv = tempol_conv(emb_dim, hidden, hidden, num_layer=tc_layer, k_s=k_s, alpha=alpha)
+        self.tc1_conv = tempol_conv(
+            emb_dim, hidden, hidden, num_layer=tc_layer, k_s=k_s, alpha=alpha
+        )
         self.sc1_conv = graph_conv(hidden, hidden, dropout, num_layer=gc_layer)
         self.bn1 = nn.BatchNorm2D(hidden)

-        self.tc2_conv = tempol_conv(hidden, hidden, hidden, num_layer=tc_layer, k_s=k_s, alpha=alpha)
+        self.tc2_conv = tempol_conv(
+            hidden, hidden, hidden, num_layer=tc_layer, k_s=k_s, alpha=alpha
+        )
         self.sc2_conv = graph_conv(hidden, hidden, dropout, num_layer=gc_layer)
         self.bn2 = nn.BatchNorm2D(hidden)

-        self.end_conv_1 = nn.Conv2D(in_channels=emb_dim + hidden + hidden, out_channels=2 *
-                                    hidden, kernel_size=(1, 1), weight_attr=KaimingNormal())
-        self.end_conv_2 = nn.Conv2D(in_channels=2 * hidden, out_channels=cfg.label_len,
-                                    kernel_size=(1, cfg.input_len), weight_attr=KaimingNormal())
+        self.end_conv_1 = nn.Conv2D(
+            in_channels=emb_dim + hidden + hidden,
+            out_channels=2 * hidden,
+            kernel_size=(1, 1),
+            weight_attr=KaimingNormal(),
+        )
+        self.end_conv_2 = nn.Conv2D(
+            in_channels=2 * hidden,
+            out_channels=cfg.label_len,
+            kernel_size=(1, cfg.input_len),
+            weight_attr=KaimingNormal(),
+        )

     def forward(self, raw):
         # emb block
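With the constructor change, callers now pass only the dense adjacency, and `register_buffer` stores it so it moves with the model across devices without becoming a trainable parameter (the old code instead pinned tensors to `cfg.device` by hand). A minimal construction sketch mirroring `run.py`; the `tgcn_data/PEMSD8` path and the plain `OmegaConf.load` (Hydra normally composes the config) are assumptions for illustration, and `run.yaml` must define the hyperparameters `TGCN` reads (`input_dim`, `emb_dim`, `dropout`, ...):

``` py
# Sketch only: build TGCN the way run.py does after this patch.
from omegaconf import OmegaConf

from ppsci.arch.tgcn import TGCN
from ppsci.data.dataset.pems_dataset import get_edge_index

cfg = OmegaConf.load("examples/tgcn/conf/run.yaml")  # hypothetical direct load
_, _, adj = get_edge_index("tgcn_data/PEMSD8", reduce="mean")  # dense, row-normalized
model = TGCN(adj=adj, cfg=cfg)  # adj is stored as a buffer, not a parameter
```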
diff --git a/ppsci/data/dataset/pems_dataset.py b/ppsci/data/dataset/pems_dataset.py
index ccae786a8..5b973c60d 100644
--- a/ppsci/data/dataset/pems_dataset.py
+++ b/ppsci/data/dataset/pems_dataset.py
@@ -1,10 +1,12 @@
 import os
+from typing import Dict
+from typing import Optional
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
-
 from paddle.io import Dataset
 from paddle.vision.transforms import Compose
-from typing import Tuple, Optional, Dict


 class StandardScaler:
@@ -25,14 +27,20 @@ def add_window_horizon(data, in_step=12, out_step=12):
     X = []
     Y = []
     for i in range(end_index + 1):
-        X.append(data[i:i + in_step])
-        Y.append(data[i + in_step:i + in_step + out_step])
+        X.append(data[i : i + in_step])
+        Y.append(data[i + in_step : i + in_step + out_step])
     return X, Y


-def get_edge_index(file_path, bi=True, reduce='mean'):
+def get_edge_index(file_path, bi=True, reduce="mean"):
     TYPE_DICT = {0: np.int64, 1: np.int64, 2: np.float32}
-    df = pd.read_csv(os.path.join(file_path, 'dist.csv'), skiprows=1, header=None, sep=',', dtype=TYPE_DICT)
+    df = pd.read_csv(
+        os.path.join(file_path, "dist.csv"),
+        skiprows=1,
+        header=None,
+        sep=",",
+        dtype=TYPE_DICT,
+    )
     edge_index = df.loc[:, [0, 1]].values.T
     edge_attr = df.loc[:, 2].values

@@ -45,9 +53,9 @@ def get_edge_index(file_path, bi=True, reduce='mean'):
     num = np.max(edge_index) + 1
     adj = np.zeros((num, num), dtype=np.float32)

-    if reduce == 'sum':
+    if reduce == "sum":
         adj[edge_index[0], edge_index[1]] = 1.0
-    elif reduce == 'mean':
+    elif reduce == "mean":
         adj[edge_index[0], edge_index[1]] = 1.0
         adj = adj / adj.sum(axis=-1)
     else:
@@ -57,17 +65,19 @@


 class PEMSDataset(Dataset):
-    def __init__(self,
-                 file_path: str,
-                 split: str,
-                 input_keys: Tuple[str, ...],
-                 label_keys: Tuple[str, ...],
-                 weight_dict: Optional[Dict[str, float]] = None,
-                 transforms: Optional[Compose] = None,
-                 norm_input: bool = True,
-                 norm_label: bool = False,
-                 input_len: int = 12,
-                 label_len: int = 12):
+    def __init__(
+        self,
+        file_path: str,
+        split: str,
+        input_keys: Tuple[str, ...],
+        label_keys: Tuple[str, ...],
+        weight_dict: Optional[Dict[str, float]] = None,
+        transforms: Optional[Compose] = None,
+        norm_input: bool = True,
+        norm_label: bool = False,
+        input_len: int = 12,
+        label_len: int = 12,
+    ):
         super().__init__()

         self.input_keys = input_keys
@@ -78,10 +88,12 @@ def __init__(self,
         self.norm_input = norm_input
         self.norm_label = norm_label

-        data = np.load(os.path.join(file_path, '{}.npy'.format(split))).astype(np.float32)
+        data = np.load(os.path.join(file_path, "{}.npy".format(split))).astype(
+            np.float32
+        )

-        self.mean = np.load(os.path.join(file_path, 'mean.npy')).astype(np.float32)
-        self.std = np.load(os.path.join(file_path, 'std.npy')).astype(np.float32)
+        self.mean = np.load(os.path.join(file_path, "mean.npy")).astype(np.float32)
+        self.std = np.load(os.path.join(file_path, "std.npy")).astype(np.float32)
         self.scaler = StandardScaler(self.mean, self.std)

         X, Y = add_window_horizon(data, input_len, label_len)
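Finally, the slice rewrites in `add_window_horizon` are purely cosmetic (Black's slice spacing), but the function is easy to sanity-check in isolation. A self-contained sketch with synthetic data; `end_index` is reconstructed from the loop bound, since its definition sits just above the hunk and is not shown in this diff:

``` py
# Standalone sketch of the sliding-window split used by PEMSDataset:
# each sample pairs in_step history frames with the next out_step frames.
import numpy as np

def add_window_horizon(data, in_step=12, out_step=12):
    end_index = len(data) - in_step - out_step  # assumed definition (not in the hunk)
    X, Y = [], []
    for i in range(end_index + 1):
        X.append(data[i : i + in_step])
        Y.append(data[i + in_step : i + in_step + out_step])
    return X, Y

data = np.arange(30, dtype=np.float32).reshape(30, 1)  # 30 steps, one node/feature
X, Y = add_window_horizon(data)
print(len(X))            # 7 windows: 30 - 12 - 12 + 1
print(X[0].ravel()[:3])  # [0. 1. 2.]
print(Y[0].ravel()[:3])  # [12. 13. 14.]
```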