rePR

PaddlePaddle · Oct 9, 2024 · 42bf515
1 parent 53d9771
commit 42bf515
Show file tree

Hide file tree

Showing 7 changed files with 194 additions and 153 deletions.
diff --git a/docs/index.md b/docs/index.md
@@ -151,6 +151,7 @@
 | 大气污染物 | [UNet 污染物扩散](https://aistudio.baidu.com/projectdetail/5663515?channel=0&channelType=0&sUid=438690&shared=1&ts=1698221963752) | 数据驱动 | UNet | 监督学习 | [Data](https://aistudio.baidu.com/datasetdetail/198102) | - |
 | 天气预报 | [DGMR 气象预报](./zh/examples/dgmr.md) | 数据驱动 | DGMR | 监督学习 | [UK dataset](https://huggingface.co/datasets/openclimatefix/nimrod-uk-1km) | [Paper](https://arxiv.org/pdf/2104.00954.pdf) |
 | 地震波形反演 | [VelocityGAN 地震波形反演](./zh/examples/velocity_gan.md) | 数据驱动 | VelocityGAN | 监督学习 | [OpenFWI](https://openfwi-lanl.github.io/docs/data.html#vel) | [Paper](https://arxiv.org/abs/1809.10262v6) |
+| 交通预测 | [TGCN 交通流量预测](./zh/examples/tgcn.md) | 数据驱动 | GCN & CNN | 监督学习 | [PEMSD4 & PEMSD8](https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip) | - |
 
 ## 🚀快速安装
 

diff --git a/docs/zh/examples/tgcn.md b/docs/zh/examples/tgcn.md
@@ -1,25 +1,33 @@
 # TGCN
 
-开始训练、评估前，请下载数据集：[PEMSD4 & PEMSD8](https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip)。将解压后的数据集文件夹与 `PaddleScience` 文件夹放置于同一目录下。
-
-开始评估前，请下载或训练生成预训练模型：[PEMSD4](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD4_pretrained_model.pdparams) & [PEMSD8](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD8_pretrained_model.pdparams)。将预训练模型文件与 `PaddleScience` 文件夹放置于同一目录下。
-
 === "模型训练命令"
 
     ``` sh
     # Train
-    python PaddleScience/examples/tgcn/run.py data_name=PEMSD8
-    # python PaddleScience/examples/tgcn/run.py data_name=PEMSD4
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip
+    unzip tgcn_data.zip
+    python run.py data_name=PEMSD8
+    # python run.py data_name=PEMSD4
     ```
 
 === "模型评估命令"
 
     ``` sh
     # Eval
-    python PaddleScience/examples/tgcn/run.py data_name=PEMSD8 mode=eval
-    # python PaddleScience/examples/tgcn/run.py data_name=PEMSD4 mode=eval
+    wget https://paddle-org.bj.bcebos.com/paddlescience/datasets/tgcn/tgcn_data.zip
+    unzip tgcn_data.zip
+    wget https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD8_pretrained_model.pdparams
+    python run.py data_name=PEMSD8 mode=eval
+    # wget https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD4_pretrained_model.pdparams
+    # python run.py data_name=PEMSD4 mode=eval
     ```
 
+| 预训练模型                                                   | 指标                    |
+| ------------------------------------------------------------ | ----------------------- |
+| [PEMSD4_pretrained_model.pdparams](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD4_pretrained_model.pdparams) | MAE: 21.48; RMSE: 34.06 |
+| [PEMSD8_pretrained_model.pdparams](https://paddle-org.bj.bcebos.com/paddlescience/models/tgcn/PEMSD8_pretrained_model.pdparams) | MAE: 15.57; RMSE: 24.52 |
+
+
 ## 1. 背景简介
 
 交通预测旨在通过分析历史观测数据（例如，交通网络上的传感器记录）来预测未来的交通时间序列状况（例如，交通流量或交通速度）。作为智能交通系统（ITS）的重要组成部分，交通预测任务是实现智慧城市的核心基础，包括主动动态交通控制和智能路线引导，有助于减少道路安全隐患并提高城市交通系统的运营效率。
@@ -292,4 +300,3 @@ examples/tgcn/conf/run.yaml
 | :----- | :---- | :---- |
 | PEMSD4 | 21.48 | 34.06 |
 | PEMSD8 | 15.57 | 24.52 |
-
diff --git a/examples/tgcn/conf/run.yaml b/examples/tgcn/conf/run.yaml
@@ -1,22 +1,27 @@
+defaults:
+  - ppsci_default
+  - TRAIN: train_default
+  - TRAIN/ema: ema_default
+  - TRAIN/swa: swa_default
+  - EVAL: eval_default
+  - INFER: infer_default
+  - _self_
+
 hydra:
   run:
     # dynamic output directory according to running time and override name
-    dir: __exp__/${data_name}/${now:%Y_%m_%d_%H_%M_%S}
+    dir: outputs_tgcn/${now:%Y-%m-%d}/${now:%H-%M-%S}
   job:
     name: ${mode} # name of logfile
     chdir: false # keep current working directory unchanged
-    config:
-      override_dirname:
-        exclude_keys:
-          - mode
-          - output_dir
-          - log_freq
+  callbacks:
+    init_callback:
+      _target_: ppsci.utils.callbacks.InitCallback
   sweep:
     # output directory for multirun
     dir: ${hydra.run.dir}
     subdir: ./
 
-
 # general settings
 device: gpu
 mode: train
@@ -34,9 +39,8 @@ reduce: mean
 
 # model settings
 MODEL:
-  afno:
-    input_keys: ['input']
-    label_keys: ['label']
+  input_keys: ['input']
+  label_keys: ['label']
 
 seed: 3407
 batch_size: 64
@@ -60,5 +64,5 @@ TRAIN:
 
 # evaluation settings
 EVAL:
-  pretrained_model_path: ${data_name}_best.pdparams
+  pretrained_model_path: null
   batch_size: ${batch_size}
diff --git a/examples/tgcn/run.py b/examples/tgcn/run.py
@@ -1,83 +1,75 @@
-import hydra
 import os
-import ppsci
 
+import hydra
 from omegaconf import DictConfig
-from ppsci.utils import logger
+
+import ppsci
 from ppsci.arch.tgcn import TGCN
 from ppsci.data.dataset.pems_dataset import get_edge_index
 
 
 def train(cfg: DictConfig):
-    # set random seed for reproducibility
-    ppsci.utils.misc.set_random_seed(cfg.seed)
-
-    # initialize logger
-    logger.init_logger('ppsci', os.path.join(cfg.output_dir, 'train.log'), 'info')
-    logger.message(cfg)
-
     # set train dataloader config
     train_dataloader_cfg = {
-        'dataset': {
-            'name': 'PEMSDataset',
-            'file_path': cfg.data_path,
-            'split': 'train',
-            'input_keys': cfg.MODEL.afno.input_keys,
-            'label_keys': cfg.MODEL.afno.label_keys,
-            'norm_input': cfg.norm_input,
-            'norm_label': cfg.norm_label,
-            'input_len': cfg.input_len,
-            'label_len': cfg.label_len
+        "dataset": {
+            "name": "PEMSDataset",
+            "file_path": cfg.data_path,
+            "split": "train",
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.label_keys,
+            "norm_input": cfg.norm_input,
+            "norm_label": cfg.norm_label,
+            "input_len": cfg.input_len,
+            "label_len": cfg.label_len,
         },
-        'sampler': {
-            'name': 'BatchSampler',
-            'drop_last': True,
-            'shuffle': True,
+        "sampler": {
+            "name": "BatchSampler",
+            "drop_last": True,
+            "shuffle": True,
         },
-        'batch_size': cfg.TRAIN.batch_size
+        "batch_size": cfg.TRAIN.batch_size,
     }
 
     # set constraint
-    sup_constraint = ppsci.constraint.SupervisedConstraint(train_dataloader_cfg, ppsci.loss.L1Loss(), name='train')
+    sup_constraint = ppsci.constraint.SupervisedConstraint(
+        train_dataloader_cfg, ppsci.loss.L1Loss(), name="train"
+    )
     constraint = {sup_constraint.name: sup_constraint}
 
     # set eval dataloader config
     eval_dataloader_cfg = {
-        'dataset': {
-            'name': 'PEMSDataset',
-            'file_path': cfg.data_path,
-            'split': 'val',
-            'input_keys': cfg.MODEL.afno.input_keys,
-            'label_keys': cfg.MODEL.afno.label_keys,
-            'norm_input': cfg.norm_input,
-            'norm_label': cfg.norm_label,
-            'input_len': cfg.input_len,
-            'label_len': cfg.label_len
+        "dataset": {
+            "name": "PEMSDataset",
+            "file_path": cfg.data_path,
+            "split": "val",
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.label_keys,
+            "norm_input": cfg.norm_input,
+            "norm_label": cfg.norm_label,
+            "input_len": cfg.input_len,
+            "label_len": cfg.label_len,
         },
-        'sampler': {
-            'name': 'BatchSampler',
-            'drop_last': False,
-            'shuffle': False,
+        "sampler": {
+            "name": "BatchSampler",
+            "drop_last": False,
+            "shuffle": False,
         },
-        'batch_size': cfg.EVAL.batch_size
+        "batch_size": cfg.EVAL.batch_size,
     }
 
     # set validator
     sup_validator = ppsci.validate.SupervisedValidator(
         eval_dataloader_cfg,
         ppsci.loss.L1Loss(),
-        metric={
-            'MAE': ppsci.metric.MAE(),
-            'RMSE': ppsci.metric.RMSE()
-        },
-        name='val'
+        metric={"MAE": ppsci.metric.MAE(), "RMSE": ppsci.metric.RMSE()},
+        name="val",
     )
     validator = {sup_validator.name: sup_validator}
 
     # get adj
-    edge_index, edge_attr, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
+    _, _, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
     # set model
-    model = TGCN(edge_index=edge_index, edge_attr=edge_attr, adj=adj, cfg=cfg)
+    model = TGCN(adj=adj, cfg=cfg)
     # init optimizer
     optimizer = ppsci.optimizer.Adam(learning_rate=cfg.TRAIN.learning_rate)(model)
     # set iters_per_epoch by dataloader length
@@ -103,7 +95,7 @@ def train(cfg: DictConfig):
         validator=validator,
         pretrained_model_path=cfg.TRAIN.pretrained_model_path,
         # compute_metric_by_batch=False,
-        eval_with_no_grad=True
+        eval_with_no_grad=True,
     )
     # train model
     solver.train()
@@ -112,55 +104,47 @@ def train(cfg: DictConfig):
 
 
 def eval(cfg: DictConfig):
-    # set random seed for reproducibility
-    ppsci.utils.misc.set_random_seed(cfg.seed)
-
-    # initialize logger
-    logger.init_logger('ppsci', os.path.join(cfg.output_dir, 'test.log'), 'info')
-    logger.message(cfg)
-
     # set eval dataloader config
     test_dataloader_cfg = {
-        'dataset': {
-            'name': 'PEMSDataset',
-            'file_path': cfg.data_path,
-            'split': 'test',
-            'input_keys': cfg.MODEL.afno.input_keys,
-            'label_keys': cfg.MODEL.afno.label_keys,
-            'norm_input': cfg.norm_input,
-            'norm_label': cfg.norm_label,
-            'input_len': cfg.input_len,
-            'label_len': cfg.label_len
+        "dataset": {
+            "name": "PEMSDataset",
+            "file_path": cfg.data_path,
+            "split": "test",
+            "input_keys": cfg.MODEL.input_keys,
+            "label_keys": cfg.MODEL.label_keys,
+            "norm_input": cfg.norm_input,
+            "norm_label": cfg.norm_label,
+            "input_len": cfg.input_len,
+            "label_len": cfg.label_len,
         },
-        'sampler': {
-            'name': 'BatchSampler',
-            'drop_last': False,
-            'shuffle': False,
+        "sampler": {
+            "name": "BatchSampler",
+            "drop_last": False,
+            "shuffle": False,
         },
-        'batch_size': cfg.EVAL.batch_size
+        "batch_size": cfg.EVAL.batch_size,
     }
 
     # set validator
     sup_validator = ppsci.validate.SupervisedValidator(
         test_dataloader_cfg,
         ppsci.loss.L1Loss(),
-        metric={
-            'MAE': ppsci.metric.MAE(),
-            'RMSE': ppsci.metric.RMSE()
-        },
-        name='test'
+        metric={"MAE": ppsci.metric.MAE(), "RMSE": ppsci.metric.RMSE()},
+        name="test",
     )
     validator = {sup_validator.name: sup_validator}
 
     # get adj
-    edge_index, edge_attr, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
+    _, _, adj = get_edge_index(cfg.data_path, reduce=cfg.reduce)
     # set model
-    model = TGCN(edge_index=edge_index, edge_attr=edge_attr, adj=adj, cfg=cfg)
+    model = TGCN(adj=adj, cfg=cfg)
     # best model
-    if cfg.mode == 'train':
-        best_model_path = os.path.join(cfg.output_dir, 'checkpoints', 'best_model.pdparams')  # call in train()
+    if cfg.mode == "train":
+        best_model_path = os.path.join(
+            cfg.output_dir, "checkpoints", "best_model.pdparams"
+        )  # call in train()
     else:
-        best_model_path = cfg.EVAL.pretrained_model_path
+        best_model_path = cfg.data_name + "_pretrained_model.pdparams"
 
     # initialize solver
     solver = ppsci.solver.Solver(
@@ -182,23 +166,23 @@ def eval(cfg: DictConfig):
         validator=validator,
         pretrained_model_path=best_model_path,
         # compute_metric_by_batch=False,
-        eval_with_no_grad=True
+        eval_with_no_grad=True,
     )
     # evaluate
     solver.eval()
 
 
-@hydra.main(
-    version_base=None, config_path='./conf', config_name='run.yaml'
-)
+@hydra.main(version_base=None, config_path="./conf", config_name="run.yaml")
 def main(cfg: DictConfig):
-    if cfg.mode == 'train':
+    if cfg.mode == "train":
         train(cfg)
-    elif cfg.mode == 'eval':
+    elif cfg.mode == "eval":
         eval(cfg)
     else:
-        raise ValueError('cfg.mode should in [train, eval], but got {}'.format(cfg.mode))
+        raise ValueError(
+            "cfg.mode should in [train, eval], but got {}".format(cfg.mode)
+        )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -96,6 +96,7 @@ nav:
         - EarthFormer: zh/examples/earthformer.md
         - GraphCast: zh/examples/graphcast.md
         - VelocityGAN: zh/examples/velocity_gan.md
+        - TGCN: zh/examples/tgcn.md
       - 化学科学(AI for Chemistry):
         - Moflow: zh/examples/moflow.md