Publications
2025
- TKDEExploring Progress in Multivariate Time Series Forecasting: Comprehensive Benchmarking and Heterogeneity AnalysisZezhi Shao, Fei Wang*, Yongjun Xu*, Wei Wei, Chengqing Yu, Zhao Zhang, Di Yao, Tao Sun, Guangyin Jin, Xin Cao, and 3 more authorsIEEE Transactions on Knowledge and Data Engineering, Jan 2025
BasicTS+ has entered the ESI Highly Cited Papers list and has acquired 1.3k+ GitHub stars as of 2025. It is one of the first — and one of the most popular — fair and scalable benchmarks for time series forecasting.
@article{10726722,
  author   = {Shao, Zezhi and Wang, Fei and Xu, Yongjun and Wei, Wei and Yu, Chengqing and Zhang, Zhao and Yao, Di and Sun, Tao and Jin, Guangyin and Cao, Xin and Cong, Gao and Jensen, Christian S. and Cheng, Xueqi},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  title    = {Exploring Progress in Multivariate Time Series Forecasting: Comprehensive Benchmarking and Heterogeneity Analysis},
  year     = {2025},
  month    = jan,
  volume   = {37},
  number   = {1},
  pages    = {291--305},
  keywords = {Forecasting;Time series analysis;Benchmark testing;Transformers;Predictive models;Data models;Computer science;Reliability;Proposals;Electricity;Benchmarking;multivariate time series;spatial-temporal forecasting;long-term time series forecasting},
  doi      = {10.1109/TKDE.2024.3484454},
}
- Information FusionMGSFformer: A Multi-Granularity Spatiotemporal Fusion Transformer for air quality predictionChengqing Yu, Fei Wang*, Yilun Wang, Zezhi Shao, Tao Sun, Di Yao, and Yongjun Xu*Information Fusion, Jan 2025
Air quality spatiotemporal prediction can provide technical support for environmental governance and sustainable city development. As a classic multi-source spatiotemporal data, effective multi-source information fusion is key to achieving accurate air quality predictions. However, due to not fully fusing two pieces of information, classical deep learning models struggle to achieve satisfactory prediction results: (1) Multi-granularity: each air monitoring station collects air quality data at different sampling intervals, which show distinct time series patterns. (2) Spatiotemporal correlation: due to human activities and atmospheric diffusion, there exist correlations between air quality data from different air monitoring stations, necessitating the consideration of other air monitoring stations’ influences when modeling each air quality time series. In this study, to achieve satisfactory prediction results, we propose the Multi-Granularity Spatiotemporal Fusion Transformer, comprised of the residual de-redundant block, spatiotemporal attention block, and dynamic fusion block. Specifically, the residual de-redundant block eliminates information redundancy between data with different granularities and prevents the model from being misled by redundant information. The spatiotemporal attention block captures the spatiotemporal correlation of air quality data and facilitates prediction modeling. The dynamic fusion block evaluates the importance of data with different granularities and integrates the prediction results. Experimental results demonstrate that the proposed model surpasses 11 baselines by 5% in performance on three real-world datasets.
@article{YU2025102607,
  title    = {MGSFformer: A Multi-Granularity Spatiotemporal Fusion Transformer for air quality prediction},
  journal  = {Information Fusion},
  volume   = {113},
  pages    = {102607},
  year     = {2025},
  month    = jan,
  issn     = {1566-2535},
  doi      = {10.1016/j.inffus.2024.102607},
  url      = {https://www.sciencedirect.com/science/article/pii/S1566253524003853},
  author   = {Yu, Chengqing and Wang, Fei and Wang, Yilun and Shao, Zezhi and Sun, Tao and Yao, Di and Xu, Yongjun},
  keywords = {Air quality prediction, Multi-Granularity Spatiotemporal Fusion Transformer, Spatiotemporal correlation, Multi-source information fusion},
}
- KDDEfficient large-scale traffic forecasting with transformers: A spatial data management perspectiveYuchen Fang, Yuxuan Liang, Bo Hui, Zezhi Shao, Liwei Deng, Xu Liu, Xinke Jiang, and Kai ZhengIn Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Toronto, ON, Canada, Feb 2025
Road traffic forecasting is crucial in real-world intelligent transportation scenarios like traffic dispatching and path planning in city management and personal traveling. Spatio-temporal graph neural networks (STGNNs) stand out as the mainstream solution in this task. Nevertheless, the quadratic complexity of remarkable dynamic spatial modeling-based STGNNs has become the bottleneck over large-scale traffic data. From the spatial data management perspective, we present a novel Transformer framework called PatchSTG to efficiently and dynamically model spatial dependencies for large-scale traffic forecasting with interpretability and fidelity. Specifically, we design a novel irregular spatial patching to reduce the number of points involved in the dynamic calculation of Transformer. The irregular spatial patching first utilizes the leaf K-dimensional tree (KDTree) to recursively partition irregularly distributed traffic points into leaf nodes with a small capacity, and then merges leaf nodes belonging to the same subtree into occupancy-equaled and non-overlapped patches through padding and backtracking. Based on the patched data, depth and breadth attention are used interchangeably in the encoder to dynamically learn local and global spatial knowledge from points in a patch and points with the same index of patches. Experimental results on four real world large-scale traffic datasets show that our PatchSTG achieves train speed and memory utilization improvements up to 10x and 4x with the state-of-the-art performance.
@inproceedings{patchstg,
  author    = {Fang, Yuchen and Liang, Yuxuan and Hui, Bo and Shao, Zezhi and Deng, Liwei and Liu, Xu and Jiang, Xinke and Zheng, Kai},
  title     = {Efficient large-scale traffic forecasting with transformers: A spatial data management perspective},
  year      = {2025},
  month     = feb,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  booktitle = {Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  keywords  = {traffic forecasting, Transformer, spatial data management},
  location  = {Toronto, ON, Canada},
  series    = {KDD '25},
}
- InnovationFoundation models and intelligent decision-making: Progress, challenges, and perspectivesJincai Huang†, Yongjun Xu†, Qi Wang†, Qi (Cheems) Wang†, Xingxing Liang†, Fei Wang†, Zhao Zhang†, Wei Wei†, Boxuan Zhang†, Libo Huang†, and 61 more authorsThe Innovation, Jun 2025
Intelligent decision-making (IDM) is a cornerstone of artificial intelligence (AI) designed to automate or augment decision processes. Modern IDM paradigms integrate advanced frameworks to enable intelligent agents to make effective and adaptive choices and decompose complex tasks into manageable steps, such as AI agents and high-level reinforcement learning. Recent advances in multimodal foundation-based approaches unify diverse input modalities—such as vision, language, and sensory data—into a cohesive decision-making process. Foundation models (FMs) have become pivotal in science and industry, transforming decision-making and research capabilities. Their large-scale, multimodal data-processing abilities foster adaptability and interdisciplinary breakthroughs across fields such as healthcare, life sciences, and education. This survey examines IDM’s evolution, advanced paradigms with FMs and their transformative impact on decision-making across diverse scientific and industrial domains, highlighting the challenges and opportunities in building efficient, adaptive, and ethical decision systems.
@article{HUANG2025100948,
  title    = {Foundation models and intelligent decision-making: Progress, challenges, and perspectives},
  journal  = {The Innovation},
  volume   = {6},
  number   = {6},
  pages    = {100948},
  year     = {2025},
  month    = jun,
  issn     = {2666-6758},
  doi      = {10.1016/j.xinn.2025.100948},
  url      = {https://www.sciencedirect.com/science/article/pii/S2666675825001511},
  author   = {Huang, Jincai and Xu, Yongjun and Wang, Qi and Wang, Qi (Cheems) and Liang, Xingxing and Wang, Fei and Zhang, Zhao and Wei, Wei and Zhang, Boxuan and Huang, Libo and Chang, Jingru and Ma, Liantao and Ma, Ting and Liang, Yuxuan and Zhang, Jie and Guo, Jian and Jiang, Xuhui and Fan, Xinxin and An, Zhulin and Li, Tingting and Li, Xuefei and Shao, Zezhi and Qian, Tangwen and Sun, Tao and Diao, Boyu and Yang, Chuanguang and Yu, Chenqing and Wu, Yiqing and Li, Mengxian and Zhang, Haifeng and Zeng, Yongcheng and Zhang, Zhicheng and Zhu, Zhengqiu and Lv, Yiqin and Li, Aming and Chen, Xu and An, Bo and Xiao, Wei and Bai, Chenguang and Mao, Yuxing and Yin, Zhigang and Gui, Sheng and Su, Wentao and Zhu, Yinghao and Gao, Junyi and He, Xinyu and Li, Yizhou and Jin, Guangyin and Ao, Xiang and Zhai, Xuehao and Tan, Haoran and Yun, Lijun and Shi, Hongquan and Li, Jun and Fan, Changjun and Huang, Kuihua and Harrison, Ewen and Leung, Victor C.M. and Qiu, Sihang and Dong, Yanjie and Zheng, Xiaolong and Wang, Gang and Zheng, Yu and Wang, Yuanzhuo and Guo, Jiafeng and Wang, Lizhe and Cheng, Xueqi and Wang, Yaonan and Yang, Shanlin and Fu, Mengyin and Fei, Aiguo},
  keywords = {artificial intelligence, intelligent decision-making, foundation models, agent, large language model},
}
- TKDEGinAR+: A Robust End-To-End Framework for Multivariate Time Series Forecasting with Missing ValuesChengqing Yu, Fei Wang*, Zezhi Shao, Tangwen Qian, Zhao Zhang, Wei Wei, Zhulin An, Qi Wang, and Yongjun XuIEEE Transactions on Knowledge and Data Engineering, May 2025
@article{11002729,
  author   = {Yu, Chengqing and Wang, Fei and Shao, Zezhi and Qian, Tangwen and Zhang, Zhao and Wei, Wei and An, Zhulin and Wang, Qi and Xu, Yongjun},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  title    = {GinAR+: A Robust End-To-End Framework for Multivariate Time Series Forecasting with Missing Values},
  year     = {2025},
  month    = may,
  pages    = {1--14},
  keywords = {Correlation;Predictive models;Forecasting;Time series analysis;Data models;Robustness;Adaptation models;Imputation;Contrastive learning;Training;Contrastive learning;Graph interpolation attention recursive network;Multivariate Time Series Forecasting with Missing Values},
  doi      = {10.1109/TKDE.2025.3569649},
}
- InnovationSpatial-temporal large models: A super hub linking multiple scientific areas with artificial intelligenceZezhi Shao, Tangwen Qian, Tao Sun, Fei Wang*, and Yongjun Xu*The Innovation, May 2025
@article{SHAO2025100763,
  title   = {Spatial-temporal large models: A super hub linking multiple scientific areas with artificial intelligence},
  journal = {The Innovation},
  volume  = {6},
  number  = {2},
  pages   = {100763},
  year    = {2025},
  month   = may,
  issn    = {2666-6758},
  doi     = {10.1016/j.xinn.2024.100763},
  url     = {https://www.sciencedirect.com/science/article/pii/S2666675824002017},
  author  = {Shao, Zezhi and Qian, Tangwen and Sun, Tao and Wang, Fei and Xu, Yongjun},
}
- PRTrajectory-User Linking via Multi-Scale Graph Attention NetworkYujie Li, Tao Sun, Zezhi Shao, Yiqiang Zhen, Yongjun Xu, and Fei Wang*Pattern Recognition, Feb 2025
Trajectory-User Linking (TUL) aims to link anonymous trajectories to their owners, which is considered an essential task in discovering human mobility patterns. Although existing TUL studies have shown promising results, they still have specific defects in the perception of spatio-temporal properties of trajectories, which manifested in the following three problems: missing context of the original trajectory, ignorance of spatial information, and high computational complexity. To address those issues, we revisit the characteristics of the trajectory and propose a novel model called TULMGAT (TUL via Multi-Scale Graph Attention Network) based on masked self-attention graph neural networks. Specifically, TULMGAT consists of four components: construction of check-in oriented graphs, node embedding, trajectory embedding, and trajectory user linking. Sufficient experiments on two publicly available datasets have shown that TULMGAT is the state-of-the-art model in task TUL compared to the baselines with an improvement of about 8% in accuracy and only a quarter of the fastest baseline in runtime. Furthermore, model validity experiments have verified the role of each module.
@article{LI2025110978,
  title    = {Trajectory-User Linking via Multi-Scale Graph Attention Network},
  journal  = {Pattern Recognition},
  volume   = {158},
  pages    = {110978},
  year     = {2025},
  month    = feb,
  issn     = {0031-3203},
  doi      = {10.1016/j.patcog.2024.110978},
  url      = {https://www.sciencedirect.com/science/article/pii/S0031320324007295},
  author   = {Li, Yujie and Sun, Tao and Shao, Zezhi and Zhen, Yiqiang and Xu, Yongjun and Wang, Fei},
  keywords = {Trajectory-user linking, Graph neural network, Trajectory classification, Spatio-temporal data mining, Check-in data},
}
- KDDBLAST: Balanced Sampling Time Series Corpus for Universal Forecasting ModelsZezhi Shao, Yujie Li, Fei Wang*, Chengqing Yu, Yisong Fu, Tangwen Qian, Bin Xu, Boyu Diao, Yongjun Xu, and Xueqi ChengIn Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Toronto, ON, Canada, Aug 2025
The advent of universal time series forecasting models has revolutionized zero-shot forecasting across diverse domains, yet the critical role of data diversity in training these models remains underexplored. Existing large-scale time series datasets often suffer from inherent biases and imbalanced distributions, leading to suboptimal model performance and generalization. To address this gap, we introduce BLAST, a novel pre-training corpus designed to enhance data diversity through a balanced sampling strategy. First, BLAST incorporates 321 billion observations from publicly available datasets and employs a comprehensive suite of statistical metrics to characterize time series patterns. Then, to facilitate pattern-oriented sampling, the data is implicitly clustered using grid-based partitioning. Furthermore, by integrating grid sampling and grid mixup techniques, BLAST ensures a balanced and representative coverage of diverse patterns. Experimental results demonstrate that models pre-trained on BLAST achieve state-of-the-art performance with a fraction of the computational resources and training tokens required by existing methods. Our findings highlight the pivotal role of data diversity in improving both training efficiency and model performance for the universal forecasting task.
@inproceedings{blast,
  author    = {Shao, Zezhi and Li, Yujie and Wang, Fei and Yu, Chengqing and Fu, Yisong and Qian, Tangwen and Xu, Bin and Diao, Boyu and Xu, Yongjun and Cheng, Xueqi},
  title     = {BLAST: Balanced Sampling Time Series Corpus for Universal Forecasting Models},
  year      = {2025},
  month     = aug,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  booktitle = {Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  keywords  = {large-scale time series dataset, balanced sampling, universal time series forecasting},
  location  = {Toronto, ON, Canada},
  series    = {KDD '25},
}
2024
- TKDESpatio-Temporal Graph Neural Networks for Predictive Learning in Urban Computing: A SurveyGuangyin Jin, Yuxuan Liang, Yuchen Fang, Zezhi Shao, Jincai Huang, Junbo Zhang, and Yu ZhengIEEE Transactions on Knowledge and Data Engineering, Nov 2024
This paper has entered the ESI Highly Cited Papers list.
@article{DBLP:journals/tkde/JinLFSHZZ24,
  author   = {Jin, Guangyin and Liang, Yuxuan and Fang, Yuchen and Shao, Zezhi and Huang, Jincai and Zhang, Junbo and Zheng, Yu},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  title    = {Spatio-Temporal Graph Neural Networks for Predictive Learning in Urban Computing: {A} Survey},
  year     = {2024},
  month    = nov,
  volume   = {36},
  number   = {10},
  pages    = {5388--5408},
  keywords = {spatio-temporal data mining, graph neural networks, urban computing, predictive learning, time series},
  doi      = {10.1109/TKDE.2023.3333824},
}
- InnovationArtificial intelligence for geoscience: Progress, challenges, and perspectivesTianjie Zhao†, Sheng Wang†, Chaojun Ouyang†, Min Chen†, Chenying Liu†, Jin Zhang†, Long Yu†, Fei Wang†, Yong Xie†, Jun Li†, and 41 more authorsThe Innovation, Sep 2024
This paper explores the evolution of geoscientific inquiry, tracing the progression from traditional physics-based models to modern data-driven approaches facilitated by significant advancements in artificial intelligence (AI) and data collection techniques. Traditional models, which are grounded in physical and numerical frameworks, provide robust explanations by explicitly reconstructing underlying physical processes. However, their limitations in comprehensively capturing Earth’s complexities and uncertainties pose challenges in optimization and real-world applicability. In contrast, contemporary data-driven models, particularly those utilizing machine learning (ML) and deep learning (DL), leverage extensive geoscience data to glean insights without requiring exhaustive theoretical knowledge. ML techniques have shown promise in addressing Earth science-related questions. Nevertheless, challenges such as data scarcity, computational demands, data privacy concerns, and the “black-box” nature of AI models hinder their seamless integration into geoscience. The integration of physics-based and data-driven methodologies into hybrid models presents an alternative paradigm. These models, which incorporate domain knowledge to guide AI methodologies, demonstrate enhanced efficiency and performance with reduced training data requirements. This review provides a comprehensive overview of geoscientific research paradigms, emphasizing untapped opportunities at the intersection of advanced AI techniques and geoscience. It examines major methodologies, showcases advances in large-scale models, and discusses the challenges and prospects that will shape the future landscape of AI in geoscience. The paper outlines a dynamic field ripe with possibilities, poised to unlock new understandings of Earth’s complexities and further advance geoscience exploration.
@article{ZHAO2024100691,
  title    = {Artificial intelligence for geoscience: Progress, challenges, and perspectives},
  journal  = {The Innovation},
  volume   = {5},
  number   = {5},
  pages    = {100691},
  year     = {2024},
  month    = sep,
  issn     = {2666-6758},
  doi      = {10.1016/j.xinn.2024.100691},
  url      = {https://www.sciencedirect.com/science/article/pii/S2666675824001292},
  author   = {Zhao, Tianjie and Wang, Sheng and Ouyang, Chaojun and Chen, Min and Liu, Chenying and Zhang, Jin and Yu, Long and Wang, Fei and Xie, Yong and Li, Jun and Wang, Fang and Grunwald, Sabine and Wong, Bryan M. and Zhang, Fan and Qian, Zhen and Xu, Yongjun and Yu, Chengqing and Han, Wei and Sun, Tao and Shao, Zezhi and Qian, Tangwen and Chen, Zhao and Zeng, Jiangyuan and Zhang, Huai and Letu, Husi and Zhang, Bing and Wang, Li and Luo, Lei and Shi, Chong and Su, Hongjun and Zhang, Hongsheng and Yin, Shuai and Huang, Ni and Zhao, Wei and Li, Nan and Zheng, Chaolei and Zhou, Yang and Huang, Changping and Feng, Defeng and Xu, Qingsong and Wu, Yan and Hong, Danfeng and Wang, Zhenyu and Lin, Yinyi and Zhang, Tangtang and Kumar, Prashant and Plaza, Antonio and Chanussot, Jocelyn and Zhang, Jiabao and Shi, Jiancheng and Wang, Lizhe},
  keywords = {artificial intelligence, machine learning, deep learning, geoscience},
}
- KDDGinAR: An End-To-End Multivariate Time Series Forecasting Model Suitable for Variable MissingChengqing Yu, Fei Wang*, Zezhi Shao, Tangwen Qian, Zhao Zhang, Wei Wei, and Yongjun Xu*In Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Barcelona, Spain, Aug 2024
Multivariate time series forecasting (MTSF) is crucial for decision-making to precisely forecast the future values/trends, based on the complex relationships identified from historical observations of multiple sequences. Recently, Spatial-Temporal Graph Neural Networks (STGNNs) have gradually become the theme of MTSF model as their powerful capability in mining spatial-temporal dependencies, but almost of them heavily rely on the assumption of historical data integrity. In reality, due to factors such as data collector failures and time-consuming repairment, it is extremely challenging to collect the whole historical observations without missing any variable. In this case, STGNNs can only utilize a subset of normal variables and easily suffer from the incorrect spatial-temporal dependency modeling issue, resulting in the degradation of their forecasting performance. To address the problem, in this paper, we propose a novel Graph Interpolation Attention Recursive Network (named GinAR) to precisely model the spatial-temporal dependencies over the limited collected data for forecasting. In GinAR, it consists of two key components, that is, interpolation attention and adaptive graph convolution to take place of the fully connected layer of simple recursive units, and thus are capable of recovering all missing variables and reconstructing the correct spatial-temporal dependencies for recursively modeling of multivariate time series data, respectively. Extensive experiments conducted on five real-world datasets demonstrate that GinAR outperforms 11 SOTA baselines, and even when 90% of variables are missing, it can still accurately predict the future values of all variables.
@inproceedings{10.1145/3637528.3672055,
  author    = {Yu, Chengqing and Wang, Fei and Shao, Zezhi and Qian, Tangwen and Zhang, Zhao and Wei, Wei and Xu, Yongjun},
  title     = {GinAR: An End-To-End Multivariate Time Series Forecasting Model Suitable for Variable Missing},
  year      = {2024},
  month     = aug,
  isbn      = {9798400704901},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3637528.3672055},
  doi       = {10.1145/3637528.3672055},
  booktitle = {Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  pages     = {3989--4000},
  numpages  = {12},
  keywords  = {adaptive graph convolution, graph interpolation attention recursive network, interpolation attention, multivariate time series forecasting, variable missing},
  location  = {Barcelona, Spain},
  series    = {KDD '24},
}
- ICASSPDynamic Frequency Domain Graph Convolutional Network for Traffic ForecastingYujie Li, Zezhi Shao, Yongjun Xu, Qiang Qiu, Zhaogang Cao, and Fei WangIn ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Apr 2024
@inproceedings{10446144,
  author    = {Li, Yujie and Shao, Zezhi and Xu, Yongjun and Qiu, Qiang and Cao, Zhaogang and Wang, Fei},
  booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {Dynamic Frequency Domain Graph Convolutional Network for Traffic Forecasting},
  year      = {2024},
  month     = apr,
  pages     = {5245--5249},
  keywords  = {Convolution;Frequency-domain analysis;Time series analysis;Transportation;Traffic control;Spatial databases;Sensors;Traffic prediction;frequency domain signal processing;multivariate time series analysis;dynamic graph learning;graph convolution},
  doi       = {10.1109/ICASSP48485.2024.10446144},
}
2023
- CIKMDSformer: A Double Sampling Transformer for Multivariate Time Series Long-term PredictionChengqing Yu, Fei Wang*, Zezhi Shao, Tao Sun, Lin Wu, and Yongjun XuIn Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, Birmingham, United Kingdom, Oct 2023
The 2nd-most cited paper in CIKM 2023 (ranked 2/676).
Multivariate time series long-term prediction, which aims to predict the change of data in a long time, can provide references for decision-making. Although transformer-based models have made progress in this field, they usually do not make full use of three features of multivariate time series: global information, local information, and variables correlation. To effectively mine the above three features and establish a high-precision prediction model, we propose a double sampling transformer (DSformer), which consists of the double sampling (DS) block and the temporal variable attention (TVA) block. Firstly, the DS block employs down sampling and piecewise sampling to transform the original series into feature vectors that focus on global information and local information respectively. Then, TVA block uses temporal attention and variable attention to mine these feature vectors from different dimensions and extract key information. Finally, based on a parallel structure, DSformer uses multiple TVA blocks to mine and integrate different features obtained from DS blocks respectively. The integrated feature information is passed to the generative decoder based on a multi-layer perceptron to realize multivariate time series long-term prediction. Experimental results on nine real-world datasets show that DSformer can outperform eight existing baselines.
@inproceedings{10.1145/3583780.3614851,
  author    = {Yu, Chengqing and Wang, Fei and Shao, Zezhi and Sun, Tao and Wu, Lin and Xu, Yongjun},
  title     = {DSformer: A Double Sampling Transformer for Multivariate Time Series Long-term Prediction},
  year      = {2023},
  month     = oct,
  isbn      = {9798400701245},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3583780.3614851},
  doi       = {10.1145/3583780.3614851},
  booktitle = {Proceedings of the 32nd ACM International Conference on Information and Knowledge Management},
  pages     = {3062--3072},
  numpages  = {11},
  keywords  = {double sampling transformer, multivariate time series long-term prediction, temporal variable attention block},
  location  = {Birmingham, United Kingdom},
  series    = {CIKM '23},
}
- TKDEHeterogeneous Graph Neural Network With Multi-View Representation LearningZezhi Shao, Yongjun Xu, Wei Wei, Fei Wang, Zhao Zhang, and Feida ZhuIEEE Transactions on Knowledge and Data Engineering, Nov 2023
@article{9961953,
  author   = {Shao, Zezhi and Xu, Yongjun and Wei, Wei and Wang, Fei and Zhang, Zhao and Zhu, Feida},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  title    = {Heterogeneous Graph Neural Network With Multi-View Representation Learning},
  year     = {2023},
  month    = nov,
  volume   = {35},
  number   = {11},
  pages    = {11476--11488},
  keywords = {Semantics;Mercury (metals);Graph neural networks;Aggregates;Task analysis;Representation learning;Adaptation models;Heterogeneous graphs;graph neural networks;graph embedding},
  doi      = {10.1109/TKDE.2022.3224193},
}
- CIKMClustering-property Matters: A Cluster-aware Network for Large Scale Multivariate Time Series ForecastingYuan Wang, Zezhi Shao, Tao Sun, Chengqing Yu, Yongjun Xu, and Fei Wang*In Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, Birmingham, United Kingdom, Oct 2023
Large-scale Multivariate Time Series(MTS) widely exist in various real-world systems, imposing significant demands on model efficiency. A recent work, STID, addressed the high complexity issue of popular Spatial-Temporal Graph Neural Networks(STGNNs). Despite its success, when applied to large-scale MTS data, the number of parameters of STID for modeling spatial dependencies increases substantially, leading to over-parameterization issues and suboptimal performance. These observations motivate us to explore new approaches for modeling spatial dependencies in a parameter-friendly manner. In this paper, we argue that the spatial properties of variables are essentially the superposition of multiple cluster centers. Accordingly, we propose a Cluster-Aware Network(CANet), which effectively captures spatial dependencies by mining the implicit cluster centers of variables. CANet solely optimizes the cluster centers instead of the spatial information of all nodes, thereby significantly reducing the parameter amount. Extensive experiments on two large-scale datasets validate our motivation and demonstrate the superiority of CANet.
@inproceedings{10.1145/3583780.3615253,
  author    = {Wang, Yuan and Shao, Zezhi and Sun, Tao and Yu, Chengqing and Xu, Yongjun and Wang, Fei},
  title     = {Clustering-property Matters: A Cluster-aware Network for Large Scale Multivariate Time Series Forecasting},
  year      = {2023},
  month     = oct,
  isbn      = {9798400701245},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3583780.3615253},
  doi       = {10.1145/3583780.3615253},
  booktitle = {Proceedings of the 32nd ACM International Conference on Information and Knowledge Management},
  pages     = {4340--4344},
  numpages  = {5},
  keywords  = {multivariate time series forecasting, large-scale, cluster centers},
  location  = {Birmingham, United Kingdom},
  series    = {CIKM '23},
}
2022
- CIKMSpatial-Temporal Identity: A Simple yet Effective Baseline for Multivariate Time Series ForecastingZezhi Shao, Zhao Zhang, Fei Wang*, Wei Wei, and Yongjun XuIn Proceedings of the 31st ACM International Conference on Information & Knowledge Management, Atlanta, GA, USA, Oct 2022
The most cited paper in CIKM 2022 (ranked 1/561).
Multivariate Time Series (MTS) forecasting plays a vital role in a wide range of applications. Recently, Spatial-Temporal Graph Neural Networks (STGNNs) have become increasingly popular MTS forecasting methods due to their state-of-the-art performance. However, recent works are becoming more sophisticated with limited performance improvements. This phenomenon motivates us to explore the critical factors of MTS forecasting and design a model that is as powerful as STGNNs, but more concise and efficient. In this paper, we identify the indistinguishability of samples in both spatial and temporal dimensions as a key bottleneck, and propose a simple yet effective baseline for MTS forecasting by attaching Spatial and Temporal IDentity information (STID), which achieves the best performance and efficiency simultaneously based on simple Multi-Layer Perceptrons (MLPs). These results suggest that we can design efficient and effective models as long as they solve the indistinguishability of samples, without being limited to STGNNs.
@inproceedings{10.1145/3511808.3557702,
  author    = {Shao, Zezhi and Zhang, Zhao and Wang, Fei and Wei, Wei and Xu, Yongjun},
  title     = {Spatial-Temporal Identity: A Simple yet Effective Baseline for Multivariate Time Series Forecasting},
  year      = {2022},
  month     = oct,
  isbn      = {9781450392365},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3511808.3557702},
  doi       = {10.1145/3511808.3557702},
  booktitle = {Proceedings of the 31st ACM International Conference on Information \& Knowledge Management},
  pages     = {4454--4458},
  numpages  = {5},
  keywords  = {spatial-temporal graph neural network, multivariate time series forecasting, baseline},
  location  = {Atlanta, GA, USA},
  series    = {CIKM '22},
}
- KDDPre-training Enhanced Spatial-temporal Graph Neural Network for Multivariate Time Series ForecastingZezhi Shao, Zhao Zhang, Fei Wang*, and Yongjun XuIn Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Washington DC, USA, Aug 2022
The 3rd-most cited paper in KDD 2022 (ranked 3/254).
Multivariate Time Series (MTS) forecasting plays a vital role in a wide range of applications. Recently, Spatial-Temporal Graph Neural Networks (STGNNs) have become increasingly popular MTS forecasting methods. STGNNs jointly model the spatial and temporal patterns of MTS through graph neural networks and sequential models, significantly improving the prediction accuracy. But limited by model complexity, most STGNNs only consider short-term historical MTS data, such as data over the past one hour. However, the patterns of time series and the dependencies between them (i.e., the temporal and spatial patterns) need to be analyzed based on long-term historical MTS data. To address this issue, we propose a novel framework, in which STGNN is Enhanced by a scalable time series Pre-training model (STEP). Specifically, we design a pre-training model to efficiently learn temporal patterns from very long-term history time series (e.g., the past two weeks) and generate segment-level representations. These representations provide contextual information for short-term time series input to STGNNs and facilitate modeling dependencies between time series. Experiments on three public real-world datasets demonstrate that our framework is capable of significantly enhancing downstream STGNNs, and our pre-training model aptly captures temporal patterns.
@inproceedings{10.1145/3534678.3539396,
  author    = {Shao, Zezhi and Zhang, Zhao and Wang, Fei and Xu, Yongjun},
  title     = {Pre-training Enhanced Spatial-temporal Graph Neural Network for Multivariate Time Series Forecasting},
  booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  series    = {KDD '22},
  year      = {2022},
  month     = aug,
  isbn      = {9781450393850},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Washington DC, USA},
  pages     = {1567--1577},
  numpages  = {11},
  doi       = {10.1145/3534678.3539396},
  keywords  = {multivariate time series forecasting, pre-training model, spatial-temporal graph neural network},
}
- VLDBDecoupled Dynamic Spatial-Temporal Graph Neural Network for Traffic ForecastingZezhi Shao, Zhao Zhang, Wei Wei*, Fei Wang*, Yongjun Xu, Xin Cao, and Christian S. JensenProc. VLDB Endow., Jul 2022
The 3rd-Most Cited Paper in VLDB 2022 (ranked 3 of 357 accepted papers)
We all depend on mobility, and vehicular transportation affects the daily lives of most of us. Thus, the ability to forecast the state of traffic in a road network is an important functionality and a challenging task. Traffic data is often obtained from sensors deployed in a road network. Recent proposals on spatial-temporal graph neural networks have achieved great progress at modeling complex spatial-temporal correlations in traffic data, by modeling traffic data as a diffusion process. However, intuitively, traffic data encompasses two different kinds of hidden time series signals, namely the diffusion signals and inherent signals. Unfortunately, nearly all previous works coarsely consider traffic signals entirely as the outcome of the diffusion, while neglecting the inherent signals, which impacts model performance negatively. To improve modeling performance, we propose a novel Decoupled Spatial-Temporal Framework (DSTF) that separates the diffusion and inherent traffic information in a data-driven manner, which encompasses a unique estimation gate and a residual decomposition mechanism. The separated signals can be handled subsequently by the diffusion and inherent modules separately. Further, we propose an instantiation of DSTF, Decoupled Dynamic Spatial-Temporal Graph Neural Network (D2STGNN), that captures spatial-temporal correlations and also features a dynamic graph learning module that targets the learning of the dynamic characteristics of traffic networks. Extensive experiments with four real-world traffic datasets demonstrate that the framework is capable of advancing the state-of-the-art.
@article{10.14778/3551793.3551827,
  author    = {Shao, Zezhi and Zhang, Zhao and Wei, Wei and Wang, Fei and Xu, Yongjun and Cao, Xin and Jensen, Christian S.},
  title     = {Decoupled Dynamic Spatial-Temporal Graph Neural Network for Traffic Forecasting},
  journal   = {Proc. VLDB Endow.},
  volume    = {15},
  number    = {11},
  year      = {2022},
  month     = jul,
  pages     = {2733--2746},
  numpages  = {14},
  publisher = {VLDB Endowment},
  issn      = {2150-8097},
  doi       = {10.14778/3551793.3551827},
}
2021
- ICPRTrajectory-User Link with Attention Recurrent NetworksTao Sun, Yongjun Xu, Fei Wang, Lin Wu, Tangwen Qian, and Zezhi ShaoIn 2020 25th International Conference on Pattern Recognition (ICPR), May 2021
@inproceedings{9412453,
  author    = {Sun, Tao and Xu, Yongjun and Wang, Fei and Wu, Lin and Qian, Tangwen and Shao, Zezhi},
  title     = {Trajectory-User Link with Attention Recurrent Networks},
  booktitle = {2020 25th International Conference on Pattern Recognition (ICPR)},
  year      = {2021},
  month     = may,
  pages     = {4589--4596},
  doi       = {10.1109/ICPR48806.2021.9412453},
  keywords  = {Weight measurement;Training;Recurrent neural networks;Semantics;Graphics processing units;Trajectory;Pattern recognition},
}