Publications
2024
Zhengyuan Xie, Haiquan Lu, Jia-wen Xiao, Enguang Wang, Le Zhang, Xialei Liu
Early Preparation Pays Off: New Classifier Pre-tuning for Class Incremental Semantic Segmentation Conference
ECCV, 2024.
@conference{xie2024early,
title = {Early Preparation Pays Off: New Classifier Pre-tuning for Class Incremental Semantic Segmentation},
author = {Zhengyuan Xie and Haiquan Lu and Jia-wen Xiao and Enguang Wang and Le Zhang and Xialei Liu},
url = { https://github.com/zhengyuan-xie/ECCV24_NeST },
year = {2024},
date = {2024-11-13},
urldate = {2024-11-13},
booktitle = {ECCV},
abstract = {Class incremental semantic segmentation aims to preserve old knowledge while learning new tasks; however, it is impeded by catastrophic forgetting and background shift issues. Prior works indicate the pivotal importance of initializing new classifiers and mainly focus on transferring knowledge from the background classifier or preparing classifiers for future classes, neglecting the flexibility and variance of new classifiers. In this paper, we propose a new classifier pre-tuning (NeST) method applied before the formal training process, learning a transformation from old classifiers to generate new classifiers for initialization rather than directly tuning the parameters of new classifiers. Our method can make new classifiers align with the backbone and adapt to the new data, preventing drastic changes in the feature extractor when learning new classes. Besides, we design a strategy considering the cross-task class similarity to initialize matrices used in the transformation, helping achieve the stability-plasticity trade-off. Experiments on Pascal VOC 2012 and ADE20K datasets show that the proposed strategy can significantly improve the performance of previous methods. The code is available at https://github.com/zhengyuan-xie/ECCV24_NeST.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Cheng Gong, Yao Chen, Qiuyang Luo, Ye Lu, Tao Li, Yuzhi Zhang, Yufei Sun, Le Zhang
Deep Feature Surgery: Towards Accurate and Efficient Multi-Exit Networks Conference
ECCV, 2024.
@conference{gong2024deep,
title = {Deep Feature Surgery: Towards Accurate and Efficient Multi-Exit Networks},
author = {Cheng Gong and Yao Chen and Qiuyang Luo and Ye Lu and Tao Li and Yuzhi Zhang and Yufei Sun and Le Zhang},
year = {2024},
date = {2024-11-13},
urldate = {2024-11-13},
booktitle = {ECCV},
abstract = {Multi-exit network is a promising architecture for efficient model inference by sharing backbone networks and weights among multiple exits. However, the gradient conflict of the shared weights results in sub-optimal accuracy. This paper introduces Deep Feature Surgery (DFS), which consists of feature partitioning and feature referencing approaches to resolve gradient conflict issues during the training of multi-exit networks. The feature partitioning separates shared features along the depth axis among all exits to alleviate gradient conflict while simultaneously promoting joint optimization for each exit. Subsequently, feature referencing enhances multi-scale features for distinct exits across varying depths to improve the model accuracy. Furthermore, DFS reduces the training operations with the reduced complexity of backpropagation. Experimental results on CIFAR-100 and ImageNet datasets exhibit that DFS provides up to a 50.00% reduction in training time and attains up to a 6.94% enhancement in accuracy when contrasted with baseline methods across diverse models and tasks. Budgeted batch classification evaluation on MSDNet demonstrates that DFS uses fewer average FLOPs per image to achieve the same classification accuracy as baseline methods on CIFAR-100.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Bing Li, Wei Cui, Le Zhang, Qi Yang, Min Wu, Joey Tianyi Zhou
Democratizing Federated WiFi-based Human Activity Recognition Using Hypothesis Transfer Journal Article
In: IEEE Transactions on Mobile Computing, 2024.
@article{li2024democratizing,
title = {Democratizing Federated WiFi-based Human Activity Recognition Using Hypothesis Transfer},
author = {Bing Li and Wei Cui and Le Zhang and Qi Yang and Min Wu and Joey Tianyi Zhou},
year = {2024},
date = {2024-11-12},
urldate = {2024-11-12},
journal = {IEEE Transactions on Mobile Computing},
abstract = {Human activity recognition (HAR) is a crucial task in IoT systems with applications ranging from surveillance and intruder detection to home automation and more. Recently, non-invasive HAR utilizing WiFi signals has gained considerable attention due to advancements in ubiquitous WiFi technologies. However, recent studies have revealed significant privacy risks associated with WiFi signals, raising concerns about bio-information leakage. To address these concerns, the decentralized paradigm, particularly federated learning (FL), has emerged as a promising approach for training HAR models while preserving data privacy. Nevertheless, FL models may struggle in end-user environments due to substantial domain discrepancies between the source training data and the target end-user environment. This discrepancy arises from the sensitivity of WiFi signals to environmental changes, resulting in notable domain shifts. As a consequence, FL-based HAR approaches often face challenges when deployed in real-world WiFi environments. Although there are pioneering attempts at federated domain adaptation, they typically require non-trivial communication and computation costs, which are prohibitively expensive, especially considering the edge-based hardware of end-user environments. In this paper, we propose a model to democratize the WiFi-based HAR system by enhancing recognition accuracy in unannotated end-user environments while prioritizing data privacy. Our model leverages the hypothesis transfer and a lightweight hypothesis ensemble to mitigate negative transfer. We prove a tighter theoretical upper bound compared to existing multi-source federated domain adaptation models. Extensive experiments show that our model improves the average accuracy by approximately 10 absolute percentage points in both cross-person and cross-environment settings compared with several state-of-the-art baselines.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Le Zhang, Qibin Hou, Yun Liu, Jia-Wang Bian, Xun Xu, Joey Tianyi Zhou, Ce Zhu
Deep Negative Correlation Classification Journal Article
In: Machine Learning, 2024.
@article{zhang2024deep,
title = {Deep Negative Correlation Classification},
author = {Le Zhang and Qibin Hou and Yun Liu and Jia-Wang Bian and Xun Xu and Joey Tianyi Zhou and Ce Zhu},
year = {2024},
date = {2024-11-11},
urldate = {2024-11-11},
journal = {Machine Learning},
abstract = {Ensemble learning serves as a straightforward way to improve the performance of almost any machine learning algorithm. Existing deep ensemble methods usually naïvely train many different models and then aggregate their predictions. This is not optimal in our view from two aspects: i) Naïvely training multiple models adds much more computational burden, especially in the deep learning era; ii) Purely optimizing each base model without considering their interactions limits the diversity of the ensemble and the performance gains. We tackle these issues by proposing deep negative correlation classification (DNCC), in which the accuracy and diversity trade-off is systematically controlled by decomposing the loss function seamlessly into individual accuracy and the “correlation” between individual models and the ensemble. DNCC yields a deep classification ensemble where the individual estimator is both accurate and “negatively correlated”. Thanks to the optimized diversities, DNCC works well even when utilizing a shared network backbone, which significantly improves its efficiency when compared with most existing ensemble systems. Extensive experiments on multiple benchmark datasets and network structures demonstrate the superiority of the proposed method.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
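A note for context: the accuracy-diversity decomposition described above generalizes classic negative correlation learning (Liu and Yao, 1999). A sketch of that classic regression-form loss for base model i in an ensemble of M models (the paper's deep classification variant differs in detail):
% lambda controls the accuracy-diversity trade-off; lambda = 0 recovers independent training
e_i = \frac{1}{2}\left(f_i(x) - y\right)^2 + \lambda \left(f_i(x) - \bar{f}(x)\right) \sum_{j \neq i} \left(f_j(x) - \bar{f}(x)\right), \qquad \bar{f}(x) = \frac{1}{M} \sum_{m=1}^{M} f_m(x)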
Boyuan Sun, Yuqi Yang, Le Zhang, Ming-Ming Cheng, Qibin Hou
CorrMatch: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation Inproceedings
In: CVPR, 2024.
@inproceedings{sun2024corrmatch,
title = {CorrMatch: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation},
author = {Boyuan Sun and Yuqi Yang and Le Zhang and Ming-Ming Cheng and Qibin Hou},
year = {2024},
date = {2024-02-01},
urldate = {2024-02-01},
booktitle = {CVPR},
abstract = {In this paper, we present a simple but performant semi-supervised semantic segmentation approach, termed CorrMatch. Our goal is to mine more high-quality regions from the unlabeled images to leverage the unlabeled data more efficiently via consistency regularization. The key contributions of our CorrMatch are two novel and complementary strategies. First, we introduce an adaptive threshold updating strategy with a relaxed initialization to expand the high-quality regions. Furthermore, we propose to propagate high-confidence predictions through measuring the pairwise similarities between pixels. Despite its simplicity, we show that CorrMatch achieves great performance on popular semi-supervised semantic segmentation benchmarks. Taking the DeepLabV3+ framework with ResNet-101 backbone as our segmentation model, we receive a 76%+ mIoU score on the Pascal VOC 2012 segmentation benchmark with only 92 annotated images provided. We also achieve a consistent improvement over previous semi-supervised semantic segmentation models. Code will be made publicly available.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
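The pairwise-similarity propagation described above can be sketched in a few lines. A minimal illustration, assuming cosine similarity between pixel embeddings as the affinity; the tensor names and temperature tau are illustrative, not the authors' implementation:
import torch
import torch.nn.functional as F

def propagate_by_correlation(feats, logits, tau=0.1):
    # feats: (B, C, H, W) pixel embeddings; logits: (B, K, H, W) class logits
    B, C, H, W = feats.shape
    K = logits.shape[1]
    f = F.normalize(feats.flatten(2), dim=1)           # (B, C, HW), unit-norm per pixel
    sim = torch.einsum('bcm,bcn->bmn', f, f) / tau     # (B, HW, HW) pairwise similarities
    weights = sim.softmax(dim=-1)                      # row-normalized affinity
    l = logits.flatten(2).transpose(1, 2)              # (B, HW, K)
    out = torch.einsum('bmn,bnk->bmk', weights, l)     # spread predictions along affinities
    return out.transpose(1, 2).reshape(B, K, H, W)
Note the HW x HW similarity matrix is memory-hungry; in practice one would propagate on a downsampled feature map.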
Zhiwei Lin, Zhe Liu, Zhongyu Xia, Xinhao Wang, Yongtao Wang, Shengxiang Qi, Yang Dong, Nan Dong, Le Zhang, Ce Zhu
RCBEVDet: Radar-camera Fusion in Bird’s Eye View for 3D Object Detection Inproceedings
In: CVPR, 2024.
@inproceedings{lin2024rcbevdet,
title = {RCBEVDet: Radar-camera Fusion in Bird’s Eye View for 3D Object Detection},
author = {Zhiwei Lin and Zhe Liu and Zhongyu Xia and Xinhao Wang and Yongtao Wang and Shengxiang Qi and Yang Dong and Nan Dong and Le Zhang and Ce Zhu},
year = {2024},
date = {2024-02-01},
urldate = {2024-02-01},
booktitle = {CVPR},
abstract = {Three-dimensional object detection is one of the key tasks in autonomous driving. To reduce costs in practice, low-cost multi-view cameras for 3D object detection are proposed to replace the expensive LiDAR sensors. However, relying solely on cameras makes it difficult to achieve highly accurate and robust 3D object detection. An effective solution to this issue is combining multi-view cameras with the economical millimeter-wave radar sensor to achieve more reliable multi-modal 3D object detection. In this paper, we introduce RCBEVDet, a radar-camera fusion 3D object detection method in the bird’s eye view (BEV). Specifically, we first design RadarBEVNet for radar BEV feature extraction. RadarBEVNet consists of a dual-stream radar backbone and a Radar Cross-Section (RCS) aware BEV encoder. In the dual-stream radar backbone, a point-based encoder and a transformer-based encoder are proposed to extract radar features, with an injection and extraction module to facilitate communication between the two encoders. The RCS-aware BEV encoder takes RCS as an object-size prior when scattering the point features in BEV. Besides, we present the Cross-Attention Multi-layer Fusion module to automatically align the multi-modal BEV features from radar and camera with the deformable attention mechanism, and then fuse the features with channel and spatial fusion layers. Experimental results show that RCBEVDet achieves new state-of-the-art radar-camera fusion results on the nuScenes and view-of-delft (VoD) 3D object detection benchmarks. Furthermore, RCBEVDet achieves better 3D detection results than all real-time camera-only and radar-camera 3D object detectors with a faster inference speed at 21∼28 FPS.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Tian Gao, Cheng-Zhong Xu, Le Zhang, Hui Kong
GSB: Group superposition binarization for vision transformer with limited training samples Journal Article
In: Neural Networks, 2024.
@article{gao2024gsb,
title = {GSB: Group superposition binarization for vision transformer with limited training samples},
author = {Tian Gao and Cheng-Zhong Xu and Le Zhang and Hui Kong},
url = {https://github.com/IMRL/GSB-Vision-Transformer},
year = {2024},
date = {2024-01-01},
urldate = {2024-01-01},
journal = {Neural Networks},
abstract = {Vision Transformer (ViT) has performed remarkably in various computer vision tasks. Nonetheless, affected by the massive amount of parameters, ViT usually suffers from serious overfitting problems with a relatively limited number of training samples. In addition, ViT generally demands heavy computing resources, which limits its deployment on resource-constrained devices. As a type of model-compression method, model binarization is potentially a good choice to solve the above problems. Compared with the full-precision one, the binarized model replaces complex tensor multiplication with simple bit-wise binary operations and represents full-precision model parameters and activations with only 1-bit ones, which potentially solves the problems of model size and computational complexity, respectively. In this paper, we investigate a binarized ViT model. Empirically, we observe that the existing binarization technology designed for Convolutional Neural Networks (CNNs) cannot migrate well to a ViT’s binarization task. We also find that the decline in accuracy of the binary ViT model is mainly due to the information loss of the Attention module and the Value vector. Therefore, we propose a novel model binarization technique, called Group Superposition Binarization (GSB), to deal with these issues. Furthermore, to further improve the performance of the binarized model, we have investigated the gradient calculation procedure in the binarization process and derived more proper gradient calculation equations for GSB to reduce the influence of gradient mismatch. Then, the knowledge distillation technique is introduced to alleviate the performance degradation caused by model binarization. Analytically, model binarization can limit the parameter search space during parameter updates while training a model. Therefore, the binarization process can actually play an implicit regularization role and help solve the problem of overfitting in the case of insufficient training data. Experiments on three datasets with limited numbers of training samples demonstrate that the proposed GSB model achieves state-of-the-art performance among binary quantization schemes and exceeds its full-precision counterpart on some indicators. Code and models are available at: https://github.com/IMRL/GSB-Vision-Transformer.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
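To give a feel for approximating a tensor with superposed binary bases, here is a minimal greedy residual-binarization sketch; the grouping scheme and gradient treatment in the paper's actual GSB method differ:
import torch

def superposed_binarize(x, num_groups=3):
    # Greedily approximate x by a sum of scaled sign tensors: each group
    # binarizes whatever the previous groups failed to capture.
    residual = x.clone()
    approx = torch.zeros_like(x)
    for _ in range(num_groups):
        alpha = residual.abs().mean()   # per-tensor scaling factor
        b = torch.sign(residual)        # 1-bit base
        approx = approx + alpha * b
        residual = residual - alpha * b
    return approx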
Aiping Huang, Lijian Li, Le Zhang, Yuzhen Niu, Tiesong Zhao, Chia-Wen Lin
Multi-View Graph Embedding Learning for Image Co-Segmentation and Co-Localization Journal Article
In: IEEE Transactions on Circuits and Systems for Video Technology, 2024.
@article{huang2024multiview,
title = {Multi-View Graph Embedding Learning for Image Co-Segmentation and Co-Localization},
author = {Aiping Huang and Lijian Li and Le Zhang and Yuzhen Niu and Tiesong Zhao and Chia-Wen Lin},
year = {2024},
date = {2024-01-01},
urldate = {2024-01-01},
journal = {IEEE Transactions on Circuits and Systems for Video Technology},
abstract = {Image co-segmentation and co-localization exploit inter-image information to identify and extract foreground objects with a batch mode. However, they remain challenging when confronted with large object variations or complex backgrounds. This paper proposes a multi-view graph embedding (MV-Gem) learning scheme which integrates diversity, robustness and discernibility of object features to alleviate this phenomenon. To encourage the diversity, the deep co-information containing both low-layer general representations and high-layer semantic information is generated to form a multi-view feature pool for comprehensive co-object description. To enhance the robustness, a multi-view adaptive weighted learning is formulated to fuse the deep co-information for feature complementation. To ensure the discernibility, the graph embedding and sparse constraint are embedded into the fusion formulation for feature selection. The former aims to inherit important structures from multiple views, and the latter further selects important features to restrain irrelevant backgrounds. With these techniques, MV-Gem gradually recovers all co-objects through optimization iterations. Extensive experimental results on real-world datasets demonstrate that MV-Gem is capable of locating and delineating co-objects in an image group.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2023
Wei Meng, Zhicong Liu, Bing Li, Wei Cui, Joey Tianyi Zhou, Le Zhang
GrapHAR: A Lightweight Human Activity Recognition Model by Exploring the Sub-carrier Correlations Journal Article
In: IEEE Transactions on Wireless Communications, 2023.
@article{meng2023graphar,
title = {GrapHAR: A Lightweight Human Activity Recognition Model by Exploring the Sub-carrier Correlations},
author = {Wei Meng and Zhicong Liu and Bing Li and Wei Cui and Joey Tianyi Zhou and Le Zhang},
year = {2023},
date = {2023-08-08},
journal = {IEEE Transactions on Wireless Communications},
abstract = {Human activity recognition (HAR) is an important task due to its far-reaching applications, such as surveillance, healthcare systems, and human-computer interaction. Recently, Channel State Information (CSI)-based HAR has attracted increasing attention in the research community due to its ubiquitous availability, good user privacy, and fewer constraints on working conditions. Most of the existing methods for CSI-based HAR use various deep learning models, such as Convolutional Neural Networks (CNNs), Long Short-Term Memory (LSTM), and Transformers, to distinguish activities based on their temporal patterns. Despite their remarkable effectiveness, these methods solely focus on temporal patterns while ignoring the correlations among sub-carriers. This limitation prevents them from achieving further performance improvement. Moreover, recent works often involve advanced yet massive and inefficient neural architectures, like Transformers, to obtain satisfactory recognition accuracy. The performance gain is traded off with a steep increase in model complexity, which leads to low efficiency and high training/inference costs outside the small time window. To address these issues, we propose a lightweight CSI-based HAR model. Our model makes the first effort to explore the graphical correlations of CSI sub-carriers, working in conjunction with a temporal causal convolution module. The highly efficient design enables our model to be highly effective without requiring excessive model complexity. Extensive experiments conducted on four real-world datasets demonstrate that our model outperforms state-of-the-art methods, including a strong Transformer-based baseline. It achieves an average improvement of 8 percentage points in recognition accuracy, with only 10% of the parameters compared to the Transformer-based method (4.95M vs. 49.24M). Additionally, our model is significantly faster, with empirical training and execution times at least 2.07 times faster than the baseline.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Bing Li, Wei Cui, Le Zhang, Ce Zhu, Wei Wang, Ivor Tsang, Joey Tianyi Zhou
DifFormer: Multi-Resolutional Differencing Transformer With Dynamic Ranging for Time Series Analysis Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2023.
@article{li2023difformer,
title = {DifFormer: Multi-Resolutional Differencing Transformer With Dynamic Ranging for Time Series Analysis},
author = {Bing Li and Wei Cui and Le Zhang and Ce Zhu and Wei Wang and Ivor Tsang and Joey Tianyi Zhou},
year = {2023},
date = {2023-07-17},
urldate = {2023-07-17},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
abstract = {Time series analysis is essential to many far-reaching applications of data science and statistics including economic and financial forecasting, surveillance, and automated business processing. Though Transformer has been greatly successful in computer vision and natural language processing, its potential as the general backbone for analyzing the ubiquitous time series data has not been fully released yet. Prior Transformer variants on time series highly rely on task-dependent designs and pre-assumed "pattern biases", revealing their insufficiency in representing nuanced seasonal, cyclic, and outlier patterns which are highly prevalent in time series. As a consequence, they cannot generalize well to different time series analysis tasks. To tackle the challenges, we propose DifFormer, an effective and efficient Transformer architecture that can serve as a workhorse for a variety of time-series analysis tasks. DifFormer incorporates a novel multi-resolutional differencing mechanism, which is able to progressively and adaptively make nuanced yet meaningful changes prominent, meanwhile, the periodic or cyclic patterns can be dynamically captured with flexible lagging and dynamic ranging operations. Extensive experiments demonstrate DifFormer significantly outperforms state-of-the-art models on three essential time-series analysis tasks, including classification, regression, and forecasting. In addition to its superior performances, DifFormer also excels in efficiency -- a linear time/memory complexity with empirically lower time consumption.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
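The differencing mechanism above can be illustrated in its simplest form: computing series differences at several lags. A toy sketch only; DifFormer's learnable differencing, flexible lagging, and dynamic ranging go well beyond this:
import torch

def multi_resolution_differences(x, lags=(1, 2, 4)):
    # x: (B, T, C) batch of multivariate series; returns one (B, T - lag, C)
    # difference tensor per lag, exposing changes at several temporal resolutions.
    return [x[:, lag:, :] - x[:, :-lag, :] for lag in lags]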
Ao Li, Le Zhang, Yun Liu, Ce Zhu
Feature Modulation Transformer: Cross-Refinement of Global Representation via High-Frequency Prior for Image Super-Resolution Inproceedings
In: ICCV, 2023.
@inproceedings{li2023feature,
title = {Feature Modulation Transformer: Cross-Refinement of Global Representation via High-Frequency Prior for Image Super-Resolution},
author = {Ao Li and Le Zhang and Yun Liu and Ce Zhu},
year = {2023},
date = {2023-07-17},
urldate = {2023-07-17},
booktitle = {ICCV},
abstract = {Transformer-based methods have exhibited remarkable potential in Single Image Super-Resolution (SISR) by effectively extracting long-range dependencies. However, most of the current research in this area has prioritized the design of transformer blocks to capture global information, while overlooking the importance of incorporating high-frequency priors, which we believe could be beneficial. In our study, we conducted a series of experiments and found that transformer structures are more adept at capturing low-frequency information, but have limited capacity in constructing high-frequency representations when compared to their convolutional counterparts. Our proposed solution, the Cross-Refinement Adaptive Feature Modulation Transformer (CRAFT), integrates the strengths of both convolutional and transformer structures. It comprises three key components: the High-Frequency Enhancement Residual Block (HFERB) for extracting high-frequency information, the Shift Rectangle Window Attention Block (SRWAB) for capturing global information, and the Hybrid Fusion Block (HFB) for refining the global representation. Our experiments on multiple datasets demonstrate that CRAFT outperforms state-of-the-art methods by up to 0.29 dB while using fewer parameters.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2022
Fanxing Liu, Cheng Zeng, Le Zhang*, Yingjie Zhou*, Qing Mu, Yanru Zhang, Ling Zhang, Ce Zhu
FedTADBench: Federated Time-series Anomaly Detection Benchmark Inproceedings
In: IEEE HPCC (Best Paper Award), 2022.
@inproceedings{liu2022fedtadbench,
title = {FedTADBench: Federated Time-series Anomaly Detection Benchmark},
author = {Fanxing Liu and Cheng Zeng and Le Zhang* and Yingjie Zhou* and Qing Mu and Yanru Zhang and Ling Zhang and Ce Zhu},
url = {https://github.com/fanxingliu2020/FedTADBench},
year = {2022},
date = {2022-12-15},
urldate = {2022-12-15},
booktitle = {IEEE HPCC (Best Paper Award)},
abstract = {Time series anomaly detection strives to uncover potential abnormal behaviors and patterns from temporal data, and has fundamental significance in diverse application scenarios. Constructing an effective detection model usually requires adequate training data stored in a centralized manner; however, this requirement sometimes cannot be satisfied in realistic scenarios. As a prevailing approach to address the above problem, federated learning has demonstrated its power to cooperate with the distributed data available while protecting the privacy of data providers. However, it is still unclear how existing time series anomaly detection algorithms perform with decentralized data storage and privacy protection through federated learning. To study this, we conduct a federated time series anomaly detection benchmark, named FedTADBench, which involves five representative time series anomaly detection algorithms and four popular federated learning methods. We would like to answer the following questions: (1) How is the performance of time series anomaly detection algorithms when meeting federated learning? (2) Which federated learning method is the most appropriate one for time series anomaly detection? (3) How do federated time series anomaly detection approaches perform on different partitions of data in clients? Extensive results as well as corresponding analysis are provided from experiments with various settings. The source code of our benchmark is publicly available at https://github.com/fanxingliu2020/FedTADBench.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
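As a reference point for the federated setting above, here is a minimal FedAvg-style aggregation sketch. Treating FedAvg as one of the benchmarked methods is an assumption; the abstract does not name the four FL methods:
import numpy as np

def fedavg(client_params, client_sizes):
    # client_params: list over clients, each a list of np.ndarray layer weights;
    # client_sizes: number of local training samples per client.
    total = float(sum(client_sizes))
    return [
        sum(params[i] * (n / total) for params, n in zip(client_params, client_sizes))
        for i in range(len(client_params[0]))
    ]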
Guolei Sun, Yun Liu, Hao Tang, Ajad Chhatkuli, Le Zhang, Luc Van Gool
Mining Relations among Cross-Frame Affinities for Video Semantic Segmentation Inproceedings
In: ECCV2022, 2022.
@inproceedings{sun2022mining,
title = {Mining Relations among Cross-Frame Affinities for Video Semantic Segmentation},
author = {Guolei Sun and Yun Liu and Hao Tang and Ajad Chhatkuli and Le Zhang and Luc Van Gool},
url = {https://github.com/GuoleiSun/VSS-MRCFA},
year = {2022},
date = {2022-10-18},
booktitle = {ECCV2022},
abstract = {The essence of video semantic segmentation (VSS) is how to leverage temporal information for prediction. Previous efforts are mainly devoted to developing new techniques to calculate the cross-frame affinities such as optical flow and attention. Instead, this paper contributes from a different angle by mining relations among cross-frame affinities, upon which better temporal information aggregation could be achieved. We explore relations among affinities in two aspects: single-scale intrinsic correlations and multi-scale relations. Inspired by traditional feature processing, we propose Single-scale Affinity Refinement (SAR) and Multi-scale Affinity Aggregation (MAA). To make it feasible to execute MAA, we propose a Selective Token Masking (STM) strategy to select a subset of consistent reference tokens for different scales when calculating affinities, which also improves the efficiency of our method. At last, the cross-frame affinities strengthened by SAR and MAA are adopted for adaptively aggregating temporal information. Our experiments demonstrate that the proposed method performs favorably against state-of-the-art VSS methods. The code is publicly available at https://github.com/GuoleiSun/VSS-MRCFA.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Gang Xu, Qibin Hou, Le Zhang, Ming-Ming Cheng
FMNet: Frequency-Aware Modulation Network for SDR-to-HDR Translation Inproceedings
In: ACM MM, 2022.
@inproceedings{xu2022fmnet,
title = {FMNet: Frequency-Aware Modulation Network for SDR-to-HDR Translation},
author = {Gang Xu and Qibin Hou and Le Zhang and Ming-Ming Cheng},
url = {https://github.com/MCG-NKU/FMNet},
year = {2022},
date = {2022-10-13},
urldate = {2022-10-13},
booktitle = {ACM MM},
abstract = {High-dynamic-range (HDR) media resources that preserve high contrast and more details in shadow and highlight areas in television are becoming increasingly popular for modern display technology compared to the widely available standard-dynamic-range (SDR) media resources. However, due to the exorbitant price of HDR cameras, researchers have attempted to develop the SDR-to-HDR techniques to convert the abundant SDR media resources to the HDR versions for cost-saving. Recent SDR-to-HDR methods mostly apply the image-adaptive modulation scheme to dynamically modulate the local contrast. However, these methods often fail to properly capture the low-frequency cues, resulting in artifacts in the low-frequency regions and low visual quality. Motivated by the Discrete Cosine Transform (DCT), in this paper, we propose a Frequency-aware Modulation Network (FMNet) to enhance the contrast in a frequency-adaptive way for SDR-to-HDR translation. Specifically, we design a frequency-aware modulation block that can dynamically modulate the features according to its frequency-domain responses. This allows us to reduce the structural distortions and artifacts in the translated low-frequency regions and reconstruct high-quality HDR content in the translated results. Experimental results on the HDRTV1K dataset show that our FMNet outperforms previous methods and the perceptual quality of the generated HDR images can be largely improved. Our code is available at https://github.com/MCG-NKU/FMNet.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Yu-Huan Wu, Yun Liu, Le Zhang, Ming-Ming Cheng, Bo Ren
EDN: Salient object detection via extremely-downsampled network Journal Article
In: IEEE TIP, 2022.
@article{wu2022edn,
title = {EDN: Salient object detection via extremely-downsampled network},
author = {Yu-Huan Wu and Yun Liu and Le Zhang and Ming-Ming Cheng and Bo Ren},
url = {https://github.com/yuhuan-wu/EDN},
year = {2022},
date = {2022-06-08},
urldate = {2022-06-08},
journal = {IEEE TIP},
abstract = {Recent progress on salient object detection (SOD) mainly benefits from multi-scale learning, where the high-level and low-level features collaborate in locating salient objects and discovering fine details, respectively. However, most efforts are devoted to low-level feature learning by fusing multi-scale features or enhancing boundary representations. High-level features, although long proven effective for many other tasks, have been barely studied for SOD. In this paper, we tap into this gap and show that enhancing high-level features is essential for SOD as well. To this end, we introduce an Extremely-Downsampled Network (EDN), which employs an extreme downsampling technique to effectively learn a global view of the whole image, leading to accurate salient object localization. To accomplish better multi-level feature fusion, we construct the Scale-Correlated Pyramid Convolution (SCPC) to build an elegant decoder for recovering object details from the above extreme downsampling. Extensive experiments demonstrate that EDN achieves state-of-the-art performance with real-time speed. Our efficient EDN-Lite also achieves competitive performance with a speed of 316 fps. Hence, this work is expected to spark some new thinking in SOD. Code is available at https://github.com/yuhuan-wu/EDN.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Wei Cui, Le Zhang, Bing Li, Zhenghua Chen, Min Wu, Xiaoli Li, Jiawen Kang
Semi-Supervised Deep Adversarial Forest for Cross-Environment Localization Journal Article
In: IEEE Transactions on Vehicular Technology, 2022.
@article{cui2022semi,
title = {Semi-Supervised Deep Adversarial Forest for Cross-Environment Localization},
author = {Wei Cui and Le Zhang and Bing Li and Zhenghua Chen and Min Wu and Xiaoli Li and Jiawen Kang},
year = {2022},
date = {2022-05-04},
urldate = {2022-05-04},
journal = {IEEE Transactions on Vehicular Technology},
abstract = {Extracting channel state information (CSI) from WiFi signals has proven highly effective for locating humans in a device-free manner. However, existing localization/positioning systems are mainly trained and deployed in a fixed environment, and thus they are likely to suffer from substantial performance declines when migrating to new environments. In this paper, we address the fundamental problem of WiFi-based cross-environment indoor localization using a semi-supervised approach, in which we only have access to the annotations of the source environment while the data in the target environments are un-annotated. This problem is of high practical value in enabling a well-trained system to be scalable to new environments without tedious human annotations. To this end, a deep neural forest is introduced which unifies ensemble learning with the representation learning functionalities of deep neural networks in an end-to-end trainable fashion. On top of that, an adversarial training strategy is further employed to learn environment-invariant feature representations for facilitating more robust localization. Extensive experiments on real-world datasets demonstrate the superiority of the proposed methods over state-of-the-art baselines. Compared with the best-performing baseline, our model excels with an average 12.7% relative improvement on all six evaluation settings.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2021
Yun Liu, Ming-Ming Cheng, Deng-Ping Fan, Le Zhang, JiaWang Bian, Dacheng Tao
Semantic edge detection with diverse deep supervision Journal Article
In: IJCV, 2021.
@article{liu2018semantic,
title = {Semantic edge detection with diverse deep supervision},
author = {Yun Liu and Ming-Ming Cheng and Deng-Ping Fan and Le Zhang and JiaWang Bian and Dacheng Tao},
year = {2021},
date = {2021-06-03},
urldate = {2018-01-01},
journal = {IJCV},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Xun Xu, Loong-Fah Cheong, Zhuwen Li, Le Zhang, Ce Zhu
Learning Clustering for Motion Segmentation Journal Article
In: IEEE TCSVT, 2021.
@article{xu2021learning,
title = {Learning Clustering for Motion Segmentation},
author = {Xun Xu and Loong-Fah Cheong and Zhuwen Li and Le Zhang and Ce Zhu},
url = {https://alex-xun-xu.github.io/Doc/Publication/2021/XuEtAl_TCSVT21.pdf},
doi = {10.1109/TCSVT.2021.3069094},
year = {2021},
date = {2021-03-29},
urldate = {2021-03-29},
journal = {IEEE TCSVT},
abstract = {Subspace clustering has been extensively studied from the hypothesis-and-test, algebraic, and spectral clustering-based perspectives. Most assume that only a single type/class of subspace is present. Generalizations to multiple types are non-trivial, plagued by challenges such as the choice of types and numbers of models, sampling imbalance, and parameter tuning. In many real-world problems, data may not lie perfectly on a linear subspace, and hand-designed linear subspace models may not fit these situations. In this work, we formulate the multi-type subspace clustering problem as one of learning non-linear subspace filters via deep multi-layer perceptrons (MLPs). The responses to the learnt subspace filters serve as the feature embedding that is clustering-friendly, i.e., points of the same clusters will be embedded closer together through the network. For inference, we apply K-means to the network output to cluster the data. Experiments are carried out on synthetic data and real-world motion segmentation problems, producing state-of-the-art results.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Le Zhang, Wei Cui, Bing Li, Zhenghua Chen, Min Wu, Teo Sin Gee
Privacy-Preserving Cross-Environment Human Activity Recognition Journal Article
In: IEEE TCybernetics, 2021.
@article{zhang2021privacy,
title = {Privacy-Preserving Cross-Environment Human Activity Recognition},
author = {Le Zhang and Wei Cui and Bing Li and Zhenghua Chen and Min Wu and Teo Sin Gee},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE TCybernetics},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Yining Ma, Jingwen Li, Zhiguang Cao, Wen Song, Le Zhang, Zhenghua Chen, Jing Tang
Learning to Iteratively Solve Routing Problems with Dual-Aspect Collaborative Transformer Inproceedings
In: NeurIPS, 2021.
@inproceedings{ma2021learning,
title = {Learning to Iteratively Solve Routing Problems with Dual-Aspect Collaborative Transformer},
author = {Yining Ma and Jingwen Li and Zhiguang Cao and Wen Song and Le Zhang and Zhenghua Chen and Jing Tang},
url = {https://github.com/yining043/VRP-DACT},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {NeurIPS},
journal = {NeurIPS},
volume = {34},
abstract = {Recently, Transformer has become a prevailing deep architecture for solving vehicle routing problems (VRPs). However, the original Transformer is less effective in learning improvement models because its positional encoding (PE) method is not suitable in representing VRP solutions. This paper presents a novel Dual-Aspect Collaborative Transformer (DACT) to learn embeddings for the node and positional features separately, instead of fusing them together as done in the original PE, so as to avoid potential noises and incompatible attention scores. Moreover, the positional features are embedded through a novel cyclic positional encoding (CPE) method to capture the circularity and symmetry of VRP solutions. We train DACT using Proximal Policy Optimization, and design a curriculum learning strategy for better sample efficiency. We apply DACT to solve the traveling salesman problem (TSP) and capacitated vehicle routing problem (CVRP). Results show that DACT outperforms existing Transformer based improvement models, and exhibits better capability of generalizing across different problem sizes. Code is available at https://github.com/yining043/VRP-DACT.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
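The circularity idea behind the cyclic positional encoding (CPE) can be sketched by placing positions on a circle before applying sinusoids, so that the last position of a tour sits next to the first. An illustrative construction only, not the paper's exact CPE; assumes an even d_model:
import numpy as np

def cyclic_positional_encoding(n_positions, d_model):
    # Map each position to an angle on the unit circle, then expand with
    # sinusoids at several frequencies; position n-1 ends up adjacent to 0.
    angles = 2 * np.pi * np.arange(n_positions) / n_positions      # (n,)
    freqs = np.arange(1, d_model // 2 + 1)                         # (d/2,)
    phase = angles[:, None] * freqs[None, :]                       # (n, d/2)
    return np.concatenate([np.sin(phase), np.cos(phase)], axis=1)  # (n, d)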
Le Zhang, Zenglin Shi, Ming-Ming Cheng, Yun Liu, Jia-Wang Bian, Joey Tianyi Zhou, Guoyan Zheng, Zeng Zeng
Nonlinear Regression via Deep Negative Correlation Learning Journal Article
In: IEEE TPAMI, 43 (3), pp. 982-998, 2021.
@article{8850209,
title = {Nonlinear Regression via Deep Negative Correlation Learning},
author = {Le Zhang and Zenglin Shi and Ming-Ming Cheng and Yun Liu and Jia-Wang Bian and Joey Tianyi Zhou and Guoyan Zheng and Zeng Zeng},
url = { https://mmcheng.net/dncl/},
doi = {10.1109/TPAMI.2019.2943860},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE TPAMI},
volume = {43},
number = {3},
pages = {982-998},
abstract = {Nonlinear regression has been extensively employed in many computer vision problems (e.g., crowd counting, age estimation, affective computing). Under the umbrella of deep learning, two common solutions exist i) transforming nonlinear regression to a robust loss function which is jointly optimizable with the deep convolutional network, and ii) utilizing ensemble of deep networks. Although some improved performance is achieved, the former may be lacking due to the intrinsic limitation of choosing a single hypothesis and the latter may suffer from much larger computational complexity. To cope with those issues, we propose to regress via an efficient “divide and conquer” manner. The core of our approach is the generalization of negative correlation learning that has been shown, both theoretically and empirically, to work well for non-deep regression problems. Without extra parameters, the proposed method controls the bias-variance-covariance trade-off systematically and usually yields a deep regression ensemble where each base model is both “accurate” and “diversified.” Moreover, we show that each sub-problem in the proposed method has less Rademacher Complexity and thus is easier to optimize. Extensive experiments on several diverse and challenging tasks including crowd counting, personality analysis, age estimation, and image super-resolution demonstrate the superiority over challenging baselines as well as the versatility of the proposed method. The source code and trained models are available on our project page: https://mmcheng.net/dncl/.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Wanyue Zhang, Xun Xu, Fayao Liu, Le Zhang, Chuan-Sheng Foo
On Automatic Data Augmentation for 3D Point Cloud Classification Proceeding
BMVC, 2021.
@proceedings{zhang2021automatic,
title = {On Automatic Data Augmentation for 3D Point Cloud Classification},
author = {Wanyue Zhang and Xun Xu and Fayao Liu and Le Zhang and Chuan-Sheng Foo},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
howpublished = {BMVC},
keywords = {},
pubstate = {published},
tppubtype = {proceedings}
}
Yu-Huan Wu, Yun Liu, Le Zhang, Wang Gao, Ming-Ming Cheng
Regularized Densely-Connected Pyramid Network for Salient Instance Segmentation Journal Article
In: IEEE TIP, 30, pp. 3897-3907, 2021.
@article{9382868,
title = {Regularized Densely-Connected Pyramid Network for Salient Instance Segmentation},
author = {Yu-Huan Wu and Yun Liu and Le Zhang and Wang Gao and Ming-Ming Cheng},
url = {https://github.com/yuhuan-wu/RDPNet},
doi = {10.1109/TIP.2021.3065822},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE TIP},
volume = {30},
pages = {3897-3907},
abstract = {Many of the recent efforts on salient object detection (SOD) have been devoted to producing accurate saliency maps without being aware of their instance labels. To this end, we propose a new pipeline for end-to-end salient instance segmentation (SIS) that predicts a class-agnostic mask for each detected salient instance. To better use the rich feature hierarchies in deep networks and enhance the side predictions, we propose the regularized dense connections, which attentively promote informative features and suppress non-informative ones from all feature pyramids. A novel multi-level RoIAlign based decoder is introduced to adaptively aggregate multi-level features for better mask predictions. Such strategies can be well-encapsulated into the Mask R-CNN pipeline. Extensive experiments on popular benchmarks demonstrate that our design significantly outperforms existing state-of-the-art competitors by 6.3% (58.6% vs. 52.3%) in terms of the AP metric. The code is available at https://github.com/yuhuan-wu/RDPNet.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Wei Wang, Wei Cui, Bing Li, Min Wu
Two-Stream Convolution Augmented Transformer for Human Activity Recognition Inproceedings
In: AAAI, 2021.
@inproceedings{bing2021that,
title = {Two-Stream Convolution Augmented Transformer for Human Activity Recognition},
author = {Wei Wang and Wei Cui and Bing Li and Min Wu},
url = {https://github.com/windofshadow/THAT},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {AAAI},
abstract = {Recognition of human activities is an important task due to its far-reaching applications such as healthcare systems, context-aware applications, and security monitoring. Recently, WiFi-based human activity recognition (HAR) is becoming ubiquitous due to its non-invasiveness. Existing WiFi-based HAR methods regard WiFi signals as a temporal sequence of channel state information (CSI), and employ deep sequential models (e.g., RNN, LSTM) to automatically capture channel-over-time features. Although being remarkably effective, they suffer from two major drawbacks. Firstly, the granularity of a single temporal point is blindly elementary for representing meaningful CSI patterns. Secondly, the time-over-channel features are also important, and could be a natural data augmentation. To address the drawbacks, we propose a novel Two-stream Convolution Augmented Human Activity Transformer (THAT) model. Our model proposes to utilize a two-stream structure to capture both time-over-channel and channel-over-time features, and use the multi-scale convolution augmented transformer to capture range-based patterns. Extensive experiments on four real experiment datasets demonstrate that our model outperforms state-of-the-art models in terms of both effectiveness and efficiency.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Jia-Wang Bian, Huangying Zhan, Naiyan Wang, Zhichao Li, Le Zhang, Chunhua Shen, Ming-Ming Cheng, Ian Reid
Unsupervised Scale-consistent Depth Learning from Video Journal Article
In: IJCV, 2021.
@article{bian2021ijcv,
title = {Unsupervised Scale-consistent Depth Learning from Video},
author = {Jia-Wang Bian and Huangying Zhan and Naiyan Wang and Zhichao Li and Le Zhang and Chunhua Shen and Ming-Ming Cheng and Ian Reid},
url = {https://github.com/JiawangBian/SC-SfMLearner-Release},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IJCV},
abstract = {We propose a monocular depth estimation method SC-Depth, which requires only unlabelled videos for training and enables scale-consistent prediction at inference time. Our contributions include: (i) we propose a geometry consistency loss, which penalizes the inconsistency of predicted depths between adjacent views; (ii) we propose a self-discovered mask to automatically localize moving objects that violate the underlying static scene assumption and cause noisy signals during training; (iii) we demonstrate the efficacy of each component with a detailed ablation study and show high-quality depth estimation results on both the KITTI and NYUv2 datasets. Moreover, thanks to the capability of scale-consistent prediction, we show that our monocular-trained deep networks are readily integrated into the ORB-SLAM2 system for more robust and accurate tracking. The proposed hybrid Pseudo-RGBD SLAM shows compelling results on KITTI, and it generalizes well to the KAIST dataset without additional training. Finally, we provide several demos for qualitative evaluation. The source code is released on GitHub.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
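For reference, the geometry consistency loss in (i) and the self-discovered mask in (ii) are commonly written as follows, where D_b^a is the depth of view b warped from view a, D_b' the interpolated predicted depth of view b, and V the set of valid pixels (a sketch with simplified notation):
% depth inconsistency, the loss averaged over valid pixels, and the per-pixel mask
D_{\mathrm{diff}}(p) = \frac{\lvert D_b^a(p) - D_b'(p) \rvert}{D_b^a(p) + D_b'(p)}, \qquad L_{GC} = \frac{1}{\lvert V \rvert} \sum_{p \in V} D_{\mathrm{diff}}(p), \qquad M(p) = 1 - D_{\mathrm{diff}}(p)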
Yun Liu, Xin-Yu Zhang, Jia-Wang Bian, Le Zhang, Ming-Ming Cheng
SAMNet: Stereoscopically Attentive Multi-Scale Network for Lightweight Salient Object Detection Journal Article
In: IEEE TIP, 30, pp. 3804–3814, 2021.
@article{liu2021samnet,
title = {SAMNet: Stereoscopically Attentive Multi-Scale Network for Lightweight Salient Object Detection},
author = {Yun Liu and Xin-Yu Zhang and Jia-Wang Bian and Le Zhang and Ming-Ming Cheng},
url = {https://mmcheng.net/SAMNet/},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE TIP},
volume = {30},
pages = {3804--3814},
publisher = {IEEE},
abstract = {Recent progress on salient object detection (SOD) mostly benefits from the explosive development of Convolutional Neural Networks (CNNs). However, much of the improvement comes with the larger network size and heavier computation overhead, which, in our view, is not mobile-friendly and thus difficult to deploy in practice. To promote more practical SOD systems, we introduce a novel Stereoscopically Attentive Multi-scale (SAM) module, which adopts a stereoscopic attention mechanism to adaptively fuse the features of various scales. Embarking on this module, we propose an extremely lightweight network, namely SAMNet, for SOD. Extensive experiments on popular benchmarks demonstrate that the proposed SAMNet yields comparable accuracy with state-of-the-art methods while running at a GPU speed of 343 fps and a CPU speed of 5 fps for 336×336 inputs with only 1.33M parameters. Therefore, SAMNet paves a new path towards SOD. The source code is available on the project page https://mmcheng.net/SAMNet/},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Joey Tianyi Zhou, Le Zhang*, Jiawei Du, Xi Peng, Zhiwen Fang, Zhe Xiao, Hongyuan Zhu
Locality-Aware Crowd Counting Journal Article
In: IEEE TPAMI, 2021.
@article{zhou2021locality,
title = {Locality-Aware Crowd Counting},
author = {Joey Tianyi Zhou and Le Zhang* and Jiawei Du and Xi Peng and Zhiwen Fang and Zhe Xiao and Hongyuan Zhu},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {IEEE TPAMI},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2020
Le Zhang, Zhenghua Chen, Wei Cui, Bing Li, Cen Chen, Zhiguang Cao, Kaizhou Gao
Wifi-based indoor robot positioning using deep fuzzy forests Journal Article
In: IEEE Internet of Things Journal, 7 (11), pp. 10773–10781, 2020.
@article{zhang2020wifi,
title = {Wifi-based indoor robot positioning using deep fuzzy forests},
author = {Le Zhang and Zhenghua Chen and Wei Cui and Bing Li and Cen Chen and Zhiguang Cao and Kaizhou Gao},
year = {2020},
date = {2020-01-01},
journal = {IEEE Internet of Things Journal},
volume = {7},
number = {11},
pages = {10773--10781},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Zhiguang Cao, Hongliang Guo, Wen Song, Kaizhou Gao, Zhenghua Chen, Le Zhang, Xuexi Zhang
Using reinforcement learning to minimize the probability of delay occurrence in transportation Journal Article
In: IEEE Transactions on Vehicular Technology, 69 (3), pp. 2424–2436, 2020.
@article{cao2020using,
title = {Using reinforcement learning to minimize the probability of delay occurrence in transportation},
author = {Zhiguang Cao and Hongliang Guo and Wen Song and Kaizhou Gao and Zhenghua Chen and Le Zhang and Xuexi Zhang},
year = {2020},
date = {2020-01-01},
journal = {IEEE Transactions on Vehicular Technology},
volume = {69},
number = {3},
pages = {2424--2436},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Cen Chen, Xiaofeng Zou, Zeng Zeng, Zhongyao Cheng, Le Zhang, Steven CH Hoi
Exploring structural knowledge for automated visual inspection of moving trains Journal Article
In: IEEE Transactions on Cybernetics, 2020.
@article{chen2020exploring,
title = {Exploring structural knowledge for automated visual inspection of moving trains},
author = {Cen Chen and Xiaofeng Zou and Zeng Zeng and Zhongyao Cheng and Le Zhang and Steven CH Hoi},
year = {2020},
date = {2020-01-01},
journal = {IEEE Transactions on Cybernetics},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
JiaWang Bian, Wen-Yan Lin, Yun Liu, Le Zhang, Sai-Kit Yeung, Ming-Ming Cheng, Ian Reid
GMS: Grid-based Motion Statistics for Fast, Ultra-Robust Feature Correspondence Journal Article
In: IJCV, 2020.
@article{Bian2020gms,
title = {GMS: Grid-based Motion Statistics for Fast, Ultra-Robust Feature Correspondence},
author = {JiaWang Bian and Wen-Yan Lin and Yun Liu and Le Zhang and Sai-Kit Yeung and Ming-Ming Cheng and Ian Reid},
url = {https://github.com/JiawangBian/GMS-Feature-Matcher},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {IJCV},
abstract = {Feature matching aims at generating correspondences across images, which is widely used in many computer vision tasks. Although considerable progress has been made on feature descriptors and fast matching for initial correspondence hypotheses, selecting good ones from them is still challenging and critical to the overall performance. More importantly, existing methods often take a long computational time, limiting their use in real-time applications. This paper attempts to separate true correspondences from false ones at high speed. We term the proposed method GMS (Grid-based Motion Statistics), which incorporates the smoothness constraint into a statistic framework for separation and uses a grid-based implementation for fast calculation. GMS is robust to various challenging image changes, involving viewpoint, scale, and rotation. It is also fast, e.g., taking only 1 or 2 ms in a single CPU thread, even when 50K correspondences are processed. This has important implications for real-time applications. What’s more, we show that incorporating GMS into the classic feature matching and epipolar geometry estimation pipeline can significantly boost the overall performance. Finally, we integrate GMS into the well-known ORB-SLAM system for monocular initialization, resulting in a significant improvement.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
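As a usage note for this entry: below is a minimal sketch (an illustration under stated assumptions, not the authors' reference code at the URL above) of running GMS-style correspondence filtering through OpenCV's contrib module, which exposes an implementation as cv2.xfeatures2d.matchGMS. It assumes the opencv-contrib-python package and two hypothetical input images, frame1.png and frame2.png.

import cv2

# GMS works best with many cheap features; the paper pairs it with ORB.
img1 = cv2.imread("frame1.png", cv2.IMREAD_GRAYSCALE)  # hypothetical inputs
img2 = cv2.imread("frame2.png", cv2.IMREAD_GRAYSCALE)
orb = cv2.ORB_create(10000)
orb.setFastThreshold(0)
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

# Brute-force Hamming matching supplies the initial (noisy) hypotheses.
matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
all_matches = matcher.match(des1, des2)

# GMS keeps matches whose grid cells accumulate enough supporting
# neighbours, i.e. the statistical smoothness test described in the abstract.
good = cv2.xfeatures2d.matchGMS(
    img1.shape[::-1], img2.shape[::-1],  # (width, height) of each image
    kp1, kp2, all_matches,
    withRotation=False, withScale=False, thresholdFactor=6.0)
print(f"{len(good)} / {len(all_matches)} matches survive GMS")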
Le Zhang; Zenglin Shi; Joey Tianyi Zhou; Ming-Ming Cheng; Yun Liu; Jia-Wang Bian; Zeng Zeng; Chunhua Shen
Ordered or Orderless: A Revisit for Video based Person Re-Identification Journal Article
In: IEEE TPAMI, 2020.
Abstract | BibTeX | Tags: | Links:
@article{Zhang2020OrderlessReID,
title = {Ordered or Orderless: A Revisit for Video based Person Re-Identification},
author = {Le Zhang and Zenglin Shi and Joey Tianyi Zhou and Ming-Ming Cheng and Yun Liu and Jia-Wang Bian and Zeng Zeng and Chunhua Shen},
url = {https://github.com/ZhangLeUestc/VideoReid-TPAMI2020},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {IEEE TPAMI},
abstract = {Is a recurrent network really necessary for learning a good visual representation for video based person re-identification (VPRe-id)? In this paper, we first show that the common practice of employing recurrent neural networks (RNNs) to aggregate temporal-spatial features may not be optimal. Specifically, with a diagnostic analysis, we show that the recurrent structure may not be as effective at learning temporal dependencies as expected and implicitly yields an orderless representation. Based on this observation, we then present a simple yet surprisingly powerful approach for VPRe-id, where we treat VPRe-id as an efficient, orderless ensemble of image based person re-identification problems. More specifically, we divide videos into individual images and re-identify persons with an ensemble of image based rankers. Under the i.i.d. assumption, we provide an error bound that sheds light on how we can improve VPRe-id. Our work also presents a promising way to bridge the gap between video and image based person re-identification. Comprehensive experimental evaluations demonstrate that the proposed solution achieves state-of-the-art performance on multiple widely used datasets (iLIDS-VID, PRID 2011, and MARS).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
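To make the "orderless ensemble" idea above concrete, here is a minimal sketch (an illustration, not the paper's released code, which lives at the URL above): a video pair is scored by averaging the distances that an image-based embedding network produces over individual frames, with no recurrent aggregation. The embedding model is a hypothetical stand-in.

import torch

def video_distance(model, query_frames, gallery_frames):
    """query_frames, gallery_frames: float tensors of shape (T, C, H, W);
    model: any image-based re-identification embedding (hypothetical here)."""
    with torch.no_grad():
        q = model(query_frames)    # (Tq, D) per-frame embeddings
        g = model(gallery_frames)  # (Tg, D)
    # Each (query frame, gallery frame) pair acts as one weak ranker;
    # the ensemble score is simply the mean pairwise Euclidean distance.
    return torch.cdist(q, g).mean()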
2019
Jia-Xing Zhao; Yang Cao; Deng-Ping Fan; Ming-Ming Cheng; Xuan-Yi Li; Le Zhang
Contrast prior and fluid pyramid integration for RGBD salient object detection Inproceedings
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, pp. 3927–3936, 2019.
BibTeX | Tags:
@inproceedings{zhao2019contrast,
title = {Contrast prior and fluid pyramid integration for RGBD salient object detection},
author = {Jia-Xing Zhao and Yang Cao and Deng-Ping Fan and Ming-Ming Cheng and Xuan-Yi Li and Le Zhang},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages = {3927--3936},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Le Zhang; Songyou Peng; Stefan Winkler
PersEmoN: A deep network for joint analysis of apparent personality, emotion and their relationship Journal Article
In: IEEE Transactions on Affective Computing, 2019.
BibTeX | Tags:
@article{zhang2019persemon,
title = {PersEmoN: A deep network for joint analysis of apparent personality, emotion and their relationship},
author = {Le Zhang and Songyou Peng and Stefan Winkler},
year = {2019},
date = {2019-01-01},
journal = {IEEE Transactions on Affective Computing},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Joey Tianyi Zhou; Le Zhang; Zhiwen Fang; Jiawei Du; Xi Peng; Yang Xiao
Attention-driven loss for anomaly detection in video surveillance Journal Article
In: IEEE TCSVT, 30 (12), pp. 4639–4647, 2019.
Abstract | BibTeX | Tags: | Links:
@article{zhou2019attention,
title = {Attention-driven loss for anomaly detection in video surveillance},
author = {Joey Tianyi Zhou and Le Zhang and Zhiwen Fang and Jiawei Du and Xi Peng and Yang Xiao},
url = {https://github.com/joeyzhouty/Attention-driven-loss},
year = {2019},
date = {2019-01-01},
urldate = {2019-01-01},
journal = {IEEE TCSVT},
volume = {30},
number = {12},
pages = {4639--4647},
publisher = {IEEE},
abstract = {Recent video anomaly detection methods focus on reconstructing or predicting frames. In this setting, the long-standing inter-class data-imbalance problem manifests as an imbalance between foreground and stationary background objects, which has been little investigated by existing solutions. Naively optimizing the reconstruction loss yields an optimization biased towards reconstructing the background rather than the objects of interest in the foreground. To solve this, we propose a simple yet effective solution, termed attention-driven loss, to alleviate the foreground-background imbalance problem in anomaly detection. Specifically, we compute a single mask map that summarizes the frame evolution of moving foreground regions and suppresses the background in the training video clips. After that, we construct an attention map through the combination of the mask map and background to weight the foreground and background regions differently. The proposed attention-driven loss is independent of backbone networks and can be easily augmented in most existing anomaly detection models. Augmented with the attention-driven loss, the model achieves an AUC of 86.0% on the Avenue, 83.9% on the Ped1, and 96% on the Ped2 datasets. Extensive experimental results and ablation studies further validate the effectiveness of our model. The project page is available at https://github.com/joeyzhouty/Attention-driven-loss},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
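To illustrate the weighting scheme described in the abstract, a minimal sketch follows (assumed shapes and names, not the authors' code, which is linked above): the reconstruction error is re-weighted so that moving foreground pixels, identified by a motion mask, contribute more than the static background.

import torch

def attention_driven_loss(pred, target, fg_mask, fg_weight=2.0, bg_weight=1.0):
    """pred, target: (B, C, H, W) predicted and ground-truth frames;
    fg_mask: (B, 1, H, W) binary map of moving foreground regions, e.g.
    accumulated from frame differences over a training clip (assumed input)."""
    attention = fg_weight * fg_mask + bg_weight * (1.0 - fg_mask)
    per_pixel = (pred - target) ** 2           # plain reconstruction error
    return (attention * per_pixel).mean()      # re-weighted by the attention map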
2018
Zenglin Shi; Le Zhang; Yun Liu; Xiaofeng Cao; Yangdong Ye; Ming-Ming Cheng; Guoyan Zheng
Crowd counting with deep negative correlation learning Inproceedings
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5382–5390, 2018.
BibTeX | Tags:
@inproceedings{shi2018crowd,
title = {Crowd counting with deep negative correlation learning},
author = {Zenglin Shi and Le Zhang and Yun Liu and Xiaofeng Cao and Yangdong Ye and Ming-Ming Cheng and Guoyan Zheng},
year = {2018},
date = {2018-01-01},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages = {5382--5390},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen Wang; Le Zhang; Lihua Xie; Junsong Yuan
Kernel cross-correlator Inproceedings
In: Proceedings of the AAAI Conference on Artificial Intelligence, 2018.
BibTeX | Tags:
@inproceedings{wang2018kernel,
title = {Kernel cross-correlator},
author = {Chen Wang and Le Zhang and Lihua Xie and Junsong Yuan},
year = {2018},
date = {2018-01-01},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
volume = {32},
number = {1},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Yun Liu; Peng-Tao Jiang; Vahan Petrosyan; Shi-Jie Li; Jiawang Bian; Le Zhang; Ming-Ming Cheng
DEL: Deep Embedding Learning for Efficient Image Segmentation Inproceedings
In: IJCAI, pp. 864–870, 2018.
BibTeX | Tags:
@inproceedings{liu2018deep,
title = {DEL: Deep Embedding Learning for Efficient Image Segmentation},
author = {Yun Liu and Peng-Tao Jiang and Vahan Petrosyan and Shi-Jie Li and Jiawang Bian and Le Zhang and Ming-Ming Cheng},
year = {2018},
date = {2018-01-01},
booktitle = {IJCAI},
pages = {864--870},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zenglin Shi; Guodong Zeng; Le Zhang; Xiahai Zhuang; Lei Li; Guang Yang; Guoyan Zheng
Bayesian VoxDRN: A probabilistic deep voxelwise dilated residual network for whole heart segmentation from 3D MR images Inproceedings
In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 569–577, Springer 2018.
BibTeX | Tags:
@inproceedings{shi2018bayesian,
title = {Bayesian VoxDRN: A probabilistic deep voxelwise dilated residual network for whole heart segmentation from 3D MR images},
author = {Zenglin Shi and Guodong Zeng and Le Zhang and Xiahai Zhuang and Lei Li and Guang Yang and Guoyan Zheng},
year = {2018},
date = {2018-01-01},
booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention},
pages = {569--577},
organization = {Springer},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Jufeng Yang; Liyi Chen; Le Zhang; Xiaoxiao Sun; Dongyu She; Shao-Ping Lu; Ming-Ming Cheng
Historical context-based style classification of painting images via label distribution learning Inproceedings
In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 1154–1162, 2018.
BibTeX | Tags:
@inproceedings{yang2018historical,
title = {Historical context-based style classification of painting images via label distribution learning},
author = {Jufeng Yang and Liyi Chen and Le Zhang and Xiaoxiao Sun and Dongyu She and Shao-Ping Lu and Ming-Ming Cheng},
year = {2018},
date = {2018-01-01},
booktitle = {Proceedings of the 26th ACM International Conference on Multimedia},
pages = {1154--1162},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhenghua Chen; Le Zhang; Chaoyang Jiang; Zhiguang Cao; Wei Cui
WiFi CSI based passive human activity recognition using attention based BLSTM Journal Article
In: IEEE Transactions on Mobile Computing, 18 (11), pp. 2714–2724, 2018.
BibTeX | Tags:
@article{chen2018wifi,
title = {WiFi CSI based passive human activity recognition using attention based BLSTM},
author = {Zhenghua Chen and Le Zhang and Chaoyang Jiang and Zhiguang Cao and Wei Cui},
year = {2018},
date = {2018-01-01},
journal = {IEEE Transactions on Mobile Computing},
volume = {18},
number = {11},
pages = {2714--2724},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2017
Le Zhang; Jagannadan Varadarajan; Ponnuthurai Nagaratnam Suganthan; Narendra Ahuja; Pierre Moulin
Robust visual tracking using oblique random forests Inproceedings
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5589–5598, 2017.
BibTeX | Tags:
@inproceedings{zhang2017robust,
title = {Robust visual tracking using oblique random forests},
author = {Le Zhang and Jagannadan Varadarajan and Ponnuthurai Nagaratnam Suganthan and Narendra Ahuja and Pierre Moulin},
year = {2017},
date = {2017-01-01},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages = {5589--5598},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Le Zhang; Ponnuthurai Nagaratnam Suganthan
Benchmarking ensemble classifiers with novel co-trained kernel ridge regression and random vector functional link ensembles [research frontier] Journal Article
In: IEEE Computational Intelligence Magazine, 12 (4), pp. 61–72, 2017.
BibTeX | Tags:
@article{zhang2017benchmarking,
title = {Benchmarking ensemble classifiers with novel co-trained kernel ridge regression and random vector functional link ensembles [research frontier]},
author = {Le Zhang and Ponnuthurai Nagaratnam Suganthan},
year = {2017},
date = {2017-01-01},
journal = {IEEE Computational Intelligence Magazine},
volume = {12},
number = {4},
pages = {61--72},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}