Publications

Preprints


FineCIR: Explicit Parsing of Fine-Grained Modification Semantics for Composed Image Retrieval
@article{li2025finecir,
  author        = {Li, Zixu and
                   Fu, Zhiheng and
                   Hu, Yupeng and
                   Chen, Zhiwei and
                   Wen, Haokun and
                   Nie, Liqiang},
  title         = {{FineCIR}: Explicit Parsing of Fine-Grained Modification Semantics for
                   Composed Image Retrieval},
  journal       = {CoRR},
  volume        = {abs/2503.21309},
  year          = {2025},
  eprint        = {2503.21309},
  archiveprefix = {arXiv}
}
Zixu Li, Zhiheng Fu, Yupeng Hu, Zhiwei Chen, Haokun Wen, and Liqiang Nie.
ArXiv Preprint
Dual Knowledge-Enhanced Two-Stage Reasoner for Multimodal Dialog Systems
@article{chen2025dual,
  author        = {Chen, Xiaolin and
                   Song, Xuemeng and
                   Wen, Haokun and
                   Guan, Weili and
                   Zhao, Xiangyu and
                   Nie, Liqiang},
  title         = {Dual Knowledge-Enhanced Two-Stage Reasoner for Multimodal Dialog Systems},
  journal       = {CoRR},
  volume        = {abs/2509.07817},
  year          = {2025},
  eprint        = {2509.07817},
  archiveprefix = {arXiv}
}
Xiaolin Chen, Xuemeng Song, Haokun Wen, Weili Guan, Xiangyu Zhao, and Liqiang Nie.
ArXiv Preprint

2026


D2MoRA: Diversity-Regulated Asymmetric MoE-LoRA Decomposition for Efficient Multi-Task Adaptation
@inproceedings{zuo2026d2mora,
  author    = {Zuo, Jianhui and Song, Xuemeng and Wen, Haokun and Liu, Meng and Hu, Yupeng and Wang, Jiuru and Nie, Liqiang},
  title     = {{D2MoRA}: Diversity-Regulated Asymmetric {MoE-LoRA} Decomposition for Efficient Multi-Task Adaptation},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2026}
}
Jianhui Zuo, Xuemeng Song, Haokun Wen, Meng Liu, Yupeng Hu, Jiuru Wang, and Liqiang Nie.
AAAI 2026

2025


Multi-modal Recommendation with Joint Content and Interaction Augmentation
@inproceedings{deng2025multimodal,
  author    = {Deng, Jiajie and
               Wen, Haokun and
               Han, Xiao and
               Song, Xuemeng and
               Zhao, Xiangyu},
  title     = {Multi-modal Recommendation with Joint Content and Interaction Augmentation},
  booktitle = {ACM Multimedia Asia},
  year      = {2025},
}
Jiajie Deng, Haokun Wen, Xiao Han, Xuemeng Song, and Xiangyu Zhao.
MMAsia 2025
Spatial Understanding from Videos: Structured Prompts Meet Simulation Data
@inproceedings{zhang2025spatial,
  author    = {Zhang, Haoyu and Liu, Meng and Li, Zaijing and Wen, Haokun and Guan, Weili and Wang, Yaowei and Nie, Liqiang},
  title     = {Spatial Understanding from Videos: Structured Prompts Meet Simulation Data},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2025}
}
Haoyu Zhang, Meng Liu, Zaijing Li, Haokun Wen, Weili Guan, Yaowei Wang, and Liqiang Nie.
NeurIPS 2025
A Comprehensive Survey on Composed Image Retrieval
@article{song2025survey,
  author    = {Song, Xuemeng and Lin, Haoqiang and Wen, Haokun and Hou, Bohan and Xu, Mingzhu and Nie, Liqiang},
  title     = {A Comprehensive Survey on Composed Image Retrieval},
  journal   = {ACM Transactions on Information Systems},
  volume    = {44},
  number    = {1},
  articleno = {19},
  numpages  = {54},
  publisher = {Association for Computing Machinery},
  issn      = {1046-8188},
  year      = {2025}
}
Xuemeng Song, Haoqiang Lin, Haokun Wen, Bohan Hou, Mingzhu Xu, and Liqiang Nie.
ACM TOIS 2025
ENCODER: Entity Mining and Modification Relation Binding for Composed Image Retrieval
@inproceedings{li2025encoder,
  author       = {Li, Zixu and
                  Chen, Zhiwei and
                  Wen, Haokun and
                  Fu, Zhiheng and
                  Hu, Yupeng and
                  Guan, Weili},
  title        = {{ENCODER}: Entity Mining and Modification Relation Binding for Composed
                  Image Retrieval},
  booktitle    = {Proceedings of the AAAI Conference on Artificial Intelligence},
  pages        = {5101--5109},
  publisher    = {AAAI Press},
  year         = {2025}
}
Zixu Li, Zhiwei Chen, Haokun Wen, Zhiheng Fu, Yupeng Hu, and Weili Guan.
AAAI 2025
FiRE: Enhancing MLLMs with Fine-Grained Context Learning for Complex Image Retrieval
@inproceedings{hou2025fire,
  author       = {Hou, Bohan and
                  Lin, Haoqiang and
                  Song, Xuemeng and
                  Wen, Haokun and
                  Liu, Meng and
                  Hu, Yupeng and
                  Zhao, Xiangyu},
  title        = {{FiRE}: Enhancing {MLLMs} with Fine-Grained Context Learning for Complex
                  Image Retrieval},
  booktitle    = {Proceedings of the International ACM SIGIR Conference on
                  Research and Development in Information Retrieval},
  pages        = {803--812},
  publisher    = {ACM},
  year         = {2025}
}
Bohan Hou, Haoqiang Lin, Xuemeng Song, Haokun Wen, Meng Liu, Yupeng Hu, and Xiangyu Zhao.
ACM SIGIR 2025
Pseudo-triplet Guided Few-shot Composed Image Retrieval
@inproceedings{hou2025pseudo,
  author    = {Bohan Hou and Haoqiang Lin and Haokun Wen and Meng Liu and Mingzhu Xu and Xuemeng Song},
  title     = {Pseudo Triplet Guided Few-shot Composed Image Retrieval},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks},
  publisher = {IEEE},
  pages     = {1--8},
  year      = {2025},
}
Bohan Hou, Haoqiang Lin, Haokun Wen, Meng Liu, Mingzhu Xu, and Xuemeng Song.
IJCNN 2025
HUD: Hierarchical Uncertainty-Aware Disambiguation Network for Composed Video Retrieval
@inproceedings{chen2025hud,
  author    = {Chen, Zhiwei and Hu, Yupeng and Li, Zixu and Fu, Zhiheng and Wen, Haokun and Guan, Weili},
  title     = {{HUD}: Hierarchical Uncertainty-Aware Disambiguation Network for Composed Video Retrieval},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {6143--6152},
  publisher = {ACM},
  year      = {2025}
}
Zhiwei Chen, Yupeng Hu, Zixu Li, Zhiheng Fu, Haokun Wen, and Weili Guan.
ACM MM 2025

2024


Simple but Effective Raw-Data Level Multimodal Fusion for Composed Image Retrieval
@inproceedings{wen2024simple,
  author    = {Wen, Haokun and
               Song, Xuemeng and
               Chen, Xiaolin and
               Wei, Yinwei and
               Nie, Liqiang and
               Chua, Tat-Seng},
  title     = {Simple but Effective Raw-Data Level Multimodal Fusion for Composed Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {229--239},
  year      = {2024},
}
Haokun Wen, Xuemeng Song, Xiaolin Chen, Yinwei Wei, Liqiang Nie, and Tat-Seng Chua.
ACM SIGIR 2024
Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval
@article{wen2024self,
  author  = {Wen, Haokun and
             Song, Xuemeng and
             Yin, Jianhua and
             Wu, Jianlong and
             Guan, Weili and
             Nie, Liqiang},
  title   = {Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {46},
  number  = {5},
  pages   = {3665--3678},
  year    = {2024},
}
Haokun Wen, Xuemeng Song, Jianhua Yin, Jianlong Wu, Weili Guan, and Liqiang Nie.
IEEE TPAMI 2024
Fine-Grained Textual Inversion Network for Zero-Shot Composed Image Retrieval
@inproceedings{lin2024fine,
  author    = {Lin, Haoqiang and
               Wen, Haokun and
               Song, Xuemeng and
               Liu, Meng and
               Hu, Yupeng and
               Nie, Liqiang},
  title     = {Fine-Grained Textual Inversion Network for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {240--250},
  year      = {2024},
}
Haoqiang Lin, Haokun Wen, Xuemeng Song, Meng Liu, Yupeng Hu, and Liqiang Nie.
ACM SIGIR 2024
Differential-Perceptive and Retrieval-Augmented MLLM for Change Captioning
@inproceedings{zhang2024differential,
  author       = {Zhang, Xian and Wen, Haokun and Wu, Jianlong and Qin, Pengda and Xue, Hui and Nie, Liqiang},
  title        = {Differential-Perceptive and Retrieval-Augmented {MLLM} for Change Captioning},
  booktitle    = {Proceedings of the ACM International Conference on Multimedia},
  pages        = {4148--4157},
  publisher    = {ACM},
  year         = {2024}
}
Xian Zhang, Haokun Wen, Jianlong Wu, Pengda Qin, Hui Xue, and Liqiang Nie.
ACM MM 2024
Interactive Garment Recommendation with User in the Loop
@article{becattini2024interactive,
  author  = {Becattini, Federico and
             Chen, Xiaolin and
             Puccia, Andrea and
             Wen, Haokun and
             Song, Xuemeng and
             Nie, Liqiang and
             Del Bimbo, Alberto},
  title   = {Interactive Garment Recommendation with User in the Loop},
  journal = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  volume  = {21},
  number  = {1},
  pages   = {37:1--37:21},
  year    = {2025},
}
Federico Becattini, Xiaolin Chen, Andrea Puccia, Haokun Wen, Xuemeng Song, Liqiang Nie, and Alberto Del Bimbo.
ACM ToMM 2024

2023


Target-Guided Composed Image Retrieval
@inproceedings{wen2023target,
  author    = {Wen, Haokun and
               Zhang, Xian and
               Song, Xuemeng and
               Wei, Yinwei and
               Nie, Liqiang},
  title     = {Target-Guided Composed Image Retrieval},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  publisher = {ACM},
  pages     = {915--923},
  year      = {2023},
}
Haokun Wen, Xian Zhang, Xuemeng Song, Yinwei Wei, and Liqiang Nie.
ACM MM 2023
Finetuning Language Models for Multimodal Question Answering
@inproceedings{zhang2023finetuning,
  author    = {Zhang, Xin and
               Xie, Wen and
               Dai, Ziqi and
               Rao, Jun and
               Wen, Haokun and
               Luo, Xuan and
               Zhang, Meishan and
               Zhang, Min},
  title     = {Finetuning Language Models for Multimodal Question Answering},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  publisher = {ACM},
  pages     = {9420--9424},
  year      = {2023},
}
Xin Zhang, Wen Xie, Ziqi Dai, Jun Rao, Haokun Wen, Xuan Luo, Meishan Zhang, and Min Zhang.
ACM MM 2023 (Grand Challenge) 🏆 Ranked 1st in VTQA
Egocentric Early Action Prediction via Multimodal Transformer-Based Dual Action Prediction
@article{guan2023egocentric,
  author    = {Guan, Weili and
               Song, Xuemeng and
               Wang, Kejie and
               Wen, Haokun and
               Ni, Hongda and
               Wang, Yaowei and
               Chang, Xiaojun},
  title     = {Egocentric Early Action Prediction via Multimodal Transformer-Based Dual Action Prediction},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  publisher = {IEEE},
  volume    = {33},
  number    = {9},
  pages     = {4472--4483},
  year      = {2023},
}
Weili Guan, Xuemeng Song, Kejie Wang, Haokun Wen, Hongda Ni, Yaowei Wang, and Xiaojun Chang.
IEEE TCSVT 2023

2022


Personalized Fashion Compatibility Modeling via Metapath-guided Heterogeneous Graph Learning
@inproceedings{guan2022personalized,
  author    = {Guan, Weili and Jiao, Fangkai and Song, Xuemeng and Wen, Haokun and Yeh, Chung-Hsing and Chang, Xiaojun},
  title     = {Personalized Fashion Compatibility Modeling via Metapath-guided Heterogeneous Graph Learning},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {482--491},
  publisher = {ACM},
  year      = {2022}
}
Weili Guan, Fangkai Jiao, Xuemeng Song, Haokun Wen, Chung-Hsing Yeh, and Xiaojun Chang.
ACM SIGIR 2022
Partially Supervised Compatibility Modeling
@article{guan2022partially,
  author    = {Guan, Weili and
               Wen, Haokun and
               Song, Xuemeng and
               Wang, Chun and
               Yeh, Chung-Hsing and
               Chang, Xiaojun and
               Nie, Liqiang},
  title     = {Partially Supervised Compatibility Modeling},
  journal   = {IEEE Transactions on Image Processing},
  publisher = {IEEE},
  volume    = {31},
  pages     = {4733--4745},
  year      = {2022},
}
Weili Guan, Haokun Wen, Xuemeng Song, Chun Wang, Chung-Hsing Yeh, Xiaojun Chang, and Liqiang Nie.
IEEE TIP 2022

2021


Comprehensive Linguistic-Visual Composition Network for Image Retrieval
@inproceedings{wen2021comprehensive,
  author    = {Wen, Haokun and
               Song, Xuemeng and
               Yang, Xin and
               Zhan, Yibing and
               Nie, Liqiang},
  title     = {Comprehensive Linguistic-Visual Composition Network for Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {1369--1378},
  year      = {2021},
}
Haokun Wen, Xuemeng Song, Xin Yang, Yibing Zhan, and Liqiang Nie.
ACM SIGIR 2021
Multimodal Compatibility Modeling via Exploring the Consistent and Complementary Correlations
@inproceedings{guan2021multimodal,
  author    = {Guan, Weili and Wen, Haokun and Song, Xuemeng and Yeh, Chung-Hsing and Chang, Xiaojun and Nie, Liqiang},
  title     = {Multimodal Compatibility Modeling via Exploring the Consistent and Complementary Correlations},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {2299--2307},
  publisher = {ACM},
  year      = {2021}
}
Weili Guan, Haokun Wen, Xuemeng Song, Chung-Hsing Yeh, Xiaojun Chang, and Liqiang Nie.
ACM MM 2021
Attribute-wise Explainable Fashion Compatibility Modeling
@article{yang2021attribute,
  author    = {Yang, Xin and
               Song, Xuemeng and
               Feng, Fuli and
               Wen, Haokun and
               Duan, Ling-Yu and
               Nie, Liqiang},
  title     = {Attribute-wise Explainable Fashion Compatibility Modeling},
  journal   = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  publisher = {ACM},
  volume    = {17},
  number    = {1},
  pages     = {1--21},
  year      = {2021},
}
Xin Yang, Xuemeng Song, Fuli Feng, Haokun Wen, Ling-Yu Duan, and Liqiang Nie.
ACM ToMM 2021

2020


Generative Attribute Manipulation Scheme for Flexible Fashion Search
@inproceedings{yang2020generative,
  author    = {Yang, Xin and
               Song, Xuemeng and
               Han, Xianjing and
               Wen, Haokun and
               Nie, Jie and
               Nie, Liqiang},
  title     = {Generative Attribute Manipulation Scheme for Flexible Fashion Search},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {941--950},
  year      = {2020},
}
Xin Yang, Xuemeng Song, Xianjing Han, Haokun Wen, Jie Nie, and Liqiang Nie.
ACM SIGIR 2020