Publications
Preprints
FineCIR: Explicit Parsing of Fine-Grained Modification Semantics for Composed Image Retrieval
@article{li2025finecir,
  author        = {Li, Zixu and Fu, Zhiheng and Hu, Yupeng and Chen, Zhiwei and Wen, Haokun and Nie, Liqiang},
  title         = {{FineCIR}: Explicit Parsing of Fine-Grained Modification Semantics for Composed Image Retrieval},
  journal       = {CoRR},
  volume        = {abs/2503.21309},
  year          = {2025},
  eprint        = {2503.21309},
  archiveprefix = {arXiv},
}
Dual Knowledge-Enhanced Two-Stage Reasoner for Multimodal Dialog Systems
@article{chen2025dual,
  author        = {Chen, Xiaolin and Song, Xuemeng and Wen, Haokun and Guan, Weili and Zhao, Xiangyu and Nie, Liqiang},
  title         = {Dual Knowledge-Enhanced Two-Stage Reasoner for Multimodal Dialog Systems},
  journal       = {CoRR},
  volume        = {abs/2509.07817},
  year          = {2025},
  eprint        = {2509.07817},
  archiveprefix = {arXiv},
}
2026
D2MoRA: Diversity-Regulated Asymmetric MoE-LoRA Decomposition for Efficient Multi-Task Adaptation
@inproceedings{zuo2026d2mora,
  author    = {Zuo, Jianhui and Song, Xuemeng and Wen, Haokun and Liu, Meng and Hu, Yupeng and Wang, Jiuru and Nie, Liqiang},
  title     = {{D2MoRA}: Diversity-Regulated Asymmetric {MoE-LoRA} Decomposition for Efficient Multi-Task Adaptation},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2026},
}
Jianhui Zuo, Xuemeng Song, Haokun Wen, Meng Liu, Yupeng Hu, Jiuru Wang, and Liqiang Nie.
AAAI 2026
AAAI 2026
2025
Multi-modal Recommendation with Joint Content and Interaction Augmentation
@inproceedings{deng2025multimodal,
  author    = {Deng, Jiajie and Wen, Haokun and Han, Xiao and Song, Xuemeng and Zhao, Xiangyu},
  title     = {Multi-modal Recommendation with Joint Content and Interaction Augmentation},
  booktitle = {ACM Multimedia Asia},
  year      = {2025},
}
Spatial Understanding from Videos: Structured Prompts Meet Simulation Data
@inproceedings{zhang2025spatial,
  author    = {Zhang, Haoyu and Liu, Meng and Li, Zaijing and Wen, Haokun and Guan, Weili and Wang, Yaowei and Nie, Liqiang},
  title     = {Spatial Understanding from Videos: Structured Prompts Meet Simulation Data},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2025},
}
Haoyu Zhang, Meng Liu, Zaijing Li, Haokun Wen, Weili Guan, Yaowei Wang, and Liqiang Nie.
NeurIPS 2025
NeurIPS 2025
A Comprehensive Survey on Composed Image Retrieval
@article{song2025survey,
  author    = {Song, Xuemeng and Lin, Haoqiang and Wen, Haokun and Hou, Bohan and Xu, Mingzhu and Nie, Liqiang},
  title     = {A Comprehensive Survey on Composed Image Retrieval},
  journal   = {ACM Transactions on Information Systems},
  volume    = {44},
  number    = {1},
  articleno = {19},
  numpages  = {54},
  publisher = {Association for Computing Machinery},
  issn      = {1046-8188},
  year      = {2025},
}
ENCODER: Entity Mining and Modification Relation Binding for Composed Image Retrieval
@inproceedings{li2025encoder,
  author    = {Li, Zixu and Chen, Zhiwei and Wen, Haokun and Fu, Zhiheng and Hu, Yupeng and Guan, Weili},
  title     = {{ENCODER}: Entity Mining and Modification Relation Binding for Composed Image Retrieval},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  pages     = {5101--5109},
  publisher = {AAAI Press},
  year      = {2025},
}
FiRE: Enhancing MLLMs with Fine-Grained Context Learning for Complex Image Retrieval
@inproceedings{hou2025fire,
  author    = {Hou, Bohan and Lin, Haoqiang and Song, Xuemeng and Wen, Haokun and Liu, Meng and Hu, Yupeng and Zhao, Xiangyu},
  title     = {{FiRE}: Enhancing {MLLMs} with Fine-Grained Context Learning for Complex Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {803--812},
  publisher = {ACM},
  year      = {2025},
}
Bohan Hou, Haoqiang Lin, Xuemeng Song, Haokun Wen, Meng Liu, Yupeng Hu, and Xiangyu Zhao.
ACM SIGIR 2025
ACM SIGIR 2025
Pseudo-triplet Guided Few-shot Composed Image Retrieval
@inproceedings{hou2025pseudo,
  author    = {Hou, Bohan and Lin, Haoqiang and Wen, Haokun and Liu, Meng and Xu, Mingzhu and Song, Xuemeng},
  title     = {Pseudo-triplet Guided Few-shot Composed Image Retrieval},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks},
  pages     = {1--8},
  publisher = {IEEE},
  year      = {2025},
}
HUD: Hierarchical Uncertainty-Aware Disambiguation Network for Composed Video Retrieval
@inproceedings{chen2025hud,
  author    = {Chen, Zhiwei and Hu, Yupeng and Li, Zixu and Fu, Zhiheng and Wen, Haokun and Guan, Weili},
  title     = {{HUD}: Hierarchical Uncertainty-Aware Disambiguation Network for Composed Video Retrieval},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {6143--6152},
  publisher = {ACM},
  year      = {2025},
}
2024
Simple but Effective Raw-Data Level Multimodal Fusion for Composed Image Retrieval
@inproceedings{wen2024simple,
  author    = {Wen, Haokun and Song, Xuemeng and Chen, Xiaolin and Wei, Yinwei and Nie, Liqiang and Chua, Tat-Seng},
  title     = {Simple but Effective Raw-Data Level Multimodal Fusion for Composed Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {229--239},
  publisher = {ACM},
  year      = {2024},
}
Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval
@article{wen2024self,
  author  = {Wen, Haokun and Song, Xuemeng and Yin, Jianhua and Wu, Jianlong and Guan, Weili and Nie, Liqiang},
  title   = {Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {46},
  number  = {5},
  pages   = {3665--3678},
  year    = {2024},
}
Fine-Grained Textual Inversion Network for Zero-Shot Composed Image Retrieval
@inproceedings{lin2024fine,
  author    = {Lin, Haoqiang and Wen, Haokun and Song, Xuemeng and Liu, Meng and Hu, Yupeng and Nie, Liqiang},
  title     = {Fine-Grained Textual Inversion Network for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {240--250},
  publisher = {ACM},
  year      = {2024},
}
Differential-Perceptive and Retrieval-Augmented MLLM for Change Captioning
@inproceedings{zhang2024differential,
  author    = {Zhang, Xian and Wen, Haokun and Wu, Jianlong and Qin, Pengda and Xue, Hui and Nie, Liqiang},
  title     = {Differential-Perceptive and Retrieval-Augmented {MLLM} for Change Captioning},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {4148--4157},
  publisher = {ACM},
  year      = {2024},
}
Interactive Garment Recommendation with User in the Loop
@article{becattini2024interactive,
  author  = {Becattini, Federico and Chen, Xiaolin and Puccia, Andrea and Wen, Haokun and Song, Xuemeng and Nie, Liqiang and Del Bimbo, Alberto},
  title   = {Interactive Garment Recommendation with User in the Loop},
  journal = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  volume  = {21},
  number  = {1},
  pages   = {37:1--37:21},
  year    = {2025},
}
Federico Becattini, Xiaolin Chen, Andrea Puccia, Haokun Wen, Xuemeng Song, Liqiang Nie, and Alberto Del Bimbo.
ACM ToMM 2024
ACM ToMM 2024
2023
Target-Guided Composed Image Retrieval
@inproceedings{wen2023target,
  author    = {Wen, Haokun and Zhang, Xian and Song, Xuemeng and Wei, Yinwei and Nie, Liqiang},
  title     = {Target-Guided Composed Image Retrieval},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {915--923},
  publisher = {ACM},
  year      = {2023},
}
Finetuning Language Models for Multimodal Question Answering
@inproceedings{zhang2023finetuning,
  author    = {Zhang, Xin and Xie, Wen and Dai, Ziqi and Rao, Jun and Wen, Haokun and Luo, Xuan and Zhang, Meishan and Zhang, Min},
  title     = {Finetuning Language Models for Multimodal Question Answering},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {9420--9424},
  publisher = {ACM},
  year      = {2023},
}
Xin Zhang, Wen Xie, Ziqi Dai, Jun Rao, Haokun Wen, Xuan Luo, Meishan Zhang, and Min Zhang.
ACM MM 2023 (Grand Challenge) 🏆 Ranked 1st in VTQA
ACM MM 2023 (Grand Challenge) 🏆 Ranked 1st in VTQA
Egocentric Early Action Prediction via Multimodal Transformer-Based Dual Action Prediction
@article{guan2023egocentric,
  author    = {Guan, Weili and Song, Xuemeng and Wang, Kejie and Wen, Haokun and Ni, Hongda and Wang, Yaowei and Chang, Xiaojun},
  title     = {Egocentric Early Action Prediction via Multimodal Transformer-Based Dual Action Prediction},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume    = {33},
  number    = {9},
  pages     = {4472--4483},
  publisher = {IEEE},
  year      = {2023},
}
Weili Guan, Xuemeng Song, Kejie Wang, Haokun Wen, Hongda Ni, Yaowei Wang, and Xiaojun Chang.
IEEE TCSVT 2023

IEEE TCSVT 2023
2022
Personalized Fashion Compatibility Modeling via Metapath-guided Heterogeneous Graph Learning
@inproceedings{guan2022personalized,
  author    = {Guan, Weili and Jiao, Fangkai and Song, Xuemeng and Wen, Haokun and Yeh, Chung-Hsing and Chang, Xiaojun},
  title     = {Personalized Fashion Compatibility Modeling via Metapath-guided Heterogeneous Graph Learning},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {482--491},
  publisher = {ACM},
  year      = {2022},
}
Weili Guan, Fangkai Jiao, Xuemeng Song, Haokun Wen, Chung-Hsing Yeh, and Xiaojun Chang.
ACM SIGIR 2022

ACM SIGIR 2022
Partially Supervised Compatibility Modeling
@article{guan2022partially,
  author    = {Guan, Weili and Wen, Haokun and Song, Xuemeng and Wang, Chun and Yeh, Chung-Hsing and Chang, Xiaojun and Nie, Liqiang},
  title     = {Partially Supervised Compatibility Modeling},
  journal   = {IEEE Transactions on Image Processing},
  volume    = {31},
  pages     = {4733--4745},
  publisher = {IEEE},
  year      = {2022},
}
Weili Guan, Haokun Wen, Xuemeng Song, Chun Wang, Chung-Hsing Yeh, Xiaojun Chang, and Liqiang Nie.
IEEE TIP 2022

IEEE TIP 2022
2021
Comprehensive Linguistic-Visual Composition Network for Image Retrieval
@inproceedings{wen2021comprehensive,
  author    = {Wen, Haokun and Song, Xuemeng and Yang, Xin and Zhan, Yibing and Nie, Liqiang},
  title     = {Comprehensive Linguistic-Visual Composition Network for Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {1369--1378},
  publisher = {ACM},
  year      = {2021},
}
Multimodal Compatibility Modeling via Exploring the Consistent and Complementary Correlations
@inproceedings{guan2021multimodal,
  author    = {Guan, Weili and Wen, Haokun and Song, Xuemeng and Yeh, Chung-Hsing and Chang, Xiaojun and Nie, Liqiang},
  title     = {Multimodal Compatibility Modeling via Exploring the Consistent and Complementary Correlations},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {2299--2307},
  publisher = {ACM},
  year      = {2021},
}
Attribute-wise Explainable Fashion Compatibility Modeling
@article{yang2021attribute,
  author    = {Yang, Xin and Song, Xuemeng and Feng, Fuli and Wen, Haokun and Duan, Ling-Yu and Nie, Liqiang},
  title     = {Attribute-wise Explainable Fashion Compatibility Modeling},
  journal   = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  volume    = {17},
  number    = {1},
  pages     = {1--21},
  publisher = {ACM},
  year      = {2021},
}
2020
Generative Attribute Manipulation Scheme for Flexible Fashion Search
@inproceedings{yang2020generative,
  author    = {Yang, Xin and Song, Xuemeng and Han, Xianjing and Wen, Haokun and Nie, Jie and Nie, Liqiang},
  title     = {Generative Attribute Manipulation Scheme for Flexible Fashion Search},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {941--950},
  publisher = {ACM},
  year      = {2020},
}