Publications

Preprints

FineCIR: Explicit Parsing of Fine-Grained Modification Semantics for Composed Image Retrieval

@article{li2025finecir,
  author        = {Li, Zixu and
                   Fu, Zhiheng and
                   Hu, Yupeng and
                   Chen, Zhiwei and
                   Wen, Haokun and
                   Nie, Liqiang},
  title         = {{FineCIR}: Explicit Parsing of Fine-Grained Modification Semantics for
                   Composed Image Retrieval},
  journal       = {CoRR},
  volume        = {abs/2503.21309},
  year          = {2025},
  eprint        = {2503.21309},
  archiveprefix = {arXiv}
}

Zixu Li, Zhiheng Fu, Yupeng Hu, Zhiwei Chen, Haokun Wen, and Liqiang Nie.
ArXiv Preprint

Dual Knowledge-Enhanced Two-Stage Reasoner for Multimodal Dialog Systems

@article{chen2025dual,
  author        = {Chen, Xiaolin and
                   Song, Xuemeng and
                   Wen, Haokun and
                   Guan, Weili and
                   Zhao, Xiangyu and
                   Nie, Liqiang},
  title         = {Dual Knowledge-Enhanced Two-Stage Reasoner for Multimodal Dialog Systems},
  journal       = {CoRR},
  volume        = {abs/2509.07817},
  year          = {2025},
  eprint        = {2509.07817},
  archiveprefix = {arXiv}
}

Xiaolin Chen, Xuemeng Song, Haokun Wen, Weili Guan, Xiangyu Zhao, and Liqiang Nie.
ArXiv Preprint

2026

D2MoRA: Diversity-Regulated Asymmetric MoE-LoRA Decomposition for Efficient Multi-Task Adaptation

@inproceedings{zuo2026d2mora,
  author    = {Zuo, Jianhui and
               Song, Xuemeng and
               Wen, Haokun and
               Liu, Meng and
               Hu, Yupeng and
               Wang, Jiuru and
               Nie, Liqiang},
  title     = {{D2MoRA}: Diversity-Regulated Asymmetric {MoE-LoRA} Decomposition for Efficient Multi-Task Adaptation},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2026}
}

Jianhui Zuo, Xuemeng Song, Haokun Wen, Meng Liu, Yupeng Hu, Jiuru Wang, and Liqiang Nie.
AAAI 2026

2025

Multi-modal Recommendation with Joint Content and Interaction Augmentation

@inproceedings{deng2025multimodal,
  author    = {Deng, Jiajie and
               Wen, Haokun and
               Han, Xiao and
               Song, Xuemeng and
               Zhao, Xiangyu},
  title     = {Multi-modal Recommendation with Joint Content and Interaction Augmentation},
  booktitle = {ACM Multimedia Asia},
  year      = {2025}
}

Jiajie Deng, Haokun Wen, Xiao Han, Xuemeng Song, and Xiangyu Zhao.
MMAsia 2025

Spatial Understanding from Videos: Structured Prompts Meet Simulation Data

@inproceedings{zhang2025spatial,
  author    = {Zhang, Haoyu and
               Liu, Meng and
               Li, Zaijing and
               Wen, Haokun and
               Guan, Weili and
               Wang, Yaowei and
               Nie, Liqiang},
  title     = {Spatial Understanding from Videos: Structured Prompts Meet Simulation Data},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2025}
}

Haoyu Zhang, Meng Liu, Zaijing Li, Haokun Wen, Weili Guan, Yaowei Wang, and Liqiang Nie.
NeurIPS 2025

A Comprehensive Survey on Composed Image Retrieval

@article{song2025survey,
  author    = {Song, Xuemeng and
               Lin, Haoqiang and
               Wen, Haokun and
               Hou, Bohan and
               Xu, Mingzhu and
               Nie, Liqiang},
  title     = {A Comprehensive Survey on Composed Image Retrieval},
  journal   = {ACM Transactions on Information Systems},
  volume    = {44},
  number    = {1},
  articleno = {19},
  numpages  = {54},
  publisher = {Association for Computing Machinery},
  issn      = {1046-8188},
  year      = {2025}
}

Xuemeng Song, Haoqiang Lin, Haokun Wen, Bohan Hou, Mingzhu Xu, and Liqiang Nie.
ACM TOIS 2025

ENCODER: Entity Mining and Modification Relation Binding for Composed Image Retrieval

@inproceedings{li2025encoder,
  author    = {Li, Zixu and
               Chen, Zhiwei and
               Wen, Haokun and
               Fu, Zhiheng and
               Hu, Yupeng and
               Guan, Weili},
  title     = {{ENCODER}: Entity Mining and Modification Relation Binding for Composed
               Image Retrieval},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  pages     = {5101--5109},
  publisher = {AAAI Press},
  year      = {2025}
}

Zixu Li, Zhiwei Chen, Haokun Wen, Zhiheng Fu, Yupeng Hu, and Weili Guan.
AAAI 2025

FiRE: Enhancing MLLMs with Fine-Grained Context Learning for Complex Image Retrieval

@inproceedings{hou2025fire,
  author    = {Hou, Bohan and
               Lin, Haoqiang and
               Song, Xuemeng and
               Wen, Haokun and
               Liu, Meng and
               Hu, Yupeng and
               Zhao, Xiangyu},
  title     = {{FiRE}: Enhancing {MLLMs} with Fine-Grained Context Learning for Complex
               Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on
               Research and Development in Information Retrieval},
  pages     = {803--812},
  publisher = {ACM},
  year      = {2025}
}

Bohan Hou, Haoqiang Lin, Xuemeng Song, Haokun Wen, Meng Liu, Yupeng Hu, and Xiangyu Zhao.
ACM SIGIR 2025

Pseudo-triplet Guided Few-shot Composed Image Retrieval

@inproceedings{hou2025pseudo,
  title     = {Pseudo Triplet Guided Few-shot Composed Image Retrieval},
  author    = {Hou, Bohan and Lin, Haoqiang and Wen, Haokun and Liu, Meng and Xu, Mingzhu and Song, Xuemeng},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks},
  publisher = {IEEE},
  pages     = {1--8},
  year      = {2025}
}

Bohan Hou, Haoqiang Lin, Haokun Wen, Meng Liu, Mingzhu Xu, and Xuemeng Song.
IJCNN 2025

HUD: Hierarchical Uncertainty-Aware Disambiguation Network for Composed Video Retrieval

@inproceedings{chen2025hud,
  author    = {Chen, Zhiwei and
               Hu, Yupeng and
               Li, Zixu and
               Fu, Zhiheng and
               Wen, Haokun and
               Guan, Weili},
  title     = {{HUD}: Hierarchical Uncertainty-Aware Disambiguation Network for Composed Video Retrieval},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {6143--6152},
  publisher = {ACM},
  year      = {2025}
}

Zhiwei Chen, Yupeng Hu, Zixu Li, Zhiheng Fu, Haokun Wen, and Weili Guan.
ACM MM 2025

2024

Simple but Effective Raw-Data Level Multimodal Fusion for Composed Image Retrieval

@inproceedings{wen2024simple,
  author    = {Wen, Haokun and
               Song, Xuemeng and
               Chen, Xiaolin and
               Wei, Yinwei and
               Nie, Liqiang and
               Chua, Tat-Seng},
  title     = {Simple but Effective Raw-Data Level Multimodal Fusion for Composed Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {229--239},
  year      = {2024}
}

Haokun Wen, Xuemeng Song, Xiaolin Chen, Yinwei Wei, Liqiang Nie, and Tat-Seng Chua.
ACM SIGIR 2024

Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval

@article{wen2024self,
  author  = {Wen, Haokun and
             Song, Xuemeng and
             Yin, Jianhua and
             Wu, Jianlong and
             Guan, Weili and
             Nie, Liqiang},
  title   = {Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2024},
  volume  = {46},
  number  = {5},
  pages   = {3665--3678}
}

Haokun Wen, Xuemeng Song, Jianhua Yin, Jianlong Wu, Weili Guan, and Liqiang Nie.
IEEE TPAMI 2024

Fine-Grained Textual Inversion Network for Zero-Shot Composed Image Retrieval

@inproceedings{lin2024fine,
  author    = {Lin, Haoqiang and
               Wen, Haokun and
               Song, Xuemeng and
               Liu, Meng and
               Hu, Yupeng and
               Nie, Liqiang},
  title     = {Fine-Grained Textual Inversion Network for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on
               Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {240--250},
  year      = {2024}
}

Haoqiang Lin, Haokun Wen, Xuemeng Song, Meng Liu, Yupeng Hu, and Liqiang Nie.
ACM SIGIR 2024

Differential-Perceptive and Retrieval-Augmented MLLM for Change Captioning

@inproceedings{zhang2024differential,
  author    = {Zhang, Xian and
               Wen, Haokun and
               Wu, Jianlong and
               Qin, Pengda and
               Xue, Hui and
               Nie, Liqiang},
  title     = {Differential-Perceptive and Retrieval-Augmented {MLLM} for Change Captioning},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {4148--4157},
  publisher = {ACM},
  year      = {2024}
}

Xian Zhang, Haokun Wen, Jianlong Wu, Pengda Qin, Hui Xue, and Liqiang Nie.
ACM MM 2024

Interactive Garment Recommendation with User in the Loop

@article{becattini2024interactive,
  author  = {Becattini, Federico and
             Chen, Xiaolin and
             Puccia, Andrea and
             Wen, Haokun and
             Song, Xuemeng and
             Nie, Liqiang and
             Del Bimbo, Alberto},
  title   = {Interactive Garment Recommendation with User in the Loop},
  journal = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  year    = {2025},
  volume  = {21},
  number  = {1},
  pages   = {37:1--37:21}
}

Federico Becattini, Xiaolin Chen, Andrea Puccia, Haokun Wen, Xuemeng Song, Liqiang Nie, and Alberto Del Bimbo.
ACM ToMM 2024

2023

Target-Guided Composed Image Retrieval

@inproceedings{wen2023target,
  author    = {Wen, Haokun and
               Zhang, Xian and
               Song, Xuemeng and
               Wei, Yinwei and
               Nie, Liqiang},
  title     = {Target-Guided Composed Image Retrieval},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  publisher = {ACM},
  pages     = {915--923},
  year      = {2023}
}

Haokun Wen, Xian Zhang, Xuemeng Song, Yinwei Wei, and Liqiang Nie.
ACM MM 2023

Finetuning Language Models for Multimodal Question Answering

@inproceedings{zhang2023finetuning,
  author    = {Zhang, Xin and
               Xie, Wen and
               Dai, Ziqi and
               Rao, Jun and
               Wen, Haokun and
               Luo, Xuan and
               Zhang, Meishan and
               Zhang, Min},
  title     = {Finetuning Language Models for Multimodal Question Answering},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  publisher = {ACM},
  pages     = {9420--9424},
  year      = {2023}
}

Xin Zhang, Wen Xie, Ziqi Dai, Jun Rao, Haokun Wen, Xuan Luo, Meishan Zhang, and Min Zhang.
ACM MM 2023 (Grand Challenge) 🏆 Ranked 1st in VTQA

Egocentric Early Action Prediction via Multimodal Transformer-Based Dual Action Prediction

@article{guan2023egocentric,
  author    = {Guan, Weili and
               Song, Xuemeng and
               Wang, Kejie and
               Wen, Haokun and
               Ni, Hongda and
               Wang, Yaowei and
               Chang, Xiaojun},
  title     = {Egocentric Early Action Prediction via Multimodal Transformer-Based Dual Action Prediction},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  publisher = {IEEE},
  year      = {2023},
  volume    = {33},
  number    = {9},
  pages     = {4472--4483}
}

Weili Guan, Xuemeng Song, Kejie Wang, Haokun Wen, Hongda Ni, Yaowei Wang, and Xiaojun Chang.
IEEE TCSVT 2023

2022

Personalized Fashion Compatibility Modeling via Metapath-guided Heterogeneous Graph Learning

@inproceedings{guan2022personalized,
  author    = {Guan, Weili and
               Jiao, Fangkai and
               Song, Xuemeng and
               Wen, Haokun and
               Yeh, Chung-Hsing and
               Chang, Xiaojun},
  title     = {Personalized Fashion Compatibility Modeling via Metapath-guided Heterogeneous Graph Learning},
  booktitle = {Proceedings of the International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {482--491},
  publisher = {ACM},
  year      = {2022}
}

Weili Guan, Fangkai Jiao, Xuemeng Song, Haokun Wen, Chung-Hsing Yeh, and Xiaojun Chang.
ACM SIGIR 2022

Partially Supervised Compatibility Modeling

@article{guan2022partially,
  author    = {Guan, Weili and
               Wen, Haokun and
               Song, Xuemeng and
               Wang, Chun and
               Yeh, Chung-Hsing and
               Chang, Xiaojun and
               Nie, Liqiang},
  title     = {Partially Supervised Compatibility Modeling},
  journal   = {IEEE Transactions on Image Processing},
  publisher = {IEEE},
  year      = {2022},
  volume    = {31},
  pages     = {4733--4745}
}

Weili Guan, Haokun Wen, Xuemeng Song, Chun Wang, Chung-Hsing Yeh, Xiaojun Chang, and Liqiang Nie.
IEEE TIP 2022

2021

Comprehensive Linguistic-Visual Composition Network for Image Retrieval

@inproceedings{wen2021comprehensive,
  author    = {Wen, Haokun and
               Song, Xuemeng and
               Yang, Xin and
               Zhan, Yibing and
               Nie, Liqiang},
  title     = {Comprehensive Linguistic-Visual Composition Network for Image Retrieval},
  booktitle = {Proceedings of the International ACM SIGIR Conference on
               Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {1369--1378},
  year      = {2021}
}

Haokun Wen, Xuemeng Song, Xin Yang, Yibing Zhan, and Liqiang Nie.
ACM SIGIR 2021

Multimodal Compatibility Modeling via Exploring the Consistent and Complementary Correlations

@inproceedings{guan2021multimodal,
  author    = {Guan, Weili and
               Wen, Haokun and
               Song, Xuemeng and
               Yeh, Chung-Hsing and
               Chang, Xiaojun and
               Nie, Liqiang},
  title     = {Multimodal Compatibility Modeling via Exploring the Consistent and Complementary Correlations},
  booktitle = {Proceedings of the ACM International Conference on Multimedia},
  pages     = {2299--2307},
  publisher = {ACM},
  year      = {2021}
}

Weili Guan, Haokun Wen, Xuemeng Song, Chung-Hsing Yeh, Xiaojun Chang, and Liqiang Nie.
ACM MM 2021

Attribute-wise Explainable Fashion Compatibility Modeling

@article{yang2021attribute,
  author    = {Yang, Xin and
               Song, Xuemeng and
               Feng, Fuli and
               Wen, Haokun and
               Duan, Ling-Yu and
               Nie, Liqiang},
  title     = {Attribute-wise Explainable Fashion Compatibility Modeling},
  journal   = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  publisher = {ACM},
  year      = {2021},
  volume    = {17},
  number    = {1},
  pages     = {1--21}
}

Xin Yang, Xuemeng Song, Fuli Feng, Haokun Wen, Ling-Yu Duan, and Liqiang Nie.
ACM ToMM 2021

2020

Generative Attribute Manipulation Scheme for Flexible Fashion Search

@inproceedings{yang2020generative,
  author    = {Yang, Xin and
               Song, Xuemeng and
               Han, Xianjing and
               Wen, Haokun and
               Nie, Jie and
               Nie, Liqiang},
  title     = {Generative Attribute Manipulation Scheme for Flexible Fashion Search},
  booktitle = {Proceedings of the International ACM SIGIR Conference on
               Research and Development in Information Retrieval},
  publisher = {ACM},
  pages     = {941--950},
  year      = {2020}
}

Xin Yang, Xuemeng Song, Xianjing Han, Haokun Wen, Jie Nie, and Liqiang Nie.
ACM SIGIR 2020

Haokun Wen / 温皓琨