% Yu, Dai, Liu, and Fung (2021): paper in the EMNLP 2021 proceedings,
% pages 3995--4007. The url field points at the aclanthology.org record
% 2021.emnlp-main.326; the doi field carries the bare DOI (no resolver prefix).
@inproceedings{yu-etal-2021-vision,
    author    = {Yu, Tiezheng and Dai, Wenliang and Liu, Zihan and Fung, Pascale},
    title     = {Vision Guided Generative Pre-trained Language Models for Multimodal Abstractive Summarization},
    editor    = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-tau},
    booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
    publisher = {Association for Computational Linguistics},
    address   = {Online and Punta Cana, Dominican Republic},
    month     = nov,
    year      = {2021},
    pages     = {3995--4007},
    doi       = {10.18653/v1/2021.emnlp-main.326},
    url       = {https://aclanthology.org/2021.emnlp-main.326/},
}