@inproceedings{lin-etal-2024-personalized,
title = "Personalized Video Comment Generation",
author = "Lin, Xudong and
Zare, Ali and
Huang, Shiyuan and
Yang, Ming-Hsuan and
Chang, Shih-Fu and
Zhang, Li",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.979",
pages = "16806--16820",
abstract = "Generating personalized responses, particularly in the context of video, poses a unique challenge for language models. This paper introduces the novel task of \textbf{Personalized Video Comment Generation} (PVCG), aiming to predict user comments tailored to both the input video and the user{'}s comment history, where the user is unseen during the model training process. Unlike existing video captioning tasks that ignores the personalization in the text generation process, we introduce PerVidCom, a new dataset specifically collected for this novel task with diverse personalized comments from YouTube. Recognizing the limitations of existing captioning metrics for evaluating this task, we propose a new automatic metric based on Large Language Models (LLMs) with few-shot in-context learning, named FICL-Score, specifically measuring quality from the aspects of emotion, language style and content relevance. We verify the proposed metric with human evaluations. We establish baselines using prominent Multimodal LLMs (MLLMs), analyze their performance discrepancies through extensive evaluation, and identifies directions for future improvement on this important task. Our research opens up a new direction of personalizing MLLMs and paves the way for future research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lin-etal-2024-personalized">
<titleInfo>
<title>Personalized Video Comment Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xudong</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Zare</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiyuan</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ming-Hsuan</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shih-Fu</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generating personalized responses, particularly in the context of video, poses a unique challenge for language models. This paper introduces the novel task of Personalized Video Comment Generation (PVCG), aiming to predict user comments tailored to both the input video and the user’s comment history, where the user is unseen during the model training process. Unlike existing video captioning tasks that ignores the personalization in the text generation process, we introduce PerVidCom, a new dataset specifically collected for this novel task with diverse personalized comments from YouTube. Recognizing the limitations of existing captioning metrics for evaluating this task, we propose a new automatic metric based on Large Language Models (LLMs) with few-shot in-context learning, named FICL-Score, specifically measuring quality from the aspects of emotion, language style and content relevance. We verify the proposed metric with human evaluations. We establish baselines using prominent Multimodal LLMs (MLLMs), analyze their performance discrepancies through extensive evaluation, and identifies directions for future improvement on this important task. Our research opens up a new direction of personalizing MLLMs and paves the way for future research.</abstract>
<identifier type="citekey">lin-etal-2024-personalized</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.979</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>16806</start>
<end>16820</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Personalized Video Comment Generation
%A Lin, Xudong
%A Zare, Ali
%A Huang, Shiyuan
%A Yang, Ming-Hsuan
%A Chang, Shih-Fu
%A Zhang, Li
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F lin-etal-2024-personalized
%X Generating personalized responses, particularly in the context of video, poses a unique challenge for language models. This paper introduces the novel task of Personalized Video Comment Generation (PVCG), aiming to predict user comments tailored to both the input video and the user’s comment history, where the user is unseen during the model training process. Unlike existing video captioning tasks that ignores the personalization in the text generation process, we introduce PerVidCom, a new dataset specifically collected for this novel task with diverse personalized comments from YouTube. Recognizing the limitations of existing captioning metrics for evaluating this task, we propose a new automatic metric based on Large Language Models (LLMs) with few-shot in-context learning, named FICL-Score, specifically measuring quality from the aspects of emotion, language style and content relevance. We verify the proposed metric with human evaluations. We establish baselines using prominent Multimodal LLMs (MLLMs), analyze their performance discrepancies through extensive evaluation, and identifies directions for future improvement on this important task. Our research opens up a new direction of personalizing MLLMs and paves the way for future research.
%U https://aclanthology.org/2024.findings-emnlp.979
%P 16806-16820
Markdown (Informal)
[Personalized Video Comment Generation](https://aclanthology.org/2024.findings-emnlp.979) (Lin et al., Findings 2024)
ACL
- Xudong Lin, Ali Zare, Shiyuan Huang, Ming-Hsuan Yang, Shih-Fu Chang, and Li Zhang. 2024. Personalized Video Comment Generation. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 16806–16820, Miami, Florida, USA. Association for Computational Linguistics.