BibTeX
@inproceedings{le-2024-cross,
    title = "Cross-Lingual Summarization with Pseudo-Label Regularization",
    author = "Le, Thang",
    editor = "Duh, Kevin  and
      Gomez, Helena  and
      Bethard, Steven",
    booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
    month = jun,
    year = "2024",
    address = "Mexico City, Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-naacl.289",
    doi = "10.18653/v1/2024.findings-naacl.289",
    pages = "4644--4677",
    abstract = "Cross-Lingual Summarization (XLS) aims to summarize a document in the source language into a condensed version in the target language, effectively removing language barriers for non-native readers. Previous approaches, however, share the limitation that only a single reference (gold summary) is exploited during model training, exposing the base model to an underrepresented hypothesis space, since the actual number of possible hypotheses is exponentially large. To alleviate this problem, we present a study that adopts pseudo-labels to regularize standard cross-lingual summarization training. We investigate several components contributing to the gains from regularized training, through experiments involving 8 diverse languages from different families. Overall, we show that pseudo-labeling is a simple and effective approach that significantly improves over standard gold-reference training in XLS.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="le-2024-cross">
    <titleInfo>
      <title>Cross-Lingual Summarization with Pseudo-Label Regularization</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Thang</namePart>
      <namePart type="family">Le</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="family">Duh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helena</namePart>
        <namePart type="family">Gomez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bethard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Cross-Lingual Summarization (XLS) aims to summarize a document in the source language into a condensed version in the target language, effectively removing language barriers for non-native readers. Previous approaches, however, share the limitation that only a single reference (gold summary) is exploited during model training, exposing the base model to an underrepresented hypothesis space, since the actual number of possible hypotheses is exponentially large. To alleviate this problem, we present a study that adopts pseudo-labels to regularize standard cross-lingual summarization training. We investigate several components contributing to the gains from regularized training, through experiments involving 8 diverse languages from different families. Overall, we show that pseudo-labeling is a simple and effective approach that significantly improves over standard gold-reference training in XLS.</abstract>
    <identifier type="citekey">le-2024-cross</identifier>
    <identifier type="doi">10.18653/v1/2024.findings-naacl.289</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-naacl.289</url>
    </location>
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>4644</start>
        <end>4677</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Cross-Lingual Summarization with Pseudo-Label Regularization
%A Le, Thang
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F le-2024-cross
%X Cross-Lingual Summarization (XLS) aims to summarize a document in the source language into a condensed version in the target language, effectively removing language barriers for non-native readers. Previous approaches, however, share the limitation that only a single reference (gold summary) is exploited during model training, exposing the base model to an underrepresented hypothesis space, since the actual number of possible hypotheses is exponentially large. To alleviate this problem, we present a study that adopts pseudo-labels to regularize standard cross-lingual summarization training. We investigate several components contributing to the gains from regularized training, through experiments involving 8 diverse languages from different families. Overall, we show that pseudo-labeling is a simple and effective approach that significantly improves over standard gold-reference training in XLS.
%R 10.18653/v1/2024.findings-naacl.289
%U https://aclanthology.org/2024.findings-naacl.289
%U https://doi.org/10.18653/v1/2024.findings-naacl.289
%P 4644-4677
Markdown (Informal)
[Cross-Lingual Summarization with Pseudo-Label Regularization](https://aclanthology.org/2024.findings-naacl.289) (Le, Findings 2024)
ACL
Thang Le. 2024. Cross-Lingual Summarization with Pseudo-Label Regularization. In Findings of the Association for Computational Linguistics: NAACL 2024, pages 4644–4677, Mexico City, Mexico. Association for Computational Linguistics.
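
For readers who want a concrete picture of the idea summarized in the abstract, below is a minimal, hypothetical sketch of pseudo-label regularization for XLS training. It is not the paper's implementation: the backbone (facebook/mbart-large-50), the weight lambda_reg, and the sampling settings are illustrative assumptions. The only point it demonstrates is the generic mechanism the abstract describes: augmenting the single-gold-reference loss with a loss on model-sampled pseudo summaries, so training sees more of the hypothesis space.

```python
# Hypothetical sketch of pseudo-label-regularized XLS training -- NOT the
# paper's code. Model, hyperparameters, and sampling settings are assumptions.
# (mBART source/target language codes and label-alignment details are
# glossed over for brevity.)
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "facebook/mbart-large-50"  # assumed multilingual seq2seq backbone
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

lambda_reg = 0.5  # hypothetical weight on the pseudo-label term


def masked_labels(ids: torch.Tensor) -> torch.Tensor:
    """Replace padding with -100 so cross-entropy ignores those positions."""
    return ids.masked_fill(ids == tokenizer.pad_token_id, -100)


def training_step(src_texts: list[str], gold_summaries: list[str]) -> torch.Tensor:
    enc = tokenizer(src_texts, return_tensors="pt", padding=True, truncation=True)

    # Standard XLS objective: likelihood of the single gold reference.
    gold = tokenizer(text_target=gold_summaries, return_tensors="pt",
                     padding=True, truncation=True)
    gold_loss = model(**enc, labels=masked_labels(gold.input_ids)).loss

    # Pseudo-labels: sample alternative summaries from the model itself,
    # widening supervision beyond the one gold reference.
    with torch.no_grad():
        pseudo_ids = model.generate(**enc, do_sample=True, top_p=0.9,
                                    max_new_tokens=128)
    pseudo_loss = model(**enc, labels=masked_labels(pseudo_ids)).loss

    # Regularized objective: gold loss plus a weighted pseudo-label term.
    return gold_loss + lambda_reg * pseudo_loss
```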