@inproceedings{mille-etal-2024-filling-gaps,
title = "Filling Gaps in {W}ikipedia: Leveraging Data-to-Text Generation to Improve Encyclopedic Coverage of Underrepresented Groups",
author = "Mille, Simon and
Pronesti, Massimiliano and
Thomson, Craig and
Lorandi, Michela and
Fitzpatrick, Sophie and
Huidrom, Rudali and
Sabry, Mohammed and
O{'}Riordan, Amy and
Belz, Anya",
editor = "Mahamood, Saad and
Minh, Nguyen Le and
Ippolito, Daphne",
booktitle = "Proceedings of the 17th International Natural Language Generation Conference: System Demonstrations",
month = sep,
year = "2024",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.inlg-demos.6",
pages = "16--19",
abstract = "Wikipedia is known to have systematic gaps in its coverage that correspond to under-resourced languages as well as underrepresented groups. This paper presents a new tool to support efforts to fill in these gaps by automatically generating draft articles and facilitating post-editing and uploading to Wikipedia. A rule-based generator and an input-constrained LLM are used to generate two alternative articles, enabling the often more fluent, but error-prone, LLM-generated article to be content-checked against the more reliable, but less fluent, rule-generated article.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mille-etal-2024-filling-gaps">
<titleInfo>
<title>Filling Gaps in Wikipedia: Leveraging Data-to-Text Generation to Improve Encyclopedic Coverage of Underrepresented Groups</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Mille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimiliano</namePart>
<namePart type="family">Pronesti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michela</namePart>
<namePart type="family">Lorandi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophie</namePart>
<namePart type="family">Fitzpatrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudali</namePart>
<namePart type="family">Huidrom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Sabry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amy</namePart>
<namePart type="family">O’Riordan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Natural Language Generation Conference: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Mahamood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nguyen</namePart>
<namePart type="given">Le</namePart>
<namePart type="family">Minh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daphne</namePart>
<namePart type="family">Ippolito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Wikipedia is known to have systematic gaps in its coverage that correspond to under-resourced languages as well as underrepresented groups. This paper presents a new tool to support efforts to fill in these gaps by automatically generating draft articles and facilitating post-editing and uploading to Wikipedia. A rule-based generator and an input-constrained LLM are used to generate two alternative articles, enabling the often more fluent, but error-prone, LLM-generated article to be content-checked against the more reliable, but less fluent, rule-generated article.</abstract>
<identifier type="citekey">mille-etal-2024-filling-gaps</identifier>
<location>
<url>https://aclanthology.org/2024.inlg-demos.6</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>16</start>
<end>19</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Filling Gaps in Wikipedia: Leveraging Data-to-Text Generation to Improve Encyclopedic Coverage of Underrepresented Groups
%A Mille, Simon
%A Pronesti, Massimiliano
%A Thomson, Craig
%A Lorandi, Michela
%A Fitzpatrick, Sophie
%A Huidrom, Rudali
%A Sabry, Mohammed
%A O’Riordan, Amy
%A Belz, Anya
%Y Mahamood, Saad
%Y Minh, Nguyen Le
%Y Ippolito, Daphne
%S Proceedings of the 17th International Natural Language Generation Conference: System Demonstrations
%D 2024
%8 September
%I Association for Computational Linguistics
%C Tokyo, Japan
%F mille-etal-2024-filling-gaps
%X Wikipedia is known to have systematic gaps in its coverage that correspond to under-resourced languages as well as underrepresented groups. This paper presents a new tool to support efforts to fill in these gaps by automatically generating draft articles and facilitating post-editing and uploading to Wikipedia. A rule-based generator and an input-constrained LLM are used to generate two alternative articles, enabling the often more fluent, but error-prone, LLM-generated article to be content-checked against the more reliable, but less fluent, rule-generated article.
%U https://aclanthology.org/2024.inlg-demos.6
%P 16-19
Markdown (Informal)
[Filling Gaps in Wikipedia: Leveraging Data-to-Text Generation to Improve Encyclopedic Coverage of Underrepresented Groups](https://aclanthology.org/2024.inlg-demos.6) (Mille et al., INLG 2024)
ACL
- Simon Mille, Massimiliano Pronesti, Craig Thomson, Michela Lorandi, Sophie Fitzpatrick, Rudali Huidrom, Mohammed Sabry, Amy O’Riordan, and Anya Belz. 2024. Filling Gaps in Wikipedia: Leveraging Data-to-Text Generation to Improve Encyclopedic Coverage of Underrepresented Groups. In Proceedings of the 17th International Natural Language Generation Conference: System Demonstrations, pages 16–19, Tokyo, Japan. Association for Computational Linguistics.