@inproceedings{pengpun-etal-2023-cross,
title = "Cross-Lingual Data Augmentation For {T}hai Question-Answering",
author = "Pengpun, Parinthapat and
Udomcharoenchaikit, Can and
Buaphet, Weerayut and
Limkonchotiwat, Peerat",
editor = "Hupkes, Dieuwke and
Dankers, Verna and
Batsuren, Khuyagbaatar and
Sinha, Koustuv and
Kazemnejad, Amirhossein and
Christodoulopoulos, Christos and
Cotterell, Ryan and
Bruni, Elia",
booktitle = "Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.genbench-1.16/",
doi = "10.18653/v1/2023.genbench-1.16",
pages = "193--203",
abstract = "This paper presents an innovative data augmentation framework with data quality control designed to enhance the robustness of Question Answering (QA) models in low-resource languages, particularly Thai. Recognizing the challenges posed by the scarcity and quality of training data, we leverage data augmentation techniques in both monolingual and cross-lingual settings. Our approach augments and enriches the original dataset, thereby increasing its linguistic diversity and robustness. We evaluate the robustness of our framework on Machine Reading Comprehension, and the experimental results illustrate the potential of data augmentation to effectively increase training data and improve model generalization in low-resource language settings, offering a promising direction for the data augmentation manner."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pengpun-etal-2023-cross">
<titleInfo>
<title>Cross-Lingual Data Augmentation For Thai Question-Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Parinthapat</namePart>
<namePart type="family">Pengpun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Can</namePart>
<namePart type="family">Udomcharoenchaikit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weerayut</namePart>
<namePart type="family">Buaphet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peerat</namePart>
<namePart type="family">Limkonchotiwat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verna</namePart>
<namePart type="family">Dankers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khuyagbaatar</namePart>
<namePart type="family">Batsuren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koustuv</namePart>
<namePart type="family">Sinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amirhossein</namePart>
<namePart type="family">Kazemnejad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elia</namePart>
<namePart type="family">Bruni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents an innovative data augmentation framework with data quality control designed to enhance the robustness of Question Answering (QA) models in low-resource languages, particularly Thai. Recognizing the challenges posed by the scarcity and quality of training data, we leverage data augmentation techniques in both monolingual and cross-lingual settings. Our approach augments and enriches the original dataset, thereby increasing its linguistic diversity and robustness. We evaluate the robustness of our framework on Machine Reading Comprehension, and the experimental results illustrate the potential of data augmentation to effectively increase training data and improve model generalization in low-resource language settings, offering a promising direction for the data augmentation manner.</abstract>
<identifier type="citekey">pengpun-etal-2023-cross</identifier>
<identifier type="doi">10.18653/v1/2023.genbench-1.16</identifier>
<location>
<url>https://aclanthology.org/2023.genbench-1.16/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>193</start>
<end>203</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-Lingual Data Augmentation For Thai Question-Answering
%A Pengpun, Parinthapat
%A Udomcharoenchaikit, Can
%A Buaphet, Weerayut
%A Limkonchotiwat, Peerat
%Y Hupkes, Dieuwke
%Y Dankers, Verna
%Y Batsuren, Khuyagbaatar
%Y Sinha, Koustuv
%Y Kazemnejad, Amirhossein
%Y Christodoulopoulos, Christos
%Y Cotterell, Ryan
%Y Bruni, Elia
%S Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F pengpun-etal-2023-cross
%X This paper presents an innovative data augmentation framework with data quality control designed to enhance the robustness of Question Answering (QA) models in low-resource languages, particularly Thai. Recognizing the challenges posed by the scarcity and quality of training data, we leverage data augmentation techniques in both monolingual and cross-lingual settings. Our approach augments and enriches the original dataset, thereby increasing its linguistic diversity and robustness. We evaluate the robustness of our framework on Machine Reading Comprehension, and the experimental results illustrate the potential of data augmentation to effectively increase training data and improve model generalization in low-resource language settings, offering a promising direction for the data augmentation manner.
%R 10.18653/v1/2023.genbench-1.16
%U https://aclanthology.org/2023.genbench-1.16/
%U https://doi.org/10.18653/v1/2023.genbench-1.16
%P 193-203
Markdown (Informal)
[Cross-Lingual Data Augmentation For Thai Question-Answering](https://aclanthology.org/2023.genbench-1.16/) (Pengpun et al., GenBench 2023)
ACL
- Parinthapat Pengpun, Can Udomcharoenchaikit, Weerayut Buaphet, and Peerat Limkonchotiwat. 2023. Cross-Lingual Data Augmentation For Thai Question-Answering. In Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP, pages 193–203, Singapore. Association for Computational Linguistics.