@inproceedings{tran-etal-2023-revealing,
title = "Revealing Weaknesses of {V}ietnamese Language Models Through Unanswerable Questions in Machine Reading Comprehension",
author = "Tran, Son Quoc and
Do, Phong Nguyen-Thuan and
Nguyen, Kiet Van and
Nguyen, Ngan Luu-Thuy",
editor = "Bassignana, Elisa and
Lindemann, Matthias and
Petit, Alban",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-srw.1",
doi = "10.18653/v1/2023.eacl-srw.1",
pages = "1--13",
abstract = "Although the curse of multilinguality significantly restricts the language abilities of multilingual models in monolingual settings, researchers now still have to rely on multilingual models to develop state-of-the-art systems in Vietnamese Machine Reading Comprehension. This difficulty in researching is because of the limited number of high-quality works in developing Vietnamese language models. In order to encourage more work in this research field, we present a comprehensive analysis of language weaknesses and strengths of current Vietnamese monolingual models using the downstream task of Machine Reading Comprehension. From the analysis results, we suggest new directions for developing Vietnamese language models. Besides this main contribution, we also successfully reveal the existence of artifacts in Vietnamese Machine Reading Comprehension benchmarks and suggest an urgent need for new high-quality benchmarks to track the progress of Vietnamese Machine Reading Comprehension. Moreover, we also introduced a minor but valuable modification to the process of annotating unanswerable questions for Machine Reading Comprehension from previous work. Our proposed modification helps improve the quality of unanswerable questions to a higher level of difficulty for Machine Reading Comprehension systems to solve.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tran-etal-2023-revealing">
<titleInfo>
<title>Revealing Weaknesses of Vietnamese Language Models Through Unanswerable Questions in Machine Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Son</namePart>
<namePart type="given">Quoc</namePart>
<namePart type="family">Tran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phong</namePart>
<namePart type="given">Nguyen-Thuan</namePart>
<namePart type="family">Do</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kiet</namePart>
<namePart type="given">Van</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ngan</namePart>
<namePart type="given">Luu-Thuy</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Bassignana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Lindemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alban</namePart>
<namePart type="family">Petit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Although the curse of multilinguality significantly restricts the language abilities of multilingual models in monolingual settings, researchers now still have to rely on multilingual models to develop state-of-the-art systems in Vietnamese Machine Reading Comprehension. This difficulty in researching is because of the limited number of high-quality works in developing Vietnamese language models. In order to encourage more work in this research field, we present a comprehensive analysis of language weaknesses and strengths of current Vietnamese monolingual models using the downstream task of Machine Reading Comprehension. From the analysis results, we suggest new directions for developing Vietnamese language models. Besides this main contribution, we also successfully reveal the existence of artifacts in Vietnamese Machine Reading Comprehension benchmarks and suggest an urgent need for new high-quality benchmarks to track the progress of Vietnamese Machine Reading Comprehension. Moreover, we also introduced a minor but valuable modification to the process of annotating unanswerable questions for Machine Reading Comprehension from previous work. Our proposed modification helps improve the quality of unanswerable questions to a higher level of difficulty for Machine Reading Comprehension systems to solve.</abstract>
<identifier type="citekey">tran-etal-2023-revealing</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-srw.1</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-srw.1</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>1</start>
<end>13</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Revealing Weaknesses of Vietnamese Language Models Through Unanswerable Questions in Machine Reading Comprehension
%A Tran, Son Quoc
%A Do, Phong Nguyen-Thuan
%A Nguyen, Kiet Van
%A Nguyen, Ngan Luu-Thuy
%Y Bassignana, Elisa
%Y Lindemann, Matthias
%Y Petit, Alban
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F tran-etal-2023-revealing
%X Although the curse of multilinguality significantly restricts the language abilities of multilingual models in monolingual settings, researchers now still have to rely on multilingual models to develop state-of-the-art systems in Vietnamese Machine Reading Comprehension. This difficulty in researching is because of the limited number of high-quality works in developing Vietnamese language models. In order to encourage more work in this research field, we present a comprehensive analysis of language weaknesses and strengths of current Vietnamese monolingual models using the downstream task of Machine Reading Comprehension. From the analysis results, we suggest new directions for developing Vietnamese language models. Besides this main contribution, we also successfully reveal the existence of artifacts in Vietnamese Machine Reading Comprehension benchmarks and suggest an urgent need for new high-quality benchmarks to track the progress of Vietnamese Machine Reading Comprehension. Moreover, we also introduced a minor but valuable modification to the process of annotating unanswerable questions for Machine Reading Comprehension from previous work. Our proposed modification helps improve the quality of unanswerable questions to a higher level of difficulty for Machine Reading Comprehension systems to solve.
%R 10.18653/v1/2023.eacl-srw.1
%U https://aclanthology.org/2023.eacl-srw.1
%U https://doi.org/10.18653/v1/2023.eacl-srw.1
%P 1-13
Markdown (Informal)
[Revealing Weaknesses of Vietnamese Language Models Through Unanswerable Questions in Machine Reading Comprehension](https://aclanthology.org/2023.eacl-srw.1) (Tran et al., EACL 2023)
ACL