@inproceedings{chen-etal-2023-gladis,
title = "{GLADIS}: A General and Large Acronym Disambiguation Benchmark",
author = "Chen, Lihu and
Varoquaux, Gael and
Suchanek, Fabian M.",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-main.152",
doi = "10.18653/v1/2023.eacl-main.152",
pages = "2073--2088",
    abstract = "Acronym Disambiguation (AD) is crucial for natural language understanding on various sources, including biomedical reports, scientific papers, and search engine queries. However, existing acronym disambiguation benchmarks and tools are limited to specific domains, and the size of prior benchmarks is rather small. To accelerate the research on acronym disambiguation, we construct a new benchmark with three components: (1) a much larger acronym dictionary with 1.5M acronyms and 6.4M long forms; (2) a pre-training corpus with 160 million sentences; (3) three datasets that cover the general, scientific, and biomedical domains. We then pre-train a language model, \textit{AcroBERT}, on our constructed corpus for general acronym disambiguation, and show the challenges and values of our new benchmark.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2023-gladis">
<titleInfo>
<title>GLADIS: A General and Large Acronym Disambiguation Benchmark</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lihu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gael</namePart>
<namePart type="family">Varoquaux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabian</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Suchanek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Acronym Disambiguation (AD) is crucial for natural language understanding on various sources, including biomedical reports, scientific papers, and search engine queries. However, existing acronym disambiguation benchmarks and tools are limited to specific domains, and the size of prior benchmarks is rather small. To accelerate the research on acronym disambiguation, we construct a new benchmark with three components: (1) a much larger acronym dictionary with 1.5M acronyms and 6.4M long forms; (2) a pre-training corpus with 160 million sentences; (3) three datasets that cover the general, scientific, and biomedical domains. We then pre-train a language model, AcroBERT, on our constructed corpus for general acronym disambiguation, and show the challenges and values of our new benchmark.</abstract>
<identifier type="citekey">chen-etal-2023-gladis</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-main.152</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-main.152</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>2073</start>
<end>2088</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GLADIS: A General and Large Acronym Disambiguation Benchmark
%A Chen, Lihu
%A Varoquaux, Gael
%A Suchanek, Fabian M.
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F chen-etal-2023-gladis
%X Acronym Disambiguation (AD) is crucial for natural language understanding on various sources, including biomedical reports, scientific papers, and search engine queries. However, existing acronym disambiguation benchmarks and tools are limited to specific domains, and the size of prior benchmarks is rather small. To accelerate the research on acronym disambiguation, we construct a new benchmark with three components: (1) a much larger acronym dictionary with 1.5M acronyms and 6.4M long forms; (2) a pre-training corpus with 160 million sentences; (3) three datasets that cover the general, scientific, and biomedical domains. We then pre-train a language model, AcroBERT, on our constructed corpus for general acronym disambiguation, and show the challenges and values of our new benchmark.
%R 10.18653/v1/2023.eacl-main.152
%U https://aclanthology.org/2023.eacl-main.152
%U https://doi.org/10.18653/v1/2023.eacl-main.152
%P 2073-2088
Markdown (Informal)
[GLADIS: A General and Large Acronym Disambiguation Benchmark](https://aclanthology.org/2023.eacl-main.152) (Chen et al., EACL 2023)
ACL
Lihu Chen, Gael Varoquaux, and Fabian M. Suchanek. 2023. GLADIS: A General and Large Acronym Disambiguation Benchmark. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pages 2073–2088, Dubrovnik, Croatia. Association for Computational Linguistics.