@inproceedings{alves-etal-2024-domains,
title = "Which Domains, Tasks and Languages are in the Focus of {NLP} Research on the Languages of {E}urope?",
author = "Alves, Diego and
Tadi{\'c}, Marko and
Rehm, Georg",
editor = "Gaspari, Federico and
Moorkens, Joss and
Aldabe, Itziar and
Farwell, Aritz and
Altuna, Begona and
Piperidis, Stelios and
Rehm, Georg and
Rigau, German",
booktitle = "Proceedings of the Second International Workshop Towards Digital Language Equality (TDLE): Focusing on Sustainability @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.tdle-1.2",
pages = "18--32",
abstract = "This article provides a thorough mapping of NLP and Language Technology research on 39 European languages onto 46 domains. Our analysis is based on almost 50,000 papers published between 2010 and October 2022 in the ACL Anthology. We use a dictionary-based approach to identify 1) languages, 2) domains, and 3) NLP tasks in these papers; the dictionary-based method using exact terms has a precision value of 0.81. Moreover, we identify common mistakes which can be useful to fine-tune the methodology for future work. While we are only able to highlight selected results in this submitted version, the final paper will contain detailed analyses and charts on a per-language basis. We hope that this study can contribute to digital language equality in Europe by providing information to the academic and industrial research community about the opportunities for novel LT/NLP research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alves-etal-2024-domains">
<titleInfo>
<title>Which Domains, Tasks and Languages are in the Focus of NLP Research on the Languages of Europe?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Alves</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Tadić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second International Workshop Towards Digital Language Equality (TDLE): Focusing on Sustainability @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Gaspari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joss</namePart>
<namePart type="family">Moorkens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Itziar</namePart>
<namePart type="family">Aldabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aritz</namePart>
<namePart type="family">Farwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Begona</namePart>
<namePart type="family">Altuna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">German</namePart>
<namePart type="family">Rigau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This article provides a thorough mapping of NLP and Language Technology research on 39 European languages onto 46 domains. Our analysis is based on almost 50,000 papers published between 2010 and October 2022 in the ACL Anthology. We use a dictionary-based approach to identify 1) languages, 2) domains, and 3) NLP tasks in these papers; the dictionary-based method using exact terms has a precision value of 0.81. Moreover, we identify common mistakes which can be useful to fine-tune the methodology for future work. While we are only able to highlight selected results in this submitted version, the final paper will contain detailed analyses and charts on a per-language basis. We hope that this study can contribute to digital language equality in Europe by providing information to the academic and industrial research community about the opportunities for novel LT/NLP research.</abstract>
<identifier type="citekey">alves-etal-2024-domains</identifier>
<location>
<url>https://aclanthology.org/2024.tdle-1.2</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>18</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Which Domains, Tasks and Languages are in the Focus of NLP Research on the Languages of Europe?
%A Alves, Diego
%A Tadić, Marko
%A Rehm, Georg
%Y Gaspari, Federico
%Y Moorkens, Joss
%Y Aldabe, Itziar
%Y Farwell, Aritz
%Y Altuna, Begona
%Y Piperidis, Stelios
%Y Rehm, Georg
%Y Rigau, German
%S Proceedings of the Second International Workshop Towards Digital Language Equality (TDLE): Focusing on Sustainability @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F alves-etal-2024-domains
%X This article provides a thorough mapping of NLP and Language Technology research on 39 European languages onto 46 domains. Our analysis is based on almost 50,000 papers published between 2010 and October 2022 in the ACL Anthology. We use a dictionary-based approach to identify 1) languages, 2) domains, and 3) NLP tasks in these papers; the dictionary-based method using exact terms has a precision value of 0.81. Moreover, we identify common mistakes which can be useful to fine-tune the methodology for future work. While we are only able to highlight selected results in this submitted version, the final paper will contain detailed analyses and charts on a per-language basis. We hope that this study can contribute to digital language equality in Europe by providing information to the academic and industrial research community about the opportunities for novel LT/NLP research.
%U https://aclanthology.org/2024.tdle-1.2
%P 18-32
Markdown (Informal)
[Which Domains, Tasks and Languages are in the Focus of NLP Research on the Languages of Europe?](https://aclanthology.org/2024.tdle-1.2) (Alves et al., TDLE-WS 2024)
ACL