@inproceedings{duran-silva-etal-2024-affilgood,
title = "{A}ffil{G}ood: Building reliable institution name disambiguation tools to improve scientific literature analysis",
author = "Duran-Silva, Nicolau and
Accuosto, Pablo and
Przyby{\l}a, Piotr and
Saggion, Horacio",
editor = "Ghosal, Tirthankar and
Singh, Amanpreet and
Waard, Anita and
Mayr, Philipp and
Naik, Aakanksha and
Weller, Orion and
Lee, Yoonjoo and
Shen, Shannon and
Qin, Yanxia",
booktitle = "Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sdp-1.13",
pages = "135--144",
abstract = "The accurate attribution of scientific works to research organizations is hindered by the lack of openly available manually annotated data{--}in particular when multilingual and complex affiliation strings are considered. The AffilGood framework introduced in this paper addresses this gap. We identify three sub-tasks relevant for institution name disambiguation and make available annotated datasets and tools aimed at each of them, including i) a dataset annotated with affiliation spans in noisy automatically-extracted strings; ii) a dataset annotated with named entities for the identification of organizations and their locations; iii) seven datasets annotated with the Research Organization Registry (ROR) identifiers for the evaluation of entity-linking systems. In addition, we describe, evaluate and make available newly developed tools that use these datasets to provide solutions for each of the identified sub-tasks. Our results confirm the value of the developed resources and methods in addressing key challenges in institution name disambiguation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="duran-silva-etal-2024-affilgood">
<titleInfo>
<title>AffilGood: Building reliable institution name disambiguation tools to improve scientific literature analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicolau</namePart>
<namePart type="family">Duran-Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pablo</namePart>
<namePart type="family">Accuosto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Przybyła</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Horacio</namePart>
<namePart type="family">Saggion</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orion</namePart>
<namePart type="family">Weller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoonjoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shannon</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxia</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The accurate attribution of scientific works to research organizations is hindered by the lack of openly available manually annotated data–in particular when multilingual and complex affiliation strings are considered. The AffilGood framework introduced in this paper addresses this gap. We identify three sub-tasks relevant for institution name disambiguation and make available annotated datasets and tools aimed at each of them, including i) a dataset annotated with affiliation spans in noisy automatically-extracted strings; ii) a dataset annotated with named entities for the identification of organizations and their locations; iii) seven datasets annotated with the Research Organization Registry (ROR) identifiers for the evaluation of entity-linking systems. In addition, we describe, evaluate and make available newly developed tools that use these datasets to provide solutions for each of the identified sub-tasks. Our results confirm the value of the developed resources and methods in addressing key challenges in institution name disambiguation.</abstract>
<identifier type="citekey">duran-silva-etal-2024-affilgood</identifier>
<location>
<url>https://aclanthology.org/2024.sdp-1.13</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>135</start>
<end>144</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AffilGood: Building reliable institution name disambiguation tools to improve scientific literature analysis
%A Duran-Silva, Nicolau
%A Accuosto, Pablo
%A Przybyła, Piotr
%A Saggion, Horacio
%Y Ghosal, Tirthankar
%Y Singh, Amanpreet
%Y Waard, Anita
%Y Mayr, Philipp
%Y Naik, Aakanksha
%Y Weller, Orion
%Y Lee, Yoonjoo
%Y Shen, Shannon
%Y Qin, Yanxia
%S Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F duran-silva-etal-2024-affilgood
%X The accurate attribution of scientific works to research organizations is hindered by the lack of openly available manually annotated data–in particular when multilingual and complex affiliation strings are considered. The AffilGood framework introduced in this paper addresses this gap. We identify three sub-tasks relevant for institution name disambiguation and make available annotated datasets and tools aimed at each of them, including i) a dataset annotated with affiliation spans in noisy automatically-extracted strings; ii) a dataset annotated with named entities for the identification of organizations and their locations; iii) seven datasets annotated with the Research Organization Registry (ROR) identifiers for the evaluation of entity-linking systems. In addition, we describe, evaluate and make available newly developed tools that use these datasets to provide solutions for each of the identified sub-tasks. Our results confirm the value of the developed resources and methods in addressing key challenges in institution name disambiguation.
%U https://aclanthology.org/2024.sdp-1.13
%P 135-144
Markdown (Informal)
[AffilGood: Building reliable institution name disambiguation tools to improve scientific literature analysis](https://aclanthology.org/2024.sdp-1.13) (Duran-Silva et al., sdp-WS 2024)
ACL