@inproceedings{shao-etal-2022-low,
title = "Low-resource Entity Set Expansion: A Comprehensive Study on User-generated Text",
author = "Shao, Yutong and
Bhutani, Nikita and
Rahman, Sajjadur and
Hruschka, Estevam",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2022",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-naacl.100",
doi = "10.18653/v1/2022.findings-naacl.100",
pages = "1343--1353",
abstract = "Entity set expansion (ESE) aims at obtaining a more complete set of entities given a textual corpus and a seed set of entities of a concept. Although it is a critical task in many NLP applications, existing benchmarks are limited to well-formed text (e.g., Wikipedia) and well-defined concepts (e.g., countries and diseases). Furthermore, only a small number of predictions are evaluated compared to the actual size of an entity set. A rigorous assessment of ESE methods warrants more comprehensive benchmarks and evaluation. In this paper, we consider user-generated text to understand the generalizability of ESE methods. We develop new benchmarks and propose more rigorous evaluation metrics for assessing the performance of ESE methods. Additionally, we identify phenomena such as non-named entities, multifaceted entities, vague concepts that are more prevalent in user-generated text than well-formed text, and use them to profile ESE methods. We observe that the strong performance of state-of-the-art ESE methods does not generalize well to user-generated text. We conduct comprehensive empirical analysis and draw insights from the findings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shao-etal-2022-low">
<titleInfo>
<title>Low-resource Entity Set Expansion: A Comprehensive Study on User-generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yutong</namePart>
<namePart type="family">Shao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikita</namePart>
<namePart type="family">Bhutani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajjadur</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Estevam</namePart>
<namePart type="family">Hruschka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Entity set expansion (ESE) aims at obtaining a more complete set of entities given a textual corpus and a seed set of entities of a concept. Although it is a critical task in many NLP applications, existing benchmarks are limited to well-formed text (e.g., Wikipedia) and well-defined concepts (e.g., countries and diseases). Furthermore, only a small number of predictions are evaluated compared to the actual size of an entity set. A rigorous assessment of ESE methods warrants more comprehensive benchmarks and evaluation. In this paper, we consider user-generated text to understand the generalizability of ESE methods. We develop new benchmarks and propose more rigorous evaluation metrics for assessing the performance of ESE methods. Additionally, we identify phenomena such as non-named entities, multifaceted entities, vague concepts that are more prevalent in user-generated text than well-formed text, and use them to profile ESE methods. We observe that the strong performance of state-of-the-art ESE methods does not generalize well to user-generated text. We conduct comprehensive empirical analysis and draw insights from the findings.</abstract>
<identifier type="citekey">shao-etal-2022-low</identifier>
<identifier type="doi">10.18653/v1/2022.findings-naacl.100</identifier>
<location>
<url>https://aclanthology.org/2022.findings-naacl.100</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>1343</start>
<end>1353</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Low-resource Entity Set Expansion: A Comprehensive Study on User-generated Text
%A Shao, Yutong
%A Bhutani, Nikita
%A Rahman, Sajjadur
%A Hruschka, Estevam
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Findings of the Association for Computational Linguistics: NAACL 2022
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F shao-etal-2022-low
%X Entity set expansion (ESE) aims at obtaining a more complete set of entities given a textual corpus and a seed set of entities of a concept. Although it is a critical task in many NLP applications, existing benchmarks are limited to well-formed text (e.g., Wikipedia) and well-defined concepts (e.g., countries and diseases). Furthermore, only a small number of predictions are evaluated compared to the actual size of an entity set. A rigorous assessment of ESE methods warrants more comprehensive benchmarks and evaluation. In this paper, we consider user-generated text to understand the generalizability of ESE methods. We develop new benchmarks and propose more rigorous evaluation metrics for assessing the performance of ESE methods. Additionally, we identify phenomena such as non-named entities, multifaceted entities, vague concepts that are more prevalent in user-generated text than well-formed text, and use them to profile ESE methods. We observe that the strong performance of state-of-the-art ESE methods does not generalize well to user-generated text. We conduct comprehensive empirical analysis and draw insights from the findings.
%R 10.18653/v1/2022.findings-naacl.100
%U https://aclanthology.org/2022.findings-naacl.100
%U https://doi.org/10.18653/v1/2022.findings-naacl.100
%P 1343-1353
Markdown (Informal)
[Low-resource Entity Set Expansion: A Comprehensive Study on User-generated Text](https://aclanthology.org/2022.findings-naacl.100) (Shao et al., Findings 2022)
ACL