@inproceedings{primadhanty-quattoni-2023-entity,
title = "Entity Disambiguation on a Tight Labeling Budget",
author = "Primadhanty, Audi and
Quattoni, Ariadna",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.479",
doi = "10.18653/v1/2023.findings-emnlp.479",
pages = "7208--7215",
abstract = "Many real-world NLP applications face the challenge of training an entity disambiguation model for a specific domain with a small labeling budget. In this setting there is often access to a large unlabeled pool of documents. It is then natural to ask the question: which samples should be selected for annotation? In this paper we propose a solution that combines feature diversity with low rank correction. Our sampling strategy is formulated in the context of bilinear tensor models. Our experiments show that the proposed approach can significantly reduce the amount of labeled data necessary to achieve a given performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="primadhanty-quattoni-2023-entity">
<titleInfo>
<title>Entity Disambiguation on a Tight Labeling Budget</title>
</titleInfo>
<name type="personal">
<namePart type="given">Audi</namePart>
<namePart type="family">Primadhanty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ariadna</namePart>
<namePart type="family">Quattoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many real-world NLP applications face the challenge of training an entity disambiguation model for a specific domain with a small labeling budget. In this setting there is often access to a large unlabeled pool of documents. It is then natural to ask the question: which samples should be selected for annotation? In this paper we propose a solution that combines feature diversity with low rank correction. Our sampling strategy is formulated in the context of bilinear tensor models. Our experiments show that the proposed approach can significantly reduce the amount of labeled data necessary to achieve a given performance.</abstract>
<identifier type="citekey">primadhanty-quattoni-2023-entity</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.479</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.479</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>7208</start>
<end>7215</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Entity Disambiguation on a Tight Labeling Budget
%A Primadhanty, Audi
%A Quattoni, Ariadna
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F primadhanty-quattoni-2023-entity
%X Many real-world NLP applications face the challenge of training an entity disambiguation model for a specific domain with a small labeling budget. In this setting there is often access to a large unlabeled pool of documents. It is then natural to ask the question: which samples should be selected for annotation? In this paper we propose a solution that combines feature diversity with low rank correction. Our sampling strategy is formulated in the context of bilinear tensor models. Our experiments show that the proposed approach can significantly reduce the amount of labeled data necessary to achieve a given performance.
%R 10.18653/v1/2023.findings-emnlp.479
%U https://aclanthology.org/2023.findings-emnlp.479
%U https://doi.org/10.18653/v1/2023.findings-emnlp.479
%P 7208-7215
Markdown (Informal)
[Entity Disambiguation on a Tight Labeling Budget](https://aclanthology.org/2023.findings-emnlp.479) (Primadhanty & Quattoni, Findings 2023)
ACL