@inproceedings{quattoni-carreras-2021-minimizing-annotation,
title = "Minimizing Annotation Effort via Max-Volume Spectral Sampling",
author = "Quattoni, Ariadna and
Carreras, Xavier",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.246",
doi = "10.18653/v1/2021.findings-emnlp.246",
pages = "2890--2899",
abstract = "We address the annotation data bottleneck for sequence classification. Specifically we ask the question: if one has a budget of N annotations, which samples should we select for annotation? The solution we propose looks for diversity in the selected sample, by maximizing the amount of information that is useful for the learning algorithm, or equivalently by minimizing the redundancy of samples in the selection. This is formulated in the context of spectral learning of recurrent functions for sequence classification. Our method represents unlabeled data in the form of a Hankel matrix, and uses the notion of spectral max-volume to find a compact sub-block from which annotation samples are drawn. Experiments on sequence classification confirm that our spectral sampling strategy is in fact efficient and yields good models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="quattoni-carreras-2021-minimizing-annotation">
<titleInfo>
<title>Minimizing Annotation Effort via Max-Volume Spectral Sampling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ariadna</namePart>
<namePart type="family">Quattoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xavier</namePart>
<namePart type="family">Carreras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We address the annotation data bottleneck for sequence classification. Specifically we ask the question: if one has a budget of N annotations, which samples should we select for annotation? The solution we propose looks for diversity in the selected sample, by maximizing the amount of information that is useful for the learning algorithm, or equivalently by minimizing the redundancy of samples in the selection. This is formulated in the context of spectral learning of recurrent functions for sequence classification. Our method represents unlabeled data in the form of a Hankel matrix, and uses the notion of spectral max-volume to find a compact sub-block from which annotation samples are drawn. Experiments on sequence classification confirm that our spectral sampling strategy is in fact efficient and yields good models.</abstract>
<identifier type="citekey">quattoni-carreras-2021-minimizing-annotation</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.246</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.246</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>2890</start>
<end>2899</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Minimizing Annotation Effort via Max-Volume Spectral Sampling
%A Quattoni, Ariadna
%A Carreras, Xavier
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F quattoni-carreras-2021-minimizing-annotation
%X We address the annotation data bottleneck for sequence classification. Specifically we ask the question: if one has a budget of N annotations, which samples should we select for annotation? The solution we propose looks for diversity in the selected sample, by maximizing the amount of information that is useful for the learning algorithm, or equivalently by minimizing the redundancy of samples in the selection. This is formulated in the context of spectral learning of recurrent functions for sequence classification. Our method represents unlabeled data in the form of a Hankel matrix, and uses the notion of spectral max-volume to find a compact sub-block from which annotation samples are drawn. Experiments on sequence classification confirm that our spectral sampling strategy is in fact efficient and yields good models.
%R 10.18653/v1/2021.findings-emnlp.246
%U https://aclanthology.org/2021.findings-emnlp.246
%U https://doi.org/10.18653/v1/2021.findings-emnlp.246
%P 2890-2899
Markdown (Informal)
[Minimizing Annotation Effort via Max-Volume Spectral Sampling](https://aclanthology.org/2021.findings-emnlp.246) (Quattoni & Carreras, Findings 2021)
ACL