@inproceedings{voutilainen-2012-improving,
title = "Improving corpus annotation productivity: a method and experiment with interactive tagging",
author = "Voutilainen, Atro",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}`12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L12-1200/",
pages = "2097--2102",
abstract = "Corpus linguistic and language technological research needs empirical corpus data with nearly correct annotation and high volume to enable advances in language modelling and theorising. Recent work on improving corpus annotation accuracy presents semiautomatic methods to correct some of the analysis errors in available annotated corpora, while leaving the remaining errors undetected in the annotated corpus. We review recent advances in linguistics-based partial tagging and parsing, and regard the achieved analysis performance as sufficient for reconsidering a previously proposed method: combining nearly correct but partial automatic analysis with a minimal amount of human postediting (disambiguation) to achieve nearly correct corpus annotation accuracy at a competitive annotation speed. We report a pilot experiment with morphological (part-of-speech) annotation using a partial linguistic tagger of a kind previously reported with a very attractive precision-recall ratio, and observe that a desired level of annotation accuracy can be reached by using human disambiguation for less than 10{\textbackslash}{\%} of the words in the corpus."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="voutilainen-2012-improving">
<titleInfo>
<title>Improving corpus annotation productivity: a method and experiment with interactive tagging</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atro</namePart>
<namePart type="family">Voutilainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC‘12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Corpus linguistic and language technological research needs empirical corpus data with nearly correct annotation and high volume to enable advances in language modelling and theorising. Recent work on improving corpus annotation accuracy presents semiautomatic methods to correct some of the analysis errors in available annotated corpora, while leaving the remaining errors undetected in the annotated corpus. We review recent advances in linguistics-based partial tagging and parsing, and regard the achieved analysis performance as sufficient for reconsidering a previously proposed method: combining nearly correct but partial automatic analysis with a minimal amount of human postediting (disambiguation) to achieve nearly correct corpus annotation accuracy at a competitive annotation speed. We report a pilot experiment with morphological (part-of-speech) annotation using a partial linguistic tagger of a kind previously reported with a very attractive precision-recall ratio, and observe that a desired level of annotation accuracy can be reached by using human disambiguation for less than 10\textbackslash% of the words in the corpus.</abstract>
<identifier type="citekey">voutilainen-2012-improving</identifier>
<location>
<url>https://aclanthology.org/L12-1200/</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>2097</start>
<end>2102</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving corpus annotation productivity: a method and experiment with interactive tagging
%A Voutilainen, Atro
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC‘12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F voutilainen-2012-improving
%X Corpus linguistic and language technological research needs empirical corpus data with nearly correct annotation and high volume to enable advances in language modelling and theorising. Recent work on improving corpus annotation accuracy presents semiautomatic methods to correct some of the analysis errors in available annotated corpora, while leaving the remaining errors undetected in the annotated corpus. We review recent advances in linguistics-based partial tagging and parsing, and regard the achieved analysis performance as sufficient for reconsidering a previously proposed method: combining nearly correct but partial automatic analysis with a minimal amount of human postediting (disambiguation) to achieve nearly correct corpus annotation accuracy at a competitive annotation speed. We report a pilot experiment with morphological (part-of-speech) annotation using a partial linguistic tagger of a kind previously reported with a very attractive precision-recall ratio, and observe that a desired level of annotation accuracy can be reached by using human disambiguation for less than 10\textbackslash% of the words in the corpus.
%U https://aclanthology.org/L12-1200/
%P 2097-2102
Markdown (Informal)
[Improving corpus annotation productivity: a method and experiment with interactive tagging](https://aclanthology.org/L12-1200/) (Voutilainen, LREC 2012)
ACL