% Conference paper introducing the SemDaX corpus, published in the
% LREC'16 proceedings (pages 842--847).
%
% Encoding: every non-ASCII character in this entry is written as a LaTeX
% escape so the entry stays 7-bit clean for classic BibTeX. The title dash is
% therefore spelled `---` instead of a raw Unicode dash character.
% Casing: the corpus name is brace-protected as a whole word so that
% sentence-casing styles do not lowercase it.
% NOTE(review): "adjucated" in the abstract looks like a misspelling of
% "adjudicated"; it is left untouched because the abstract is quoted text.
% Confirm against the published abstract before changing it.
@inproceedings{pedersen-etal-2016-semdax,
    title     = {The {SemDaX} Corpus --- Sense Annotations with Scalable Sense Inventories},
    author    = {Pedersen, Bolette and
                 Braasch, Anna and
                 Johannsen, Anders and
                 Alonso, H{\'e}ctor Mart{\'\i}nez and
                 Nimb, Sanni and
                 Olsen, Sussi and
                 S{\o}gaard, Anders and
                 S{\o}rensen, Nicolai Hartvig},
    editor    = {Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Declerck, Thierry and
                 Goggi, Sara and
                 Grobelnik, Marko and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Mazo, Helene and
                 Moreno, Asuncion and
                 Odijk, Jan and
                 Piperidis, Stelios},
    booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
    month     = may,
    year      = {2016},
    address   = {Portoro{\v{z}}, Slovenia},
    publisher = {European Language Resources Association (ELRA)},
    url       = {https://aclanthology.org/L16-1136},
    pages     = {842--847},
    abstract  = {We launch the SemDaX corpus which is a recently completed Danish human-annotated corpus available through a CLARIN academic license. The corpus includes approx. 90,000 words, comprises six textual domains, and is annotated with sense inventories of different granularity. The aim of the developed corpus is twofold: i) to assess the reliability of the different sense annotation schemes for Danish measured by qualitative analyses and annotation agreement scores, and ii) to serve as training and test data for machine learning algorithms with the practical purpose of developing sense taggers for Danish. To these aims, we take a new approach to human-annotated corpus resources by double annotating a much larger part of the corpus than what is normally seen: for the all-words task we double annotated 60{\%} of the material and for the lexical sample task 100{\%}. We include in the corpus not only the adjucated files, but also the diverging annotations. In other words, we consider not all disagreement to be noise, but rather to contain valuable linguistic information that can help us improve our annotation schemes and our learning algorithms.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pedersen-etal-2016-semdax">
<titleInfo>
<title>The SemDaX Corpus ― Sense Annotations with Scalable Sense Inventories</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bolette</namePart>
<namePart type="family">Pedersen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Braasch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anders</namePart>
<namePart type="family">Johannsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Héctor</namePart>
<namePart type="given">Martínez</namePart>
<namePart type="family">Alonso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanni</namePart>
<namePart type="family">Nimb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sussi</namePart>
<namePart type="family">Olsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anders</namePart>
<namePart type="family">Søgaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolai</namePart>
<namePart type="given">Hartvig</namePart>
<namePart type="family">Sørensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We launch the SemDaX corpus which is a recently completed Danish human-annotated corpus available through a CLARIN academic license. The corpus includes approx. 90,000 words, comprises six textual domains, and is annotated with sense inventories of different granularity. The aim of the developed corpus is twofold: i) to assess the reliability of the different sense annotation schemes for Danish measured by qualitative analyses and annotation agreement scores, and ii) to serve as training and test data for machine learning algorithms with the practical purpose of developing sense taggers for Danish. To these aims, we take a new approach to human-annotated corpus resources by double annotating a much larger part of the corpus than what is normally seen: for the all-words task we double annotated 60% of the material and for the lexical sample task 100%. We include in the corpus not only the adjucated files, but also the diverging annotations. In other words, we consider not all disagreement to be noise, but rather to contain valuable linguistic information that can help us improve our annotation schemes and our learning algorithms.</abstract>
<identifier type="citekey">pedersen-etal-2016-semdax</identifier>
<location>
<url>https://aclanthology.org/L16-1136</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>842</start>
<end>847</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The SemDaX Corpus ― Sense Annotations with Scalable Sense Inventories
%A Pedersen, Bolette
%A Braasch, Anna
%A Johannsen, Anders
%A Alonso, Héctor Martínez
%A Nimb, Sanni
%A Olsen, Sussi
%A Søgaard, Anders
%A Sørensen, Nicolai Hartvig
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F pedersen-etal-2016-semdax
%X We launch the SemDaX corpus which is a recently completed Danish human-annotated corpus available through a CLARIN academic license. The corpus includes approx. 90,000 words, comprises six textual domains, and is annotated with sense inventories of different granularity. The aim of the developed corpus is twofold: i) to assess the reliability of the different sense annotation schemes for Danish measured by qualitative analyses and annotation agreement scores, and ii) to serve as training and test data for machine learning algorithms with the practical purpose of developing sense taggers for Danish. To these aims, we take a new approach to human-annotated corpus resources by double annotating a much larger part of the corpus than what is normally seen: for the all-words task we double annotated 60% of the material and for the lexical sample task 100%. We include in the corpus not only the adjucated files, but also the diverging annotations. In other words, we consider not all disagreement to be noise, but rather to contain valuable linguistic information that can help us improve our annotation schemes and our learning algorithms.
%U https://aclanthology.org/L16-1136
%P 842-847
Markdown (Informal)
[The SemDaX Corpus ― Sense Annotations with Scalable Sense Inventories](https://aclanthology.org/L16-1136) (Pedersen et al., LREC 2016)
ACL
- Bolette Pedersen, Anna Braasch, Anders Johannsen, Héctor Martínez Alonso, Sanni Nimb, Sussi Olsen, Anders Søgaard, and Nicolai Hartvig Sørensen. 2016. The SemDaX Corpus ― Sense Annotations with Scalable Sense Inventories. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 842–847, Portorož, Slovenia. European Language Resources Association (ELRA).