@inproceedings{navigli-etal-2010-annotated,
title = "An Annotated Dataset for Extracting Definitions and Hypernyms from the Web",
author = "Navigli, Roberto and
Velardi, Paola and
Ruiz-Mart{\'\i}nez, Juana Maria",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/20_Paper.pdf",
abstract = "This paper presents and analyzes an annotated corpus of definitions, created to train an algorithm for the automatic extraction of definitions and hypernyms from web documents. As an additional resource, we also include a corpus of non-definitions with syntactic patterns similar to those of definition sentences, e.g.: ''``An android is a robot'''' vs. ''``Snowcap is unmistakable''''. Domain and style independence is obtained thanks to the annotation of a large and domain-balanced corpus and to a novel pattern generalization algorithm based on word-class lattices (WCL). A lattice is a directed acyclic graph (DAG), a subclass of nondeterministic finite state automata (NFA). The lattice structure has the purpose of preserving the salient differences among distinct sequences, while eliminating redundant information. The WCL algorithm will be integrated into an improved version of the GlossExtractor Web application (Velardi et al., 2008). This paper is mostly concerned with a description of the corpus, the annotation strategy, and a linguistic analysis of the data. A summary of the WCL algorithm is also provided for the sake of completeness.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="navigli-etal-2010-annotated">
<titleInfo>
<title>An Annotated Dataset for Extracting Definitions and Hypernyms from the Web</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Navigli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Velardi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juana</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Ruiz-Martínez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents and analyzes an annotated corpus of definitions, created to train an algorithm for the automatic extraction of definitions and hypernyms from web documents. As an additional resource, we also include a corpus of non-definitions with syntactic patterns similar to those of definition sentences, e.g.: ”“An android is a robot”” vs. ”“Snowcap is unmistakable””. Domain and style independence is obtained thanks to the annotation of a large and domain-balanced corpus and to a novel pattern generalization algorithm based on word-class lattices (WCL). A lattice is a directed acyclic graph (DAG), a subclass of nondeterministic finite state automata (NFA). The lattice structure has the purpose of preserving the salient differences among distinct sequences, while eliminating redundant information. The WCL algorithm will be integrated into an improved version of the GlossExtractor Web application (Velardi et al., 2008). This paper is mostly concerned with a description of the corpus, the annotation strategy, and a linguistic analysis of the data. A summary of the WCL algorithm is also provided for the sake of completeness.</abstract>
<identifier type="citekey">navigli-etal-2010-annotated</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/20_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Annotated Dataset for Extracting Definitions and Hypernyms from the Web
%A Navigli, Roberto
%A Velardi, Paola
%A Ruiz-Martínez, Juana Maria
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F navigli-etal-2010-annotated
%X This paper presents and analyzes an annotated corpus of definitions, created to train an algorithm for the automatic extraction of definitions and hypernyms from web documents. As an additional resource, we also include a corpus of non-definitions with syntactic patterns similar to those of definition sentences, e.g.: ”“An android is a robot”” vs. ”“Snowcap is unmistakable””. Domain and style independence is obtained thanks to the annotation of a large and domain-balanced corpus and to a novel pattern generalization algorithm based on word-class lattices (WCL). A lattice is a directed acyclic graph (DAG), a subclass of nondeterministic finite state automata (NFA). The lattice structure has the purpose of preserving the salient differences among distinct sequences, while eliminating redundant information. The WCL algorithm will be integrated into an improved version of the GlossExtractor Web application (Velardi et al., 2008). This paper is mostly concerned with a description of the corpus, the annotation strategy, and a linguistic analysis of the data. A summary of the WCL algorithm is also provided for the sake of completeness.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/20_Paper.pdf
Markdown (Informal)
[An Annotated Dataset for Extracting Definitions and Hypernyms from the Web](http://www.lrec-conf.org/proceedings/lrec2010/pdf/20_Paper.pdf) (Navigli et al., LREC 2010)
ACL