@article{vulic-etal-2017-hyperlex,
title = "{H}yper{L}ex: A Large-Scale Evaluation of Graded Lexical Entailment",
author = "Vuli{\'c}, Ivan and
Gerz, Daniela and
Kiela, Douwe and
Hill, Felix and
Korhonen, Anna",
journal = "Computational Linguistics",
volume = "43",
number = "4",
month = dec,
year = "2017",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/J17-4004/",
doi = "10.1162/COLI_a_00301",
pages = "781--835",
abstract = "We introduce HyperLex{---}a data set and evaluation resource that quantifies the extent of the semantic category membership, that is, type-of relation, also known as hyponymy{--}hypernymy or lexical entailment (LE) relation between 2,616 concept pairs. Cognitive psychology research has established that typicality and category/class membership are computed in human semantic memory as a gradual rather than binary relation. Nevertheless, most NLP research and existing large-scale inventories of concept category membership (WordNet, DBPedia, etc.) treat category membership and LE as binary. To address this, we asked hundreds of native English speakers to indicate typicality and strength of category membership between a diverse range of concept pairs on a crowdsourcing platform. Our results confirm that category membership and LE are indeed more gradual than binary. We then compare these human judgments with the predictions of automatic systems, which reveals a huge gap between human performance and state-of-the-art LE, distributional and representation learning models, and substantial differences between the models themselves. We discuss a pathway for improving semantic models to overcome this discrepancy, and indicate future application areas for improved graded LE systems."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vulic-etal-2017-hyperlex">
<titleInfo>
<title>HyperLex: A Large-Scale Evaluation of Graded Lexical Entailment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vulić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniela</namePart>
<namePart type="family">Gerz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Douwe</namePart>
<namePart type="family">Kiela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Hill</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We introduce HyperLex—a data set and evaluation resource that quantifies the extent of the semantic category membership, that is, type-of relation, also known as hyponymy–hypernymy or lexical entailment (LE) relation between 2,616 concept pairs. Cognitive psychology research has established that typicality and category/class membership are computed in human semantic memory as a gradual rather than binary relation. Nevertheless, most NLP research and existing large-scale inventories of concept category membership (WordNet, DBPedia, etc.) treat category membership and LE as binary. To address this, we asked hundreds of native English speakers to indicate typicality and strength of category membership between a diverse range of concept pairs on a crowdsourcing platform. Our results confirm that category membership and LE are indeed more gradual than binary. We then compare these human judgments with the predictions of automatic systems, which reveals a huge gap between human performance and state-of-the-art LE, distributional and representation learning models, and substantial differences between the models themselves. We discuss a pathway for improving semantic models to overcome this discrepancy, and indicate future application areas for improved graded LE systems.</abstract>
<identifier type="citekey">vulic-etal-2017-hyperlex</identifier>
<identifier type="doi">10.1162/COLI_a_00301</identifier>
<location>
<url>https://aclanthology.org/J17-4004/</url>
</location>
<part>
<date>2017-12</date>
<detail type="volume"><number>43</number></detail>
<detail type="issue"><number>4</number></detail>
<extent unit="page">
<start>781</start>
<end>835</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T HyperLex: A Large-Scale Evaluation of Graded Lexical Entailment
%A Vulić, Ivan
%A Gerz, Daniela
%A Kiela, Douwe
%A Hill, Felix
%A Korhonen, Anna
%J Computational Linguistics
%D 2017
%8 December
%V 43
%N 4
%I MIT Press
%C Cambridge, MA
%F vulic-etal-2017-hyperlex
%X We introduce HyperLex—a data set and evaluation resource that quantifies the extent of the semantic category membership, that is, type-of relation, also known as hyponymy–hypernymy or lexical entailment (LE) relation between 2,616 concept pairs. Cognitive psychology research has established that typicality and category/class membership are computed in human semantic memory as a gradual rather than binary relation. Nevertheless, most NLP research and existing large-scale inventories of concept category membership (WordNet, DBPedia, etc.) treat category membership and LE as binary. To address this, we asked hundreds of native English speakers to indicate typicality and strength of category membership between a diverse range of concept pairs on a crowdsourcing platform. Our results confirm that category membership and LE are indeed more gradual than binary. We then compare these human judgments with the predictions of automatic systems, which reveals a huge gap between human performance and state-of-the-art LE, distributional and representation learning models, and substantial differences between the models themselves. We discuss a pathway for improving semantic models to overcome this discrepancy, and indicate future application areas for improved graded LE systems.
%R 10.1162/COLI_a_00301
%U https://aclanthology.org/J17-4004/
%U https://doi.org/10.1162/COLI_a_00301
%P 781-835
Markdown (Informal)
[HyperLex: A Large-Scale Evaluation of Graded Lexical Entailment](https://aclanthology.org/J17-4004/) (Vulić et al., CL 2017)
ACL