@inproceedings{paul-etal-2018-mostly,
title = "A mostly unlexicalized model for recognizing textual entailment",
author = "Paul, Mithun and
Sharp, Rebecca and
Surdeanu, Mihai",
editor = "Thorne, James and
Vlachos, Andreas and
Cocarascu, Oana and
Christodoulopoulos, Christos and
Mittal, Arpit",
booktitle = "Proceedings of the First Workshop on Fact Extraction and {VER}ification ({FEVER})",
month = nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-5528/",
doi = "10.18653/v1/W18-5528",
pages = "166--171",
abstract = "Many approaches to automatically recognizing entailment relations have employed classifiers over hand engineered lexicalized features, or deep learning models that implicitly capture lexicalization through word embeddings. This reliance on lexicalization may complicate the adaptation of these tools between domains. For example, such a system trained in the news domain may learn that a sentence like {\textquotedblleft}Palestinians recognize Texas as part of Mexico{\textquotedblright} tends to be unsupported, but this fact (and its corresponding lexicalized cues) have no value in, say, a scientific domain. To mitigate this dependence on lexicalized information, in this paper we propose a model that reads two sentences, from any given domain, to determine entailment without using lexicalized features. Instead our model relies on features that are either unlexicalized or are domain independent such as proportion of negated verbs, antonyms, or noun overlap. In its current implementation, this model does not perform well on the FEVER dataset, due to two reasons. First, for the information retrieval portion of the task we used the baseline system provided, since this was not the aim of our project. Second, this is work in progress and we still are in the process of identifying more features and gradually increasing the accuracy of our model. In the end, we hope to build a generic end-to-end classifier, which can be used in a domain outside the one in which it was trained, with no or minimal re-training."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="paul-etal-2018-mostly">
<titleInfo>
<title>A mostly unlexicalized model for recognizing textual entailment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mithun</namePart>
<namePart type="family">Paul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Sharp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Surdeanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Fact Extraction and VERification (FEVER)</title>
</titleInfo>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Thorne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Cocarascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arpit</namePart>
<namePart type="family">Mittal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many approaches to automatically recognizing entailment relations have employed classifiers over hand engineered lexicalized features, or deep learning models that implicitly capture lexicalization through word embeddings. This reliance on lexicalization may complicate the adaptation of these tools between domains. For example, such a system trained in the news domain may learn that a sentence like “Palestinians recognize Texas as part of Mexico” tends to be unsupported, but this fact (and its corresponding lexicalized cues) have no value in, say, a scientific domain. To mitigate this dependence on lexicalized information, in this paper we propose a model that reads two sentences, from any given domain, to determine entailment without using lexicalized features. Instead our model relies on features that are either unlexicalized or are domain independent such as proportion of negated verbs, antonyms, or noun overlap. In its current implementation, this model does not perform well on the FEVER dataset, due to two reasons. First, for the information retrieval portion of the task we used the baseline system provided, since this was not the aim of our project. Second, this is work in progress and we still are in the process of identifying more features and gradually increasing the accuracy of our model. In the end, we hope to build a generic end-to-end classifier, which can be used in a domain outside the one in which it was trained, with no or minimal re-training.</abstract>
<identifier type="citekey">paul-etal-2018-mostly</identifier>
<identifier type="doi">10.18653/v1/W18-5528</identifier>
<location>
<url>https://aclanthology.org/W18-5528/</url>
</location>
<part>
<date>2018-11</date>
<extent unit="page">
<start>166</start>
<end>171</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A mostly unlexicalized model for recognizing textual entailment
%A Paul, Mithun
%A Sharp, Rebecca
%A Surdeanu, Mihai
%Y Thorne, James
%Y Vlachos, Andreas
%Y Cocarascu, Oana
%Y Christodoulopoulos, Christos
%Y Mittal, Arpit
%S Proceedings of the First Workshop on Fact Extraction and VERification (FEVER)
%D 2018
%8 November
%I Association for Computational Linguistics
%C Brussels, Belgium
%F paul-etal-2018-mostly
%X Many approaches to automatically recognizing entailment relations have employed classifiers over hand engineered lexicalized features, or deep learning models that implicitly capture lexicalization through word embeddings. This reliance on lexicalization may complicate the adaptation of these tools between domains. For example, such a system trained in the news domain may learn that a sentence like “Palestinians recognize Texas as part of Mexico” tends to be unsupported, but this fact (and its corresponding lexicalized cues) have no value in, say, a scientific domain. To mitigate this dependence on lexicalized information, in this paper we propose a model that reads two sentences, from any given domain, to determine entailment without using lexicalized features. Instead our model relies on features that are either unlexicalized or are domain independent such as proportion of negated verbs, antonyms, or noun overlap. In its current implementation, this model does not perform well on the FEVER dataset, due to two reasons. First, for the information retrieval portion of the task we used the baseline system provided, since this was not the aim of our project. Second, this is work in progress and we still are in the process of identifying more features and gradually increasing the accuracy of our model. In the end, we hope to build a generic end-to-end classifier, which can be used in a domain outside the one in which it was trained, with no or minimal re-training.
%R 10.18653/v1/W18-5528
%U https://aclanthology.org/W18-5528/
%U https://doi.org/10.18653/v1/W18-5528
%P 166-171
Markdown (Informal)
[A mostly unlexicalized model for recognizing textual entailment](https://aclanthology.org/W18-5528/) (Paul et al., EMNLP 2018)
ACL