@inproceedings{al-natsheh-etal-2017-udl,
title = "{U}d{L} at {S}em{E}val-2017 Task 1: Semantic Textual Similarity Estimation of {E}nglish Sentence Pairs Using Regression Model over Pairwise Features",
author = "Al-Natsheh, Hussein T. and
Martinet, Lucie and
Muhlenbach, Fabrice and
Zighed, Djamel Abdelkader",
editor = "Bethard, Steven and
Carpuat, Marine and
Apidianaki, Marianna and
Mohammad, Saif M. and
Cer, Daniel and
Jurgens, David",
booktitle = "Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/S17-2013",
doi = "10.18653/v1/S17-2013",
pages = "115--119",
abstract = "This paper describes the model UdL we proposed to solve the semantic textual similarity task of SemEval 2017 workshop. The track we participated in was estimating the semantics relatedness of a given set of sentence pairs in English. The best run out of three submitted runs of our model achieved a Pearson correlation score of 0.8004 compared to a hidden human annotation of 250 pairs. We used random forest ensemble learning to map an expandable set of extracted pairwise features into a semantic similarity estimated value bounded between 0 and 5. Most of these features were calculated using word embedding vectors similarity to align Part of Speech (PoS) and Name Entities (NE) tagged tokens of each sentence pair. Among other pairwise features, we experimented a classical tf-idf weighted Bag of Words (BoW) vector model but with character-based range of n-grams instead of words. This sentence vector BoW-based feature gave a relatively high importance value percentage in the feature importances analysis of the ensemble learning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="al-natsheh-etal-2017-udl">
<titleInfo>
<title>UdL at SemEval-2017 Task 1: Semantic Textual Similarity Estimation of English Sentence Pairs Using Regression Model over Pairwise Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hussein</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Al-Natsheh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Martinet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabrice</namePart>
<namePart type="family">Muhlenbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Djamel</namePart>
<namePart type="given">Abdelkader</namePart>
<namePart type="family">Zighed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Cer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the model UdL we proposed to solve the semantic textual similarity task of SemEval 2017 workshop. The track we participated in was estimating the semantics relatedness of a given set of sentence pairs in English. The best run out of three submitted runs of our model achieved a Pearson correlation score of 0.8004 compared to a hidden human annotation of 250 pairs. We used random forest ensemble learning to map an expandable set of extracted pairwise features into a semantic similarity estimated value bounded between 0 and 5. Most of these features were calculated using word embedding vectors similarity to align Part of Speech (PoS) and Name Entities (NE) tagged tokens of each sentence pair. Among other pairwise features, we experimented a classical tf-idf weighted Bag of Words (BoW) vector model but with character-based range of n-grams instead of words. This sentence vector BoW-based feature gave a relatively high importance value percentage in the feature importances analysis of the ensemble learning.</abstract>
<identifier type="citekey">al-natsheh-etal-2017-udl</identifier>
<identifier type="doi">10.18653/v1/S17-2013</identifier>
<location>
<url>https://aclanthology.org/S17-2013</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>115</start>
<end>119</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UdL at SemEval-2017 Task 1: Semantic Textual Similarity Estimation of English Sentence Pairs Using Regression Model over Pairwise Features
%A Al-Natsheh, Hussein T.
%A Martinet, Lucie
%A Muhlenbach, Fabrice
%A Zighed, Djamel Abdelkader
%Y Bethard, Steven
%Y Carpuat, Marine
%Y Apidianaki, Marianna
%Y Mohammad, Saif M.
%Y Cer, Daniel
%Y Jurgens, David
%S Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F al-natsheh-etal-2017-udl
%X This paper describes the model UdL we proposed to solve the semantic textual similarity task of SemEval 2017 workshop. The track we participated in was estimating the semantics relatedness of a given set of sentence pairs in English. The best run out of three submitted runs of our model achieved a Pearson correlation score of 0.8004 compared to a hidden human annotation of 250 pairs. We used random forest ensemble learning to map an expandable set of extracted pairwise features into a semantic similarity estimated value bounded between 0 and 5. Most of these features were calculated using word embedding vectors similarity to align Part of Speech (PoS) and Name Entities (NE) tagged tokens of each sentence pair. Among other pairwise features, we experimented a classical tf-idf weighted Bag of Words (BoW) vector model but with character-based range of n-grams instead of words. This sentence vector BoW-based feature gave a relatively high importance value percentage in the feature importances analysis of the ensemble learning.
%R 10.18653/v1/S17-2013
%U https://aclanthology.org/S17-2013
%U https://doi.org/10.18653/v1/S17-2013
%P 115-119
Markdown (Informal)
[UdL at SemEval-2017 Task 1: Semantic Textual Similarity Estimation of English Sentence Pairs Using Regression Model over Pairwise Features](https://aclanthology.org/S17-2013) (Al-Natsheh et al., SemEval 2017)
ACL