@inproceedings{stanojevic-simaan-2017-alternative,
title = "Alternative Objective Functions for Training {MT} Evaluation Metrics",
author = "Stanojevi{\'c}, Milo{\v{s}} and
Sima{'}an, Khalil",
editor = "Barzilay, Regina and
Kan, Min-Yen",
booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P17-2004",
doi = "10.18653/v1/P17-2004",
pages = "20--25",
abstract = "MT evaluation metrics are tested for correlation with human judgments either at the sentence- or the corpus-level. Trained metrics ignore corpus-level judgments and are trained for high sentence-level correlation only. We show that training only for one objective (sentence or corpus level), can not only harm the performance on the other objective, but it can also be suboptimal for the objective being optimized. To this end we present a metric trained for corpus-level and show empirical comparison against a metric trained for sentence-level exemplifying how their performance may vary per language pair, type and level of judgment. Subsequently we propose a model trained to optimize both objectives simultaneously and show that it is far more stable than{--}and on average outperforms{--}both models on both objectives.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stanojevic-simaan-2017-alternative">
<titleInfo>
<title>Alternative Objective Functions for Training MT Evaluation Metrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Miloš</namePart>
<namePart type="family">Stanojević</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Sima’an</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Barzilay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>MT evaluation metrics are tested for correlation with human judgments either at the sentence level or the corpus level. Trained metrics ignore corpus-level judgments and are trained for high sentence-level correlation only. We show that training for only one objective (sentence or corpus level) can not only harm performance on the other objective, but can also be suboptimal for the objective being optimized. To this end we present a metric trained at the corpus level and show an empirical comparison against a metric trained at the sentence level, exemplifying how their performance may vary per language pair, type and level of judgment. Subsequently we propose a model trained to optimize both objectives simultaneously and show that it is far more stable than–and on average outperforms–both models on both objectives.</abstract>
<identifier type="citekey">stanojevic-simaan-2017-alternative</identifier>
<identifier type="doi">10.18653/v1/P17-2004</identifier>
<location>
<url>https://aclanthology.org/P17-2004</url>
</location>
<part>
<date>2017-07</date>
<extent unit="page">
<start>20</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Alternative Objective Functions for Training MT Evaluation Metrics
%A Stanojević, Miloš
%A Sima’an, Khalil
%Y Barzilay, Regina
%Y Kan, Min-Yen
%S Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2017
%8 July
%I Association for Computational Linguistics
%C Vancouver, Canada
%F stanojevic-simaan-2017-alternative
%X MT evaluation metrics are tested for correlation with human judgments either at the sentence level or the corpus level. Trained metrics ignore corpus-level judgments and are trained for high sentence-level correlation only. We show that training for only one objective (sentence or corpus level) can not only harm performance on the other objective, but can also be suboptimal for the objective being optimized. To this end we present a metric trained at the corpus level and show an empirical comparison against a metric trained at the sentence level, exemplifying how their performance may vary per language pair, type and level of judgment. Subsequently we propose a model trained to optimize both objectives simultaneously and show that it is far more stable than–and on average outperforms–both models on both objectives.
%R 10.18653/v1/P17-2004
%U https://aclanthology.org/P17-2004
%U https://doi.org/10.18653/v1/P17-2004
%P 20-25
Markdown (Informal)
[Alternative Objective Functions for Training MT Evaluation Metrics](https://aclanthology.org/P17-2004) (Stanojević & Sima’an, ACL 2017)
ACL
Miloš Stanojević and Khalil Sima'an. 2017. [Alternative Objective Functions for Training MT Evaluation Metrics](https://aclanthology.org/P17-2004). In *Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)*, pages 20–25, Vancouver, Canada. Association for Computational Linguistics.
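For readers skimming the abstract, a minimal, hypothetical sketch of what "optimizing both objectives simultaneously" could look like, assuming a simple weighted combination of a pooled sentence-level correlation and a per-system corpus-level correlation. This is not the paper's implementation; the function and parameter names (`combined_objective`, `alpha`) and the made-up numbers in the usage example are illustrative only.

```python
import numpy as np

def pearson(a, b):
    """Pearson correlation between two equal-length score vectors."""
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.corrcoef(a, b)[0, 1])

def combined_objective(metric_scores, human_sent, human_corpus, alpha=0.5):
    """Weighted mix of a sentence-level and a corpus-level correlation term.

    metric_scores: dict system -> per-sentence metric scores
    human_sent:    dict system -> per-sentence human scores
    human_corpus:  dict system -> corpus-level human score
    """
    systems = sorted(metric_scores)
    # Sentence level: pool per-sentence (metric, human) pairs across systems.
    m = [x for s in systems for x in metric_scores[s]]
    h = [x for s in systems for x in human_sent[s]]
    sentence_corr = pearson(m, h)
    # Corpus level: correlate per-system mean metric scores with corpus-level judgments.
    corpus_corr = pearson([np.mean(metric_scores[s]) for s in systems],
                          [human_corpus[s] for s in systems])
    return alpha * sentence_corr + (1.0 - alpha) * corpus_corr

# Tiny usage example with made-up numbers.
metric = {"sysA": [0.7, 0.6, 0.8], "sysB": [0.4, 0.5, 0.3]}
human_s = {"sysA": [0.9, 0.5, 0.8], "sysB": [0.3, 0.6, 0.2]}
human_c = {"sysA": 0.75, "sysB": 0.40}
print(combined_objective(metric, human_s, human_c, alpha=0.5))
```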