@inproceedings{nasir-etal-2022-feeding,
title = "Feeding {NMT} a Healthy Diet {--} The Impact of Quality, Quantity, or the Right Type of Nutrients",
author = "Nasir, Abdallah and
Alisis, Sara and
Jaikat, Ruba W and
Jonsson, Rebecca and
Qardan, Sara and
Shawahneh, Eyas and
Al-Khdour, Nour",
editor = "Campbell, Janice and
Larocca, Stephen and
Marciano, Jay and
Savenkov, Konstantin and
Yanishevsky, Alex",
booktitle = "Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track)",
month = sep,
year = "2022",
address = "Orlando, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2022.amta-upg.21",
pages = "293--312",
abstract = "In the era of gigantic language models, and in our case, Neural Machine Translation (NMT) models, where merely size seems to matter, we{'}ve been asking ourselves, is it healthy to just feed our NMT model with more and more data? In this presentation, we want to show our findings on the impact of NMT performance of different data {``}nutrients{''} we were feeding our models. We have explored the impact of quantity, quality and the type of data we feed to our English-Arabic NMT models. The presentation will show the impact of adding millions of parallel sentences into our training data as opposed to a much smaller data set with much higher quality, and the results from additional experiments with different data nutrients. We will highlight our learnings, challenges and share insights from our Linguistics Quality Assurance team, on what are the advantages and disadvantages of each type of data source and define the criteria of high-quality data with respect to a healthy NMT diet.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nasir-etal-2022-feeding">
<titleInfo>
<title>Feeding NMT a Healthy Diet – The Impact of Quality, Quantity, or the Right Type of Nutrients</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abdallah</namePart>
<namePart type="family">Nasir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Alisis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="given">W</namePart>
<namePart type="family">Jaikat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Jonsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Qardan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eyas</namePart>
<namePart type="family">Shawahneh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nour</namePart>
<namePart type="family">Al-Khdour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Janice</namePart>
<namePart type="family">Campbell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephen</namePart>
<namePart type="family">Larocca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jay</namePart>
<namePart type="family">Marciano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Savenkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Yanishevsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Orlando, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the era of gigantic language models, and in our case, Neural Machine Translation (NMT) models, where merely size seems to matter, we’ve been asking ourselves, is it healthy to just feed our NMT model with more and more data? In this presentation, we want to show our findings on the impact of NMT performance of different data “nutrients” we were feeding our models. We have explored the impact of quantity, quality and the type of data we feed to our English-Arabic NMT models. The presentation will show the impact of adding millions of parallel sentences into our training data as opposed to a much smaller data set with much higher quality, and the results from additional experiments with different data nutrients. We will highlight our learnings, challenges and share insights from our Linguistics Quality Assurance team, on what are the advantages and disadvantages of each type of data source and define the criteria of high-quality data with respect to a healthy NMT diet.</abstract>
<identifier type="citekey">nasir-etal-2022-feeding</identifier>
<location>
<url>https://aclanthology.org/2022.amta-upg.21</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>293</start>
<end>312</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Feeding NMT a Healthy Diet – The Impact of Quality, Quantity, or the Right Type of Nutrients
%A Nasir, Abdallah
%A Alisis, Sara
%A Jaikat, Ruba W.
%A Jonsson, Rebecca
%A Qardan, Sara
%A Shawahneh, Eyas
%A Al-Khdour, Nour
%Y Campbell, Janice
%Y Larocca, Stephen
%Y Marciano, Jay
%Y Savenkov, Konstantin
%Y Yanishevsky, Alex
%S Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track)
%D 2022
%8 September
%I Association for Machine Translation in the Americas
%C Orlando, USA
%F nasir-etal-2022-feeding
%X In the era of gigantic language models, and in our case, Neural Machine Translation (NMT) models, where merely size seems to matter, we’ve been asking ourselves, is it healthy to just feed our NMT model with more and more data? In this presentation, we want to show our findings on the impact of NMT performance of different data “nutrients” we were feeding our models. We have explored the impact of quantity, quality and the type of data we feed to our English-Arabic NMT models. The presentation will show the impact of adding millions of parallel sentences into our training data as opposed to a much smaller data set with much higher quality, and the results from additional experiments with different data nutrients. We will highlight our learnings, challenges and share insights from our Linguistics Quality Assurance team, on what are the advantages and disadvantages of each type of data source and define the criteria of high-quality data with respect to a healthy NMT diet.
%U https://aclanthology.org/2022.amta-upg.21
%P 293-312
Markdown (Informal)
[Feeding NMT a Healthy Diet – The Impact of Quality, Quantity, or the Right Type of Nutrients](https://aclanthology.org/2022.amta-upg.21) (Nasir et al., AMTA 2022)
ACL
- Abdallah Nasir, Sara Alisis, Ruba W Jaikat, Rebecca Jonsson, Sara Qardan, Eyas Shawahneh, and Nour Al-Khdour. 2022. Feeding NMT a Healthy Diet – The Impact of Quality, Quantity, or the Right Type of Nutrients. In Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track), pages 293–312, Orlando, USA. Association for Machine Translation in the Americas.