@inproceedings{kodali-etal-2022-precogiiith,
title = "{P}re{C}og{IIITH} at {H}inglish{E}val : Leveraging Code-Mixing Metrics {\&} Language Model Embeddings To Estimate Code-Mix Quality",
author = "Kodali, Prashant and
Sachan, Tanmay and
Goindani, Akshay and
Goel, Anmol and
Ahuja, Naman and
Shrivastava, Manish and
Kumaraguru, Ponnurangam",
editor = "Shaikh, Samira and
Ferreira, Thiago and
Stent, Amanda",
booktitle = "Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges",
month = jul,
year = "2022",
address = "Waterville, Maine, USA and virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.inlg-genchal.4",
pages = "26--30",
abstract = "Code-Mixing is a phenomenon of mixing two or more languages in a speech event and is prevalent in multilingual societies. Given the low-resource nature of Code-Mixing, machine generation of code-mixed text is a prevalent approach for data augmentation. However, evaluating the quality of such machine gen- erated code-mixed text is an open problem. In our submission to HinglishEval, a shared- task collocated with INLG2022, we attempt to build models factors that impact the quality of synthetically generated code-mix text by pre- dicting ratings for code-mix quality. Hingli- shEval Shared Task consists of two sub-tasks - a) Quality rating prediction); b) Disagree- ment prediction. We leverage popular code- mixed metrics and embeddings of multilin- gual large language models (MLLMs) as fea- tures, and train task specific MLP regression models. Our approach could not beat the baseline results. However, for Subtask-A our team ranked a close second on F-1 and Co- hen{'}s Kappa Score measures and first for Mean Squared Error measure. For Subtask-B our ap- proach ranked third for F1 score, and first for Mean Squared Error measure. Code of our submission can be accessed here.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kodali-etal-2022-precogiiith">
<titleInfo>
    <title>PreCogIIITH at HinglishEval : Leveraging Code-Mixing Metrics &amp; Language Model Embeddings To Estimate Code-Mix Quality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Prashant</namePart>
<namePart type="family">Kodali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmay</namePart>
<namePart type="family">Sachan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akshay</namePart>
<namePart type="family">Goindani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anmol</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naman</namePart>
<namePart type="family">Ahuja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Shrivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ponnurangam</namePart>
<namePart type="family">Kumaraguru</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Shaikh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Stent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Waterville, Maine, USA and virtual meeting</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>Code-Mixing is a phenomenon of mixing two or more languages in a speech event and is prevalent in multilingual societies. Given the low-resource nature of Code-Mixing, machine generation of code-mixed text is a prevalent approach for data augmentation. However, evaluating the quality of such machine generated code-mixed text is an open problem. In our submission to HinglishEval, a shared task collocated with INLG 2022, we attempt to model the factors that impact the quality of synthetically generated code-mix text by predicting ratings for code-mix quality. The HinglishEval Shared Task consists of two sub-tasks: a) Quality rating prediction; b) Disagreement prediction. We leverage popular code-mixing metrics and embeddings of multilingual large language models (MLLMs) as features, and train task-specific MLP regression models. Our approach could not beat the baseline results. However, for Subtask-A our team ranked a close second on the F1 and Cohen’s Kappa Score measures and first on the Mean Squared Error measure. For Subtask-B our approach ranked third on F1 score, and first on the Mean Squared Error measure. Code of our submission can be accessed here.</abstract>
<identifier type="citekey">kodali-etal-2022-precogiiith</identifier>
<location>
<url>https://aclanthology.org/2022.inlg-genchal.4</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>26</start>
<end>30</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PreCogIIITH at HinglishEval : Leveraging Code-Mixing Metrics & Language Model Embeddings To Estimate Code-Mix Quality
%A Kodali, Prashant
%A Sachan, Tanmay
%A Goindani, Akshay
%A Goel, Anmol
%A Ahuja, Naman
%A Shrivastava, Manish
%A Kumaraguru, Ponnurangam
%Y Shaikh, Samira
%Y Ferreira, Thiago
%Y Stent, Amanda
%S Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges
%D 2022
%8 July
%I Association for Computational Linguistics
%C Waterville, Maine, USA and virtual meeting
%F kodali-etal-2022-precogiiith
%X Code-Mixing is a phenomenon of mixing two or more languages in a speech event and is prevalent in multilingual societies. Given the low-resource nature of Code-Mixing, machine generation of code-mixed text is a prevalent approach for data augmentation. However, evaluating the quality of such machine generated code-mixed text is an open problem. In our submission to HinglishEval, a shared task collocated with INLG 2022, we attempt to model the factors that impact the quality of synthetically generated code-mix text by predicting ratings for code-mix quality. The HinglishEval Shared Task consists of two sub-tasks: a) Quality rating prediction; b) Disagreement prediction. We leverage popular code-mixing metrics and embeddings of multilingual large language models (MLLMs) as features, and train task-specific MLP regression models. Our approach could not beat the baseline results. However, for Subtask-A our team ranked a close second on the F1 and Cohen’s Kappa Score measures and first on the Mean Squared Error measure. For Subtask-B our approach ranked third on F1 score, and first on the Mean Squared Error measure. Code of our submission can be accessed here.
%U https://aclanthology.org/2022.inlg-genchal.4
%P 26-30
Markdown (Informal)
[PreCogIIITH at HinglishEval : Leveraging Code-Mixing Metrics & Language Model Embeddings To Estimate Code-Mix Quality](https://aclanthology.org/2022.inlg-genchal.4) (Kodali et al., INLG 2022)
ACL
- Prashant Kodali, Tanmay Sachan, Akshay Goindani, Anmol Goel, Naman Ahuja, Manish Shrivastava, and Ponnurangam Kumaraguru. 2022. PreCogIIITH at HinglishEval : Leveraging Code-Mixing Metrics & Language Model Embeddings To Estimate Code-Mix Quality. In Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges, pages 26–30, Waterville, Maine, USA and virtual meeting. Association for Computational Linguistics.
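
The abstract describes combining code-mixing metrics with multilingual language model embeddings as features for task-specific MLP regression models. The sketch below is not the authors' released code; it is a minimal illustration under assumptions: a simplified Code-Mixing Index computed over per-token language tags, mean-pooled XLM-RoBERTa embeddings as the MLLM features, and scikit-learn's `MLPRegressor` as the regression model.

```python
# Illustrative sketch (not the authors' implementation) of the pipeline the
# abstract describes: code-mixing metric + multilingual LM embedding -> MLP regressor.
from collections import Counter

import numpy as np
import torch
from sklearn.neural_network import MLPRegressor
from transformers import AutoModel, AutoTokenizer

# Assumption: XLM-RoBERTa as the multilingual LM providing sentence embeddings.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
encoder = AutoModel.from_pretrained("xlm-roberta-base")


def cmi(lang_tags):
    """Simplified Code-Mixing Index: share of tokens outside the dominant language."""
    counts = Counter(t for t in lang_tags if t != "other")
    n = sum(counts.values())
    if n == 0:
        return 0.0
    return 100.0 * (1 - max(counts.values()) / n)


def embed(sentence):
    """Mean-pooled last-hidden-state embedding of the sentence."""
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True)
    with torch.no_grad():
        hidden = encoder(**inputs).last_hidden_state  # (1, seq_len, hidden_dim)
    return hidden.mean(dim=1).squeeze(0).numpy()


def featurize(sentence, lang_tags):
    # Concatenate the scalar code-mixing metric with the LM embedding.
    return np.concatenate([[cmi(lang_tags)], embed(sentence)])


# Toy data: (Hinglish sentence, per-token language tags, human quality rating).
train = [
    ("mujhe yeh movie bahut pasand aayi", ["hi", "hi", "en", "hi", "hi", "hi"], 8.0),
    ("kal office mein meeting hai", ["hi", "en", "hi", "en", "hi"], 7.0),
]
X = np.stack([featurize(s, tags) for s, tags, _ in train])
y = np.array([rating for _, _, rating in train])

# Task-specific MLP regression on the combined features.
model = MLPRegressor(hidden_layer_sizes=(128,), max_iter=500, random_state=0)
model.fit(X, y)
print(model.predict(X[:1]))
```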