@inproceedings{deka-etal-2022-bert,
title = "{BERT}-based Language Identification in Code-Mix {K}annada-{E}nglish Text at the {C}o{LI}-Kanglish Shared Task@{ICON} 2022",
author = "Deka, Pritam and
Jyoti Kalita, Nayan and
Kumar Sarma, Shikhar",
editor = "Chakravarthi, Bharathi Raja and
Murugappan, Abirami and
Chinnappa, Dhivya and
Hane, Adeep and
Kumeresan, Prasanna Kumar and
Ponnusamy, Rahul",
booktitle = "Proceedings of the 19th International Conference on Natural Language Processing (ICON): Shared Task on Word Level Language Identification in Code-mixed Kannada-English Texts",
month = dec,
year = "2022",
address = "IIIT Delhi, New Delhi, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.icon-wlli.3/",
pages = "12--17",
abstract = "Language identification has recently gained research interest in code-mixed languages due to the extensive use of social media among people. People who speak multiple languages tend to use code-mixed languages when communicating with each other. It has become necessary to identify the languages in such code-mixed environments to detect hate speech, fake news, misinformation or disinformation, and for tasks such as sentiment analysis. In this work, we have proposed a BERT-based approach for language identification in the CoLI-Kanglish shared task at ICON 2022. Our approach achieved an 86{\%} weighted average F-1 score and a macro average F-1 score of 57{\%} on the test set."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="deka-etal-2022-bert">
<titleInfo>
<title>BERT-based Language Identification in Code-Mix Kannada-English Text at the CoLI-Kanglish Shared Task@ICON 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pritam</namePart>
<namePart type="family">Deka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nayan</namePart>
<namePart type="family">Jyoti Kalita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shikhar</namePart>
<namePart type="family">Kumar Sarma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Conference on Natural Language Processing (ICON): Shared Task on Word Level Language Identification in Code-mixed Kannada-English Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abirami</namePart>
<namePart type="family">Murugappan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhivya</namePart>
<namePart type="family">Chinnappa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adeep</namePart>
<namePart type="family">Hane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prasanna</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Kumeresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Ponnusamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">IIIT Delhi, New Delhi, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language identification has recently gained research interest in code-mixed languages due to the extensive use of social media among people. People who speak multiple languages tend to use code-mixed languages when communicating with each other. It has become necessary to identify the languages in such code-mixed environments to detect hate speech, fake news, misinformation or disinformation, and for tasks such as sentiment analysis. In this work, we have proposed a BERT-based approach for language identification in the CoLI-Kanglish shared task at ICON 2022. Our approach achieved an 86% weighted average F-1 score and a macro average F-1 score of 57% on the test set.</abstract>
<identifier type="citekey">deka-etal-2022-bert</identifier>
<location>
<url>https://aclanthology.org/2022.icon-wlli.3/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>12</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BERT-based Language Identification in Code-Mix Kannada-English Text at the CoLI-Kanglish Shared Task@ICON 2022
%A Deka, Pritam
%A Jyoti Kalita, Nayan
%A Kumar Sarma, Shikhar
%Y Chakravarthi, Bharathi Raja
%Y Murugappan, Abirami
%Y Chinnappa, Dhivya
%Y Hane, Adeep
%Y Kumeresan, Prasanna Kumar
%Y Ponnusamy, Rahul
%S Proceedings of the 19th International Conference on Natural Language Processing (ICON): Shared Task on Word Level Language Identification in Code-mixed Kannada-English Texts
%D 2022
%8 December
%I Association for Computational Linguistics
%C IIIT Delhi, New Delhi, India
%F deka-etal-2022-bert
%X Language identification has recently gained research interest in code-mixed languages due to the extensive use of social media among people. People who speak multiple languages tend to use code-mixed languages when communicating with each other. It has become necessary to identify the languages in such code-mixed environments to detect hate speech, fake news, misinformation or disinformation, and for tasks such as sentiment analysis. In this work, we have proposed a BERT-based approach for language identification in the CoLI-Kanglish shared task at ICON 2022. Our approach achieved an 86% weighted average F-1 score and a macro average F-1 score of 57% on the test set.
%U https://aclanthology.org/2022.icon-wlli.3/
%P 12-17
Markdown (Informal)
[BERT-based Language Identification in Code-Mix Kannada-English Text at the CoLI-Kanglish Shared Task@ICON 2022](https://aclanthology.org/2022.icon-wlli.3/) (Deka et al., ICON 2022)
ACL