@inproceedings{cheong-etal-2021-intrinsic,
title = "Intrinsic evaluation of language models for code-switching",
author = "Cheong, Sik Feng and
Chieu, Hai Leong and
Lim, Jing",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wnut-1.10/",
doi = "10.18653/v1/2021.wnut-1.10",
pages = "81--86",
abstract = "Language models used in speech recognition are often either evaluated intrinsically using perplexity on test data, or extrinsically with an automatic speech recognition (ASR) system. The former evaluation does not always correlate well with ASR performance, while the latter could be specific to particular ASR systems. Recent work proposed to evaluate language models by using them to classify ground truth sentences among alternative phonetically similar sentences generated by a fine state transducer. Underlying such an evaluation is the assumption that the generated sentences are linguistically incorrect. In this paper, we first put this assumption into question, and observe that alternatively generated sentences could often be linguistically correct when they differ from the ground truth by only one edit. Secondly, we showed that by using multi-lingual BERT, we can achieve better performance than previous work on two code-switching data sets. Our implementation is publicly available on Github at \url{https://github.com/sikfeng/language-modelling-for-code-switching}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cheong-etal-2021-intrinsic">
<titleInfo>
<title>Intrinsic evaluation of language models for code-switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sik</namePart>
<namePart type="given">Feng</namePart>
<namePart type="family">Cheong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hai</namePart>
<namePart type="given">Leong</namePart>
<namePart type="family">Chieu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language models used in speech recognition are often either evaluated intrinsically using perplexity on test data, or extrinsically with an automatic speech recognition (ASR) system. The former evaluation does not always correlate well with ASR performance, while the latter could be specific to particular ASR systems. Recent work proposed to evaluate language models by using them to classify ground truth sentences among alternative phonetically similar sentences generated by a fine state transducer. Underlying such an evaluation is the assumption that the generated sentences are linguistically incorrect. In this paper, we first put this assumption into question, and observe that alternatively generated sentences could often be linguistically correct when they differ from the ground truth by only one edit. Secondly, we showed that by using multi-lingual BERT, we can achieve better performance than previous work on two code-switching data sets. Our implementation is publicly available on Github at https://github.com/sikfeng/language-modelling-for-code-switching.</abstract>
<identifier type="citekey">cheong-etal-2021-intrinsic</identifier>
<identifier type="doi">10.18653/v1/2021.wnut-1.10</identifier>
<location>
<url>https://aclanthology.org/2021.wnut-1.10/</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>81</start>
<end>86</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Intrinsic evaluation of language models for code-switching
%A Cheong, Sik Feng
%A Chieu, Hai Leong
%A Lim, Jing
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F cheong-etal-2021-intrinsic
%X Language models used in speech recognition are often either evaluated intrinsically using perplexity on test data, or extrinsically with an automatic speech recognition (ASR) system. The former evaluation does not always correlate well with ASR performance, while the latter could be specific to particular ASR systems. Recent work proposed to evaluate language models by using them to classify ground truth sentences among alternative phonetically similar sentences generated by a fine state transducer. Underlying such an evaluation is the assumption that the generated sentences are linguistically incorrect. In this paper, we first put this assumption into question, and observe that alternatively generated sentences could often be linguistically correct when they differ from the ground truth by only one edit. Secondly, we showed that by using multi-lingual BERT, we can achieve better performance than previous work on two code-switching data sets. Our implementation is publicly available on Github at https://github.com/sikfeng/language-modelling-for-code-switching.
%R 10.18653/v1/2021.wnut-1.10
%U https://aclanthology.org/2021.wnut-1.10/
%U https://doi.org/10.18653/v1/2021.wnut-1.10
%P 81-86
Markdown (Informal)
[Intrinsic evaluation of language models for code-switching](https://aclanthology.org/2021.wnut-1.10/) (Cheong et al., WNUT 2021)
ACL