@inproceedings{masis-etal-2022-corpus,
title = "Corpus-Guided Contrast Sets for Morphosyntactic Feature Detection in Low-Resource {E}nglish Varieties",
author = "Masis, Tessa and
Neal, Anissa and
Green, Lisa and
O{'}Connor, Brendan",
editor = "Serikov, Oleg and
Voloshina, Ekaterina and
Postnikova, Anna and
Klyachko, Elena and
Neminova, Ekaterina and
Vylomova, Ekaterina and
Shavrina, Tatiana and
Le Ferrand, Eric and
Malykh, Valentin and
Tyers, Francis and
Arkhangelskiy, Timofey and
Mikhailov, Vladislav and
Fenogenova, Alena",
booktitle = "Proceedings of the first workshop on NLP applications to field linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2022.fieldmatters-1.2",
pages = "11--25",
abstract = "The study of language variation examines how language varies between and within different groups of speakers, shedding light on how we use language to construct identities and how social contexts affect language use. A common method is to identify instances of a certain linguistic feature - say, the zero copula construction - in a corpus, and analyze the feature{'}s distribution across speakers, topics, and other variables, to either gain a qualitative understanding of the feature{'}s function or systematically measure variation. In this paper, we explore the challenging task of automatic morphosyntactic feature detection in low-resource English varieties. We present a human-in-the-loop approach to generate and filter effective contrast sets via corpus-guided edits. We show that our approach improves feature detection for both Indian English and African American English, demonstrate how it can assist linguistic research, and release our fine-tuned models for use by other researchers.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="masis-etal-2022-corpus">
<titleInfo>
<title>Corpus-Guided Contrast Sets for Morphosyntactic Feature Detection in Low-Resource English Varieties</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tessa</namePart>
<namePart type="family">Masis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anissa</namePart>
<namePart type="family">Neal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Green</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the first workshop on NLP applications to field linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Voloshina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Postnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Klyachko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Neminova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiana</namePart>
<namePart type="family">Shavrina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Le Ferrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Malykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Tyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timofey</namePart>
<namePart type="family">Arkhangelskiy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladislav</namePart>
<namePart type="family">Mikhailov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alena</namePart>
<namePart type="family">Fenogenova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The study of language variation examines how language varies between and within different groups of speakers, shedding light on how we use language to construct identities and how social contexts affect language use. A common method is to identify instances of a certain linguistic feature - say, the zero copula construction - in a corpus, and analyze the feature’s distribution across speakers, topics, and other variables, to either gain a qualitative understanding of the feature’s function or systematically measure variation. In this paper, we explore the challenging task of automatic morphosyntactic feature detection in low-resource English varieties. We present a human-in-the-loop approach to generate and filter effective contrast sets via corpus-guided edits. We show that our approach improves feature detection for both Indian English and African American English, demonstrate how it can assist linguistic research, and release our fine-tuned models for use by other researchers.</abstract>
<identifier type="citekey">masis-etal-2022-corpus</identifier>
<location>
<url>https://aclanthology.org/2022.fieldmatters-1.2</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>11</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus-Guided Contrast Sets for Morphosyntactic Feature Detection in Low-Resource English Varieties
%A Masis, Tessa
%A Neal, Anissa
%A Green, Lisa
%A O’Connor, Brendan
%Y Serikov, Oleg
%Y Voloshina, Ekaterina
%Y Postnikova, Anna
%Y Klyachko, Elena
%Y Neminova, Ekaterina
%Y Vylomova, Ekaterina
%Y Shavrina, Tatiana
%Y Le Ferrand, Eric
%Y Malykh, Valentin
%Y Tyers, Francis
%Y Arkhangelskiy, Timofey
%Y Mikhailov, Vladislav
%Y Fenogenova, Alena
%S Proceedings of the first workshop on NLP applications to field linguistics
%D 2022
%8 October
%I International Conference on Computational Linguistics
%C Gyeongju, Republic of Korea
%F masis-etal-2022-corpus
%X The study of language variation examines how language varies between and within different groups of speakers, shedding light on how we use language to construct identities and how social contexts affect language use. A common method is to identify instances of a certain linguistic feature - say, the zero copula construction - in a corpus, and analyze the feature’s distribution across speakers, topics, and other variables, to either gain a qualitative understanding of the feature’s function or systematically measure variation. In this paper, we explore the challenging task of automatic morphosyntactic feature detection in low-resource English varieties. We present a human-in-the-loop approach to generate and filter effective contrast sets via corpus-guided edits. We show that our approach improves feature detection for both Indian English and African American English, demonstrate how it can assist linguistic research, and release our fine-tuned models for use by other researchers.
%U https://aclanthology.org/2022.fieldmatters-1.2
%P 11-25
Markdown (Informal)
[Corpus-Guided Contrast Sets for Morphosyntactic Feature Detection in Low-Resource English Varieties](https://aclanthology.org/2022.fieldmatters-1.2) (Masis et al., FieldMatters 2022)
ACL