@inproceedings{ferawati-etal-2024-synchronizing,
title = "Synchronizing Approach in Designing Annotation Guidelines for Multilingual Datasets: A {COVID}-19 Case Study Using {E}nglish and {J}apanese Tweets",
author = "Ferawati, Kiki and
She, Wan Jou and
Wakamiya, Shoko and
Aramaki, Eiji",
editor = "Prabhakaran, Vinodkumar and
Dev, Sunipa and
Benotti, Luciana and
Hershcovich, Daniel and
Cabello, Laura and
Cao, Yong and
Adebara, Ife and
Zhou, Li",
booktitle = "Proceedings of the 2nd Workshop on Cross-Cultural Considerations in NLP",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.c3nlp-1.3",
doi = "10.18653/v1/2024.c3nlp-1.3",
pages = "32--41",
abstract = "The difference in culture between the U.S. and Japan is a popular subject for Western vs. Eastern cultural comparison for researchers. One particular challenge is to obtain and annotate multilingual datasets. In this study, we utilized COVID-19 tweets from the two countries as a case study, focusing particularly on discussions concerning masks. The annotation task was designed to gain insights into societal attitudes toward the mask policies implemented in both countries. The aim of this study is to provide a practical approach for the annotation task by thoroughly documenting how we aligned the multilingual annotation guidelines to obtain a comparable dataset. We proceeded to document the effective practices during our annotation process to synchronize our multilingual guidelines. Furthermore, we discussed difficulties caused by differences in expression style and culture, and potential strategies that helped improve our agreement scores and reduce discrepancies between the annotation results in both languages. These findings offer an alternative method for synchronizing multilingual annotation guidelines and achieving feasible agreement scores for cross-cultural annotation tasks. This study resulted in a multilingual guideline in English and Japanese to annotate topics related to public discourses about COVID-19 masks in the U.S. and Japan.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ferawati-etal-2024-synchronizing">
<titleInfo>
<title>Synchronizing Approach in Designing Annotation Guidelines for Multilingual Datasets: A COVID-19 Case Study Using English and Japanese Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kiki</namePart>
<namePart type="family">Ferawati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wan</namePart>
<namePart type="given">Jou</namePart>
<namePart type="family">She</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shoko</namePart>
<namePart type="family">Wakamiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eiji</namePart>
<namePart type="family">Aramaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Cross-Cultural Considerations in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunipa</namePart>
<namePart type="family">Dev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luciana</namePart>
<namePart type="family">Benotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Hershcovich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Cabello</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yong</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ife</namePart>
<namePart type="family">Adebara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The difference in culture between the U.S. and Japan is a popular subject for Western vs. Eastern cultural comparison for researchers. One particular challenge is to obtain and annotate multilingual datasets. In this study, we utilized COVID-19 tweets from the two countries as a case study, focusing particularly on discussions concerning masks. The annotation task was designed to gain insights into societal attitudes toward the mask policies implemented in both countries. The aim of this study is to provide a practical approach for the annotation task by thoroughly documenting how we aligned the multilingual annotation guidelines to obtain a comparable dataset. We proceeded to document the effective practices during our annotation process to synchronize our multilingual guidelines. Furthermore, we discussed difficulties caused by differences in expression style and culture, and potential strategies that helped improve our agreement scores and reduce discrepancies between the annotation results in both languages. These findings offer an alternative method for synchronizing multilingual annotation guidelines and achieving feasible agreement scores for cross-cultural annotation tasks. This study resulted in a multilingual guideline in English and Japanese to annotate topics related to public discourses about COVID-19 masks in the U.S. and Japan.</abstract>
<identifier type="citekey">ferawati-etal-2024-synchronizing</identifier>
<identifier type="doi">10.18653/v1/2024.c3nlp-1.3</identifier>
<location>
<url>https://aclanthology.org/2024.c3nlp-1.3</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>32</start>
<end>41</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Synchronizing Approach in Designing Annotation Guidelines for Multilingual Datasets: A COVID-19 Case Study Using English and Japanese Tweets
%A Ferawati, Kiki
%A She, Wan Jou
%A Wakamiya, Shoko
%A Aramaki, Eiji
%Y Prabhakaran, Vinodkumar
%Y Dev, Sunipa
%Y Benotti, Luciana
%Y Hershcovich, Daniel
%Y Cabello, Laura
%Y Cao, Yong
%Y Adebara, Ife
%Y Zhou, Li
%S Proceedings of the 2nd Workshop on Cross-Cultural Considerations in NLP
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F ferawati-etal-2024-synchronizing
%X The difference in culture between the U.S. and Japan is a popular subject for Western vs. Eastern cultural comparison for researchers. One particular challenge is to obtain and annotate multilingual datasets. In this study, we utilized COVID-19 tweets from the two countries as a case study, focusing particularly on discussions concerning masks. The annotation task was designed to gain insights into societal attitudes toward the mask policies implemented in both countries. The aim of this study is to provide a practical approach for the annotation task by thoroughly documenting how we aligned the multilingual annotation guidelines to obtain a comparable dataset. We proceeded to document the effective practices during our annotation process to synchronize our multilingual guidelines. Furthermore, we discussed difficulties caused by differences in expression style and culture, and potential strategies that helped improve our agreement scores and reduce discrepancies between the annotation results in both languages. These findings offer an alternative method for synchronizing multilingual annotation guidelines and achieving feasible agreement scores for cross-cultural annotation tasks. This study resulted in a multilingual guideline in English and Japanese to annotate topics related to public discourses about COVID-19 masks in the U.S. and Japan.
%R 10.18653/v1/2024.c3nlp-1.3
%U https://aclanthology.org/2024.c3nlp-1.3
%U https://doi.org/10.18653/v1/2024.c3nlp-1.3
%P 32-41
Markdown (Informal)
[Synchronizing Approach in Designing Annotation Guidelines for Multilingual Datasets: A COVID-19 Case Study Using English and Japanese Tweets](https://aclanthology.org/2024.c3nlp-1.3) (Ferawati et al., C3NLP-WS 2024)
ACL