% ACL Anthology record (https://aclanthology.org/2021.smm4h-1.23).
% Case-sensitive words in the title are braced as whole words so that
% sentence-casing styles keep them intact without splitting the word.
@inproceedings{laureano-de-leon-etal-2021-uob,
  title     = {{UoB} at {ProfNER} 2021: Data Augmentation for Classification Using Machine Translation},
  author    = {Laureano De Leon, Frances Adriana and
               Tayyar Madabushi, Harish and
               Lee, Mark},
  editor    = {Magge, Arjun and
               Klein, Ari and
               Miranda-Escalada, Antonio and
               Al-garadi, Mohammed Ali and
               Alimova, Ilseyar and
               Miftahutdinov, Zulfat and
               Farre-Maduell, Eulalia and
               Lopez, Salvador Lima and
               Flores, Ivan and
               O'Connor, Karen and
               Weissenbacher, Davy and
               Tutubalina, Elena and
               Sarker, Abeed and
               Banda, Juan M. and
               Krallinger, Martin and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the Sixth Social Media Mining for Health ({\#}SMM4H) Workshop and Shared Task},
  month     = jun,
  year      = {2021},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.smm4h-1.23},
  doi       = {10.18653/v1/2021.smm4h-1.23},
  pages     = {115--117},
  abstract  = {This paper describes the participation of the UoB-NLP team in the ProfNER-ST shared subtask 7a. The task was aimed at detecting the mention of professions in social media text. Our team experimented with two methods of improving the performance of pre-trained models: Specifically, we experimented with data augmentation through translation and the merging of multiple language inputs to meet the objective of the task. While the best performing model on the test data consisted of mBERT fine-tuned on augmented data using back-translation, the improvement is minor possibly because multi-lingual pre-trained models such as mBERT already have access to the kind of information provided through back-translation and bilingual data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the work with citekey laureano-de-leon-etal-2021-uob. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="laureano-de-leon-etal-2021-uob">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>UoB at ProfNER 2021: Data Augmentation for Classification Using Machine Translation</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Frances</namePart>
      <namePart type="given">Adriana</namePart>
      <namePart type="family">Laureano De Leon</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Harish</namePart>
      <namePart type="family">Tayyar Madabushi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mark</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Social Media Mining for Health (#SMM4H) Workshop and Shared Task</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Arjun</namePart>
        <namePart type="family">Magge</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ari</namePart>
        <namePart type="family">Klein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonio</namePart>
        <namePart type="family">Miranda-Escalada</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammed</namePart>
        <namePart type="given">Ali</namePart>
        <namePart type="family">Al-garadi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ilseyar</namePart>
        <namePart type="family">Alimova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zulfat</namePart>
        <namePart type="family">Miftahutdinov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eulalia</namePart>
        <namePart type="family">Farre-Maduell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Salvador</namePart>
        <namePart type="given">Lima</namePart>
        <namePart type="family">Lopez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="family">Flores</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karen</namePart>
        <namePart type="family">O’Connor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Davy</namePart>
        <namePart type="family">Weissenbacher</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elena</namePart>
        <namePart type="family">Tutubalina</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abeed</namePart>
        <namePart type="family">Sarker</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="given">M</namePart>
        <namePart type="family">Banda</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Martin</namePart>
        <namePart type="family">Krallinger</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes the participation of the UoB-NLP team in the ProfNER-ST shared subtask 7a. The task was aimed at detecting the mention of professions in social media text. Our team experimented with two methods of improving the performance of pre-trained models: Specifically, we experimented with data augmentation through translation and the merging of multiple language inputs to meet the objective of the task. While the best performing model on the test data consisted of mBERT fine-tuned on augmented data using back-translation, the improvement is minor possibly because multi-lingual pre-trained models such as mBERT already have access to the kind of information provided through back-translation and bilingual data.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">laureano-de-leon-etal-2021-uob</identifier>
    <identifier type="doi">10.18653/v1/2021.smm4h-1.23</identifier>
    <location>
      <url>https://aclanthology.org/2021.smm4h-1.23</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-06</date>
      <extent unit="page">
        <start>115</start>
        <end>117</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T UoB at ProfNER 2021: Data Augmentation for Classification Using Machine Translation
%A Laureano De Leon, Frances Adriana
%A Tayyar Madabushi, Harish
%A Lee, Mark
%Y Magge, Arjun
%Y Klein, Ari
%Y Miranda-Escalada, Antonio
%Y Al-garadi, Mohammed Ali
%Y Alimova, Ilseyar
%Y Miftahutdinov, Zulfat
%Y Farre-Maduell, Eulalia
%Y Lopez, Salvador Lima
%Y Flores, Ivan
%Y O’Connor, Karen
%Y Weissenbacher, Davy
%Y Tutubalina, Elena
%Y Sarker, Abeed
%Y Banda, Juan M.
%Y Krallinger, Martin
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the Sixth Social Media Mining for Health (#SMM4H) Workshop and Shared Task
%D 2021
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F laureano-de-leon-etal-2021-uob
%X This paper describes the participation of the UoB-NLP team in the ProfNER-ST shared subtask 7a. The task was aimed at detecting the mention of professions in social media text. Our team experimented with two methods of improving the performance of pre-trained models: Specifically, we experimented with data augmentation through translation and the merging of multiple language inputs to meet the objective of the task. While the best performing model on the test data consisted of mBERT fine-tuned on augmented data using back-translation, the improvement is minor possibly because multi-lingual pre-trained models such as mBERT already have access to the kind of information provided through back-translation and bilingual data.
%R 10.18653/v1/2021.smm4h-1.23
%U https://aclanthology.org/2021.smm4h-1.23
%U https://doi.org/10.18653/v1/2021.smm4h-1.23
%P 115-117
Markdown (Informal)
[UoB at ProfNER 2021: Data Augmentation for Classification Using Machine Translation](https://aclanthology.org/2021.smm4h-1.23) (Laureano De Leon et al., SMM4H 2021)
ACL