@inproceedings{dominguez-orfila-etal-2022-cat,
title = "{CAT} {M}any{N}ames: A New Dataset for Object Naming in {C}atalan",
author = "Dom{\'\i}nguez Orfila, Mar and
Melero Nogu{\'e}s, Maite and
Boleda Torrent, Gemma",
editor = "Zock, Michael and
Chersoni, Emmanuele and
Hsu, Yu-Yin and
Santus, Enrico",
booktitle = "Proceedings of the Workshop on Cognitive Aspects of the Lexicon",
month = nov,
year = "2022",
address = "Taipei, Taiwan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.cogalex-1.4",
doi = "10.18653/v1/2022.cogalex-1.4",
pages = "31--36",
abstract = "Object Naming is an important task within the field of Language and Vision that consists of generating a correct and appropriate name for an object given an image. The ManyNames dataset uses real-world human-annotated images with multiple labels, instead of just one. In this work, we describe the adaptation of this dataset (originally in English) to Catalan, by (i) machine-translating the English labels and (ii) collecting human annotations for a subset of the original corpus and comparing both resources. Analyses reveal divergences in the lexical variation of the two sets showing potential problems of directly translated resources, particularly when there is no recourse to a proper context, which in this case is conveyed by the image. The analysis also points to the impact of cultural factors in the naming task, which should be accounted for in future cross-lingual naming tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dominguez-orfila-etal-2022-cat">
<titleInfo>
<title>CAT ManyNames: A New Dataset for Object Naming in Catalan</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mar</namePart>
<namePart type="family">Domínguez Orfila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero Nogués</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Boleda Torrent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Cognitive Aspects of the Lexicon</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Zock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuele</namePart>
<namePart type="family">Chersoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu-Yin</namePart>
<namePart type="family">Hsu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Object Naming is an important task within the field of Language and Vision that consists of generating a correct and appropriate name for an object given an image. The ManyNames dataset uses real-world human-annotated images with multiple labels, instead of just one. In this work, we describe the adaptation of this dataset (originally in English) to Catalan, by (i) machine-translating the English labels and (ii) collecting human annotations for a subset of the original corpus and comparing both resources. Analyses reveal divergences in the lexical variation of the two sets showing potential problems of directly translated resources, particularly when there is no recourse to a proper context, which in this case is conveyed by the image. The analysis also points to the impact of cultural factors in the naming task, which should be accounted for in future cross-lingual naming tasks.</abstract>
<identifier type="citekey">dominguez-orfila-etal-2022-cat</identifier>
<identifier type="doi">10.18653/v1/2022.cogalex-1.4</identifier>
<location>
<url>https://aclanthology.org/2022.cogalex-1.4</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>31</start>
<end>36</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CAT ManyNames: A New Dataset for Object Naming in Catalan
%A Domínguez Orfila, Mar
%A Melero Nogués, Maite
%A Boleda Torrent, Gemma
%Y Zock, Michael
%Y Chersoni, Emmanuele
%Y Hsu, Yu-Yin
%Y Santus, Enrico
%S Proceedings of the Workshop on Cognitive Aspects of the Lexicon
%D 2022
%8 November
%I Association for Computational Linguistics
%C Taipei, Taiwan
%F dominguez-orfila-etal-2022-cat
%X Object Naming is an important task within the field of Language and Vision that consists of generating a correct and appropriate name for an object given an image. The ManyNames dataset uses real-world human-annotated images with multiple labels, instead of just one. In this work, we describe the adaptation of this dataset (originally in English) to Catalan, by (i) machine-translating the English labels and (ii) collecting human annotations for a subset of the original corpus and comparing both resources. Analyses reveal divergences in the lexical variation of the two sets showing potential problems of directly translated resources, particularly when there is no recourse to a proper context, which in this case is conveyed by the image. The analysis also points to the impact of cultural factors in the naming task, which should be accounted for in future cross-lingual naming tasks.
%R 10.18653/v1/2022.cogalex-1.4
%U https://aclanthology.org/2022.cogalex-1.4
%U https://doi.org/10.18653/v1/2022.cogalex-1.4
%P 31-36
Markdown (Informal)
[CAT ManyNames: A New Dataset for Object Naming in Catalan](https://aclanthology.org/2022.cogalex-1.4) (Domínguez Orfila et al., CogALex 2022)
ACL