@inproceedings{silberer-etal-2020-humans,
title = "Humans Meet Models on Object Naming: A New Dataset and Analysis",
author = "Silberer, Carina and
Zarrie{\ss}, Sina and
Westera, Matthijs and
Boleda, Gemma",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.172",
doi = "10.18653/v1/2020.coling-main.172",
pages = "1893--1905",
abstract = "We release ManyNames v2 (MN v2), a verified version of an object naming dataset that contains dozens of valid names per object for 25K images. We analyze issues in the data collection method originally employed, standard in Language {\&} Vision (L{\&}V), and find that the main source of noise in the data comes from simulating a naming context solely from an image with a target object marked with a bounding box, which causes subjects to sometimes disagree regarding which object is the target. We also find that both the degree of this uncertainty in the original data and the amount of true naming variation in MN v2 differs substantially across object domains. We use MN v2 to analyze a popular L{\&}V model and demonstrate its effectiveness on the task of object naming. However, our fine-grained analysis reveals that what appears to be human-like model behavior is not stable across domains, e.g., the model confuses people and clothing objects much more frequently than humans do. We also find that standard evaluations underestimate the actual effectiveness of the naming model: on the single-label names of the original dataset (Visual Genome), it obtains −27{\%} accuracy points than on MN v2, that includes all valid object names.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="silberer-etal-2020-humans">
<titleInfo>
<title>Humans Meet Models on Object Naming: A New Dataset and Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carina</namePart>
<namePart type="family">Silberer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Zarrieß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthijs</namePart>
<namePart type="family">Westera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Boleda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We release ManyNames v2 (MN v2), a verified version of an object naming dataset that contains dozens of valid names per object for 25K images. We analyze issues in the data collection method originally employed, standard in Language & Vision (L&V), and find that the main source of noise in the data comes from simulating a naming context solely from an image with a target object marked with a bounding box, which causes subjects to sometimes disagree regarding which object is the target. We also find that both the degree of this uncertainty in the original data and the amount of true naming variation in MN v2 differs substantially across object domains. We use MN v2 to analyze a popular L&V model and demonstrate its effectiveness on the task of object naming. However, our fine-grained analysis reveals that what appears to be human-like model behavior is not stable across domains, e.g., the model confuses people and clothing objects much more frequently than humans do. We also find that standard evaluations underestimate the actual effectiveness of the naming model: on the single-label names of the original dataset (Visual Genome), it obtains −27% accuracy points than on MN v2, that includes all valid object names.</abstract>
<identifier type="citekey">silberer-etal-2020-humans</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.172</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.172</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>1893</start>
<end>1905</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Humans Meet Models on Object Naming: A New Dataset and Analysis
%A Silberer, Carina
%A Zarrieß, Sina
%A Westera, Matthijs
%A Boleda, Gemma
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F silberer-etal-2020-humans
%X We release ManyNames v2 (MN v2), a verified version of an object naming dataset that contains dozens of valid names per object for 25K images. We analyze issues in the data collection method originally employed, standard in Language & Vision (L&V), and find that the main source of noise in the data comes from simulating a naming context solely from an image with a target object marked with a bounding box, which causes subjects to sometimes disagree regarding which object is the target. We also find that both the degree of this uncertainty in the original data and the amount of true naming variation in MN v2 differs substantially across object domains. We use MN v2 to analyze a popular L&V model and demonstrate its effectiveness on the task of object naming. However, our fine-grained analysis reveals that what appears to be human-like model behavior is not stable across domains, e.g., the model confuses people and clothing objects much more frequently than humans do. We also find that standard evaluations underestimate the actual effectiveness of the naming model: on the single-label names of the original dataset (Visual Genome), it obtains −27% accuracy points than on MN v2, that includes all valid object names.
%R 10.18653/v1/2020.coling-main.172
%U https://aclanthology.org/2020.coling-main.172
%U https://doi.org/10.18653/v1/2020.coling-main.172
%P 1893-1905
Markdown (Informal)
[Humans Meet Models on Object Naming: A New Dataset and Analysis](https://aclanthology.org/2020.coling-main.172) (Silberer et al., COLING 2020)
ACL
- Carina Silberer, Sina Zarrieß, Matthijs Westera, and Gemma Boleda. 2020. Humans Meet Models on Object Naming: A New Dataset and Analysis. In Proceedings of the 28th International Conference on Computational Linguistics, pages 1893–1905, Barcelona, Spain (Online). International Committee on Computational Linguistics.