@inproceedings{tirilly-etal-2010-news,
title = "News Image Annotation on a Large Parallel Text-image Corpus",
author = "Tirilly, Pierre and
Claveau, Vincent and
Gros, Patrick",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/772_Paper.pdf",
abstract = "In this paper, we present a multimodal parallel text-image corpus, and propose an image annotation method that exploits the textual information associated with images. Our corpus contains news articles composed of a text, images and image captions, and is significantly larger than the other news corpora proposed in image annotation papers (27,041 articles and 42,568 captionned images). In our experiments, we use the text of the articles as a textual information source to annotate images, and image captions as a groundtruth to evaluate our annotation algorithm. Our annotation method identifies relevant named entities in the texts, and associates them with high-level visual concepts detected in the images (in this paper, faces and logos). The named entities most suited to image annotation are selected using an unsupervised score based on their statistics, inspired from the weights used in information retrieval. Our experiments show that, although it is very simple, our annotation method achieves an acceptable accuracy on our real-world news corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tirilly-etal-2010-news">
<titleInfo>
<title>News Image Annotation on a Large Parallel Text-image Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Tirilly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Claveau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Gros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present a multimodal parallel text-image corpus, and propose an image annotation method that exploits the textual information associated with images. Our corpus contains news articles composed of a text, images and image captions, and is significantly larger than the other news corpora proposed in image annotation papers (27,041 articles and 42,568 captionned images). In our experiments, we use the text of the articles as a textual information source to annotate images, and image captions as a groundtruth to evaluate our annotation algorithm. Our annotation method identifies relevant named entities in the texts, and associates them with high-level visual concepts detected in the images (in this paper, faces and logos). The named entities most suited to image annotation are selected using an unsupervised score based on their statistics, inspired from the weights used in information retrieval. Our experiments show that, although it is very simple, our annotation method achieves an acceptable accuracy on our real-world news corpus.</abstract>
<identifier type="citekey">tirilly-etal-2010-news</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/772_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T News Image Annotation on a Large Parallel Text-image Corpus
%A Tirilly, Pierre
%A Claveau, Vincent
%A Gros, Patrick
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F tirilly-etal-2010-news
%X In this paper, we present a multimodal parallel text-image corpus, and propose an image annotation method that exploits the textual information associated with images. Our corpus contains news articles composed of a text, images and image captions, and is significantly larger than the other news corpora proposed in image annotation papers (27,041 articles and 42,568 captionned images). In our experiments, we use the text of the articles as a textual information source to annotate images, and image captions as a groundtruth to evaluate our annotation algorithm. Our annotation method identifies relevant named entities in the texts, and associates them with high-level visual concepts detected in the images (in this paper, faces and logos). The named entities most suited to image annotation are selected using an unsupervised score based on their statistics, inspired from the weights used in information retrieval. Our experiments show that, although it is very simple, our annotation method achieves an acceptable accuracy on our real-world news corpus.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/772_Paper.pdf
Markdown (Informal)
[News Image Annotation on a Large Parallel Text-image Corpus](http://www.lrec-conf.org/proceedings/lrec2010/pdf/772_Paper.pdf) (Tirilly et al., LREC 2010)
ACL