@inproceedings{ganguly-etal-2016-developing,
title = "Developing a Dataset for Evaluating Approaches for Document Expansion with Images",
author = "Ganguly, Debasis and
Calixto, Iacer and
Jones, Gareth",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1299",
pages = "1898--1901",
abstract = "Motivated by the adage that a {``}picture is worth a thousand words{''} it can be reasoned that automatically enriching the textual content of a document with relevant images can increase the readability of a document. Moreover, features extracted from the additional image data inserted into the textual content of a document may, in principle, be also be used by a retrieval engine to better match the topic of a document with that of a given query. In this paper, we describe our approach of building a ground truth dataset to enable further research into automatic addition of relevant images to text documents. The dataset is comprised of the official ImageCLEF 2010 collection (a collection of images with textual metadata) to serve as the images available for automatic enrichment of text, a set of 25 benchmark documents that are to be enriched, which in this case are children{'}s short stories, and a set of manually judged relevant images for each query story obtained by the standard procedure of depth pooling. We use this benchmark dataset to evaluate the effectiveness of standard information retrieval methods as simple baselines for this task. The results indicate that using the whole story as a weighted query, where the weight of each query term is its tf-idf value, achieves an precision of 0:1714 within the top 5 retrieved images on an average.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ganguly-etal-2016-developing">
<titleInfo>
<title>Developing a Dataset for Evaluating Approaches for Document Expansion with Images</title>
</titleInfo>
<name type="personal">
<namePart type="given">Debasis</namePart>
<namePart type="family">Ganguly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iacer</namePart>
<namePart type="family">Calixto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gareth</namePart>
<namePart type="family">Jones</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Motivated by the adage that a “picture is worth a thousand words” it can be reasoned that automatically enriching the textual content of a document with relevant images can increase the readability of a document. Moreover, features extracted from the additional image data inserted into the textual content of a document may, in principle, be also be used by a retrieval engine to better match the topic of a document with that of a given query. In this paper, we describe our approach of building a ground truth dataset to enable further research into automatic addition of relevant images to text documents. The dataset is comprised of the official ImageCLEF 2010 collection (a collection of images with textual metadata) to serve as the images available for automatic enrichment of text, a set of 25 benchmark documents that are to be enriched, which in this case are children’s short stories, and a set of manually judged relevant images for each query story obtained by the standard procedure of depth pooling. We use this benchmark dataset to evaluate the effectiveness of standard information retrieval methods as simple baselines for this task. The results indicate that using the whole story as a weighted query, where the weight of each query term is its tf-idf value, achieves an precision of 0:1714 within the top 5 retrieved images on an average.</abstract>
<identifier type="citekey">ganguly-etal-2016-developing</identifier>
<location>
<url>https://aclanthology.org/L16-1299</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>1898</start>
<end>1901</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing a Dataset for Evaluating Approaches for Document Expansion with Images
%A Ganguly, Debasis
%A Calixto, Iacer
%A Jones, Gareth
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F ganguly-etal-2016-developing
%X Motivated by the adage that a “picture is worth a thousand words” it can be reasoned that automatically enriching the textual content of a document with relevant images can increase the readability of a document. Moreover, features extracted from the additional image data inserted into the textual content of a document may, in principle, be also be used by a retrieval engine to better match the topic of a document with that of a given query. In this paper, we describe our approach of building a ground truth dataset to enable further research into automatic addition of relevant images to text documents. The dataset is comprised of the official ImageCLEF 2010 collection (a collection of images with textual metadata) to serve as the images available for automatic enrichment of text, a set of 25 benchmark documents that are to be enriched, which in this case are children’s short stories, and a set of manually judged relevant images for each query story obtained by the standard procedure of depth pooling. We use this benchmark dataset to evaluate the effectiveness of standard information retrieval methods as simple baselines for this task. The results indicate that using the whole story as a weighted query, where the weight of each query term is its tf-idf value, achieves an precision of 0:1714 within the top 5 retrieved images on an average.
%U https://aclanthology.org/L16-1299
%P 1898-1901
Markdown (Informal)
[Developing a Dataset for Evaluating Approaches for Document Expansion with Images](https://aclanthology.org/L16-1299) (Ganguly et al., LREC 2016)
ACL