@inproceedings{karakos-etal-2020-reformulating,
title = "Reformulating Information Retrieval from Speech and Text as a Detection Problem",
author = "Karakos, Damianos and
Zbib, Rabih and
Hartmann, William and
Schwartz, Richard and
Makhoul, John",
editor = "McKeown, Kathy and
Oard, Douglas W. and
{Elizabeth} and
Schwartz, Richard",
booktitle = "Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020)",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.clssts-1.7",
pages = "38--43",
abstract = "In the IARPA MATERIAL program, information retrieval (IR) is treated as a hard detection problem; the system has to output a single global ranking over all queries, and apply a hard threshold on this global list to come up with all the hypothesized relevant documents. This means that how queries are ranked relative to each other can have a dramatic impact on performance. In this paper, we study such a performance measure, the Average Query Weighted Value (AQWV), which is a combination of miss and false alarm rates. AQWV requires that the same detection threshold is applied to all queries. Hence, detection scores of different queries should be comparable, and, to do that, a score normalization technique (commonly used in keyword spotting from speech) should be used. We describe unsupervised methods for score normalization, which are borrowed from the speech field and adapted accordingly for IR, and demonstrate that they greatly improve AQWV on the task of cross-language information retrieval (CLIR), on three low-resource languages used in MATERIAL. We also present a novel supervised score normalization approach which gives additional gains.",
language = "English",
ISBN = "979-10-95546-55-9",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="karakos-etal-2020-reformulating">
<titleInfo>
<title>Reformulating Information Retrieval from Speech and Text as a Detection Problem</title>
</titleInfo>
<name type="personal">
<namePart type="given">Damianos</namePart>
<namePart type="family">Karakos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rabih</namePart>
<namePart type="family">Zbib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Hartmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Makhoul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kathy</namePart>
<namePart type="family">McKeown</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Douglas</namePart>
<namePart type="given">W</namePart>
<namePart type="family">Oard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-55-9</identifier>
</relatedItem>
<abstract>In the IARPA MATERIAL program, information retrieval (IR) is treated as a hard detection problem; the system has to output a single global ranking over all queries, and apply a hard threshold on this global list to come up with all the hypothesized relevant documents. This means that how queries are ranked relative to each other can have a dramatic impact on performance. In this paper, we study such a performance measure, the Average Query Weighted Value (AQWV), which is a combination of miss and false alarm rates. AQWV requires that the same detection threshold is applied to all queries. Hence, detection scores of different queries should be comparable, and, to do that, a score normalization technique (commonly used in keyword spotting from speech) should be used. We describe unsupervised methods for score normalization, which are borrowed from the speech field and adapted accordingly for IR, and demonstrate that they greatly improve AQWV on the task of cross-language information retrieval (CLIR), on three low-resource languages used in MATERIAL. We also present a novel supervised score normalization approach which gives additional gains.</abstract>
<identifier type="citekey">karakos-etal-2020-reformulating</identifier>
<location>
<url>https://aclanthology.org/2020.clssts-1.7</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>38</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reformulating Information Retrieval from Speech and Text as a Detection Problem
%A Karakos, Damianos
%A Zbib, Rabih
%A Hartmann, William
%A Schwartz, Richard
%A Makhoul, John
%Y McKeown, Kathy
%Y Oard, Douglas W.
%Y Elizabeth
%Y Schwartz, Richard
%S Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020)
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-55-9
%G English
%F karakos-etal-2020-reformulating
%X In the IARPA MATERIAL program, information retrieval (IR) is treated as a hard detection problem; the system has to output a single global ranking over all queries, and apply a hard threshold on this global list to come up with all the hypothesized relevant documents. This means that how queries are ranked relative to each other can have a dramatic impact on performance. In this paper, we study such a performance measure, the Average Query Weighted Value (AQWV), which is a combination of miss and false alarm rates. AQWV requires that the same detection threshold is applied to all queries. Hence, detection scores of different queries should be comparable, and, to do that, a score normalization technique (commonly used in keyword spotting from speech) should be used. We describe unsupervised methods for score normalization, which are borrowed from the speech field and adapted accordingly for IR, and demonstrate that they greatly improve AQWV on the task of cross-language information retrieval (CLIR), on three low-resource languages used in MATERIAL. We also present a novel supervised score normalization approach which gives additional gains.
%U https://aclanthology.org/2020.clssts-1.7
%P 38-43
Markdown (Informal)
[Reformulating Information Retrieval from Speech and Text as a Detection Problem](https://aclanthology.org/2020.clssts-1.7) (Karakos et al., CLSSTS 2020)
ACL