<?xml version="1.0" encoding="UTF-8"?>
<algorithms version="110505">
<algorithm name="ParsCit" version="110505">
<citationList>
<citation valid="true">
<authors>
<author>M Alzghool</author>
<author>D Inkpen</author>
</authors>
<title>Experiments for the cross language speech retrieval task at CLEF 2006. In</title>
<date>2007</date>
<booktitle>Evaluation of multilingual and multi-modal information retrieval</booktitle>
<volume>4730</volume>
<pages>778--785</pages>
<editor>C. Peters, (Ed.),</editor>
<publisher>Springer.</publisher>
<marker>Alzghool, Inkpen, 2007</marker>
<rawString>Alzghool, M. &amp; Inkpen, D. (2007). Experiments for the cross language speech retrieval task at CLEF 2006. In C. Peters, (Ed.), Evaluation of multilingual and multi-modal information retrieval (Vol. 4730/2007, pp. 778-785). Springer.</rawString>
</citation>
<citation valid="true">
<authors>
<author>G Amati</author>
<author>C J Van Rijsbergen</author>
</authors>
<date>2002</date>
<booktitle>Probabilistic Proceedings of the 18th annual international ACM SIGIR conference on Research and development in information retrieval. ACM,</booktitle>
<location>Seattle, Washington, United States.</location>
<marker>Amati, Van Rijsbergen, 2002</marker>
<rawString>Amati, G. &amp; Van Rijsbergen, C. J. (2002). Probabilistic Proceedings of the 18th annual international ACM SIGIR conference on Research and development in information retrieval. ACM, Seattle, Washington, United States.</rawString>
</citation>
<citation valid="true">
<authors>
<author>D W Oard</author>
<author>D Soergel</author>
<author>D Doermann</author>
<author>X Huang</author>
<author>G C Murray</author>
<author>J Wang</author>
<author>B Ramabhadran</author>
<author>M Franz</author>
<author>S Gustman</author>
</authors>
<title>Building an information retrieval test collection for spontaneous conversational speech,</title>
<date>2004</date>
<booktitle>Proceedings of the 27th annual international ACM SIGIR conference on Research and development in information retrieval. ACM,</booktitle>
<location>Sheffield, United Kingdom.</location>
<contexts>
<context position="3027" citStr="Oard et al., 2004" startWordPosition="442" endWordPosition="445"> by an expert in the field. A set of 63 training topics and 33 test topics were generated for this task. The topics provided with the collection were created in English from actual user requests. Topics were structured using the standard TREC format of Title, Description and Narrative fields. To enable CL-SR experiments the topics were translated into Czech, German, French, and Spanish by native speakers; Figure 2 and 3 show two examples for English and its translation in French respectively. Relevance judgments were generated using a search-guided procedure and standard pooling methods. See (Oard et al., 2004) for full details of the collection design. We present results on the automatic transcripts for English queries and translated queries (cross-language) for two combination methods; we also present results when manual summaries and manual keywords are indexed. &lt;DOC&gt; &lt;DOCNO&gt;VHF[IntCode]-[SegId].[SequenceNum]&lt;/DOCNO\&gt; &lt;INTERVIEWDATA&gt;Interviewee name(s) and birthdate&lt;/INTERVIEWDATA&gt; &lt;NAME&gt;Full name of every person mentioned&lt;/NAME&gt; &lt;MANUALKEYWORD&gt;Thesaurus keywords assigned to the segment&lt;/MANUALKEYWORD&gt; &lt;SUMMARY&gt;3-sentence segment summary&lt;/SUMMARY&gt; &lt;ASRTEXT2004A&gt;ASR transcript produced in 2004&lt;/AS</context>
</contexts>
<marker>Oard, Soergel, Doermann, Huang, Murray, Wang, Ramabhadran, Franz, Gustman, 2004</marker>
<rawString>Oard, D. W., Soergel, D., Doermann, D., Huang, X., Murray, G. C., Wang, J., Ramabhadran, B., Franz, M., &amp; Gustman, S. (2004). Building an information retrieval test collection for spontaneous conversational speech, Proceedings of the 27th annual international ACM SIGIR conference on Research and development in information retrieval. ACM, Sheffield, United Kingdom.</rawString>
</citation>
<citation valid="true">
<authors>
<author>D W Oard</author>
<author>J Wang</author>
<author>G J F Jones</author>
<author>R W White</author>
<author>P Pecina</author>
<author>D Soergel</author>
<author>X Huang</author>
<author>I Shafran</author>
</authors>
<title>Overview of the CLEF-2006 cross-language speech retrieval track. In</title>
<date>2007</date>
<booktitle>Evaluation of multilingual and multi-modal information retrieval</booktitle>
<volume>4730</volume>
<pages>744--758</pages>
<editor>C. Peters, (Ed.),</editor>
<publisher>Springer,</publisher>
<location>Heidelberg.</location>
<contexts>
<context position="1147" citStr="Oard et al., 2007" startWordPosition="158" endWordPosition="161">se the text collection is automatically transcribed spontaneous speech, with many recognition errors. Also, the topics are real information needs, difficult to satisfy. Information Retrieval systems are not able to obtain good results on this data set, except for the case when manual summaries are included. 1. Introduction Conversational speech such as recordings of interviews or teleconferences is difficult to search through. The transcripts produced with Automatic Speech Recognition (ASR) systems tend to contain many recognition errors, leading to low Information Retrieval (IR) performance (Oard et al., 2007). Previous research has explored the idea of combining the results of different retrieval strategies; the motivation is that each technique will retrieve different sets of relevant documents; therefore combining the results could produce a better result than any of the individual techniques. We propose new data fusion techniques for combining the results of different IR models. We applied our data fusion techniques to the Mallach collection (Oard et al., 2007) used in the Cross-Language Speech Retrieval (CLSR) task at Cross-Language Evaluation Forum (CLEF) 2007. The Mallach collection comprise</context>
</contexts>
<marker>Oard, Wang, Jones, White, Pecina, Soergel, Huang, Shafran, 2007</marker>
<rawString>Oard, D. W., Wang, J., Jones, G. J. F., White, R. W., Pecina, P., Soergel, D., Huang, X., &amp; Shafran, I. (2007). Overview of the CLEF-2006 cross-language speech retrieval track. In C. Peters, (Ed.), Evaluation of multilingual and multi-modal information retrieval (Vol. 4730/2007, pp. 744-758). Springer, Heidelberg.</rawString>
</citation>
<citation valid="true">
<authors>
<author>I Ounis</author>
<author>G Amati</author>
<author>V Plachouras</author>
<author>B He</author>
<author>C Macdonald</author>
<author>D Johnson</author>
</authors>
<title>Terrier information retrieval platform</title>
<date>2005</date>
<booktitle>In Advances in information retrieval</booktitle>
<volume>3408</volume>
<pages>517--519</pages>
<publisher>Springer,</publisher>
<location>Heidelberg.</location>
<contexts>
<context position="4995" citStr="Ounis et al., 2005" startWordPosition="688" endWordPosition="691"> topic in CL-SR test collection. &lt;top&gt; &lt;num&gt;1159 &lt;title&gt;Les enfants survivants en Suède &lt;desc&gt;Descriptions des mécanismes de survie des enfants nés entre 1930 et 1933 qui ont passé la guerre en camps de concentration ou cachés et qui vivent actuellement en Suède. &lt;narr&gt;... &lt;/top&gt; Figure 3. Example for French topic in CL-SR test collection. 2. System Description Our Cross-Language Information Retrieval systems were built with off-the-shelf components. For the retrieval part, the SMART (Buckley, Salton, &amp;Allan, 1992; Salton &amp;Buckley, 1988) IR system and the Terrier (Amati &amp;Van Rijsbergen, 2002; Ounis et al., 2005) IR system were tested with many different weighting schemes for indexing the collection and the queries. SMART was originally developed at Cornell University in the 1960s. SMART is based on the vector space model of information retrieval. We use the standard notation: weighting scheme for the documents, followed by dot, followed by the weighting scheme for the queries, each term-weighting scheme is described as a combination of term frequency, collection frequency, and length normalization components where the schemes are abbreviated according to its components variations (n no normalization,</context>
<context position="6510" citStr="Ounis et al., 2005" startWordPosition="924" endWordPosition="927">zghool &amp;Inkpen, 2007; Inkpen, Alzghool, &amp;Islam, 2006); lnn.ntn means that lnn was used for documents and ntn for queries according to the following formulas: weightlnn= ln(tf)+1.0 (1) weight ntn= tf × log (2) N nt where tf denotes the term frequency of a term t in the document or query, N denotes the number of documents in the collection, and nt denotes the number of documents in which the term t occurs. Terrier was originally developed at the University of Glasgow. It is based on Divergence from Randomness models (DFR) where IR is seen as a probabilistic process (Amati &amp;Van Rijsbergen, 2002; Ounis et al., 2005). We experimented with the In_expC2 (Inverse Expected Document Frequency model with Bernoulli after-effect and normalization) weighting model, one of Terrier’s DFR-based document weighting models. Using the In_expC2 model, the relevance score of a document d for a query q is given by the formula: sim(d, q) qtf .w ( t , d) = ∑ t q ∈ where qtf is the frequency of term t in the query q, and w(t,d) is the relevance score of a document d for the query term t, given by: 1 w t d ( , ) ( = ) ( log × tfn × ) (4) where -F is the term frequency of t in the whole collection. -N is the number of document i</context>
</contexts>
<marker>Ounis, Amati, Plachouras, He, Macdonald, Johnson, 2005</marker>
<rawString>Ounis, I., Amati, G., Plachouras, V., He, B., Macdonald, C., &amp; Johnson, D. (2005). Terrier information retrieval platform In Advances in information retrieval (Vol. 3408/2005, pp. 517-519). Springer, Heidelberg.</rawString>
</citation>
<citation valid="true">
<authors>
<author>P Pecina</author>
<author>P Hoffmannová</author>
<author>G J F Jones</author>
<author>Y Zhang</author>
<author>D W Oard</author>
</authors>
<title>Overview of the CLEF-2007 cross language speech retrieval track, Working Notes of the CLEF-</title>
<date>2007</date>
<booktitle>Evaluation, . CLEF2007,</booktitle>
<location>Budapest-Hungary.</location>
<marker>Pecina, Hoffmannová, Jones, Zhang, Oard, 2007</marker>
<rawString>Pecina, P., Hoffmannová, P., Jones, G. J. F., Zhang, Y., &amp; Oard, D. W. (2007). Overview of the CLEF-2007 cross language speech retrieval track, Working Notes of the CLEF- 2007 Evaluation, . CLEF2007, Budapest-Hungary.</rawString>
</citation>
<citation valid="true">
<authors>
<author>G Salton</author>
<author>C Buckley</author>
</authors>
<title>Term weighting approaches in automatic text retrieval.</title>
<date>1988</date>
<booktitle>Information Processing and Management,</booktitle>
<volume>24</volume>
<issue>5</issue>
<pages>513--523</pages>
<marker>Salton, Buckley, 1988</marker>
<rawString>Salton, G. &amp; Buckley, C. (1988). Term weighting approaches in automatic text retrieval. Information Processing and Management, 24(5): 513-523.</rawString>
</citation>
<citation valid="true">
<authors>
<author>J A Shaw</author>
<author>E A Fox</author>
</authors>
<title>Combination of multiple searches.</title>
<date>1994</date>
<booktitle>In Third text retrieval conference (trec-3)</booktitle>
<pages>105--108</pages>
<marker>Shaw, Fox, 1994</marker>
<rawString>Shaw, J. A. &amp; Fox, E. A. (1994). Combination of multiple searches. In Third text retrieval conference (trec-3) (pp. 105-108). National Institute of Standards and Technology Special Publication.</rawString>
</citation>
</citationList>
</algorithm>
</algorithms>