@inproceedings{xu-etal-2019-ranking,
title = "Ranking and Sampling in Open-Domain Question Answering",
author = "Xu, Yanfu and
Lin, Zheng and
Liu, Yuanxin and
Liu, Rui and
Wang, Weiping and
Meng, Dan",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1245",
doi = "10.18653/v1/D19-1245",
pages = "2412--2421",
abstract = "Open-domain question answering (OpenQA) aims to answer questions based on a number of unlabeled paragraphs. Existing approaches always follow the distantly supervised setup where some of the paragraphs are wrong-labeled (noisy), and mainly utilize the paragraph-question relevance to denoise. However, the paragraph-paragraph relevance, which may aggregate the evidence among relevant paragraphs, can also be utilized to discover more useful paragraphs. Moreover, current approaches mainly focus on the positive paragraphs which are known to contain the answer during training. This will affect the generalization ability of the model and make it be disturbed by the similar but irrelevant (distracting) paragraphs during testing. In this paper, we first introduce a ranking model leveraging the paragraph-question and the paragraph-paragraph relevance to compute a confidence score for each paragraph. Furthermore, based on the scores, we design a modified weighted sampling strategy for training to mitigate the influence of the noisy and distracting paragraphs. Experiments on three public datasets (Quasar-T, SearchQA and TriviaQA) show that our model advances the state of the art.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2019-ranking">
<titleInfo>
<title>Ranking and Sampling in Open-Domain Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanfu</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuanxin</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weiping</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Meng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open-domain question answering (OpenQA) aims to answer questions based on a number of unlabeled paragraphs. Existing approaches always follow the distantly supervised setup where some of the paragraphs are wrong-labeled (noisy), and mainly utilize the paragraph-question relevance to denoise. However, the paragraph-paragraph relevance, which may aggregate the evidence among relevant paragraphs, can also be utilized to discover more useful paragraphs. Moreover, current approaches mainly focus on the positive paragraphs which are known to contain the answer during training. This will affect the generalization ability of the model and make it be disturbed by the similar but irrelevant (distracting) paragraphs during testing. In this paper, we first introduce a ranking model leveraging the paragraph-question and the paragraph-paragraph relevance to compute a confidence score for each paragraph. Furthermore, based on the scores, we design a modified weighted sampling strategy for training to mitigate the influence of the noisy and distracting paragraphs. Experiments on three public datasets (Quasar-T, SearchQA and TriviaQA) show that our model advances the state of the art.</abstract>
<identifier type="citekey">xu-etal-2019-ranking</identifier>
<identifier type="doi">10.18653/v1/D19-1245</identifier>
<location>
<url>https://aclanthology.org/D19-1245</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>2412</start>
<end>2421</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Ranking and Sampling in Open-Domain Question Answering
%A Xu, Yanfu
%A Lin, Zheng
%A Liu, Yuanxin
%A Liu, Rui
%A Wang, Weiping
%A Meng, Dan
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F xu-etal-2019-ranking
%X Open-domain question answering (OpenQA) aims to answer questions based on a number of unlabeled paragraphs. Existing approaches always follow the distantly supervised setup where some of the paragraphs are wrong-labeled (noisy), and mainly utilize the paragraph-question relevance to denoise. However, the paragraph-paragraph relevance, which may aggregate the evidence among relevant paragraphs, can also be utilized to discover more useful paragraphs. Moreover, current approaches mainly focus on the positive paragraphs which are known to contain the answer during training. This will affect the generalization ability of the model and make it be disturbed by the similar but irrelevant (distracting) paragraphs during testing. In this paper, we first introduce a ranking model leveraging the paragraph-question and the paragraph-paragraph relevance to compute a confidence score for each paragraph. Furthermore, based on the scores, we design a modified weighted sampling strategy for training to mitigate the influence of the noisy and distracting paragraphs. Experiments on three public datasets (Quasar-T, SearchQA and TriviaQA) show that our model advances the state of the art.
%R 10.18653/v1/D19-1245
%U https://aclanthology.org/D19-1245
%U https://doi.org/10.18653/v1/D19-1245
%P 2412-2421
Markdown (Informal)
[Ranking and Sampling in Open-Domain Question Answering](https://aclanthology.org/D19-1245) (Xu et al., EMNLP-IJCNLP 2019)
ACL
- Yanfu Xu, Zheng Lin, Yuanxin Liu, Rui Liu, Weiping Wang, and Dan Meng. 2019. Ranking and Sampling in Open-Domain Question Answering. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 2412–2421, Hong Kong, China. Association for Computational Linguistics.