@inproceedings{ruckle-etal-2019-neural,
title = "Neural Duplicate Question Detection without Labeled Training Data",
author = {R{\"u}ckl{\'e}, Andreas and
Moosavi, Nafise Sadat and
Gurevych, Iryna},
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1171",
doi = "10.18653/v1/D19-1171",
pages = "1607--1617",
abstract = "Supervised training of neural models to duplicate question detection in community Question Answering (CQA) requires large amounts of labeled question pairs, which can be costly to obtain. To minimize this cost, recent works thus often used alternative methods, e.g., adversarial domain adaptation. In this work, we propose two novel methods{---}weak supervision using the title and body of a question, and the automatic generation of duplicate questions{---}and show that both can achieve improved performances even though they do not require any labeled data. We provide a comparison of popular training strategies and show that our proposed approaches are more effective in many cases because they can utilize larger amounts of data from the CQA forums. Finally, we show that weak supervision with question title and body information is also an effective method to train CQA answer selection models without direct answer supervision.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ruckle-etal-2019-neural">
<titleInfo>
<title>Neural Duplicate Question Detection without Labeled Training Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Rücklé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Supervised training of neural models to duplicate question detection in community Question Answering (CQA) requires large amounts of labeled question pairs, which can be costly to obtain. To minimize this cost, recent works thus often used alternative methods, e.g., adversarial domain adaptation. In this work, we propose two novel methods—weak supervision using the title and body of a question, and the automatic generation of duplicate questions—and show that both can achieve improved performances even though they do not require any labeled data. We provide a comparison of popular training strategies and show that our proposed approaches are more effective in many cases because they can utilize larger amounts of data from the CQA forums. Finally, we show that weak supervision with question title and body information is also an effective method to train CQA answer selection models without direct answer supervision.</abstract>
<identifier type="citekey">ruckle-etal-2019-neural</identifier>
<identifier type="doi">10.18653/v1/D19-1171</identifier>
<location>
<url>https://aclanthology.org/D19-1171</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>1607</start>
<end>1617</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Duplicate Question Detection without Labeled Training Data
%A Rücklé, Andreas
%A Moosavi, Nafise Sadat
%A Gurevych, Iryna
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F ruckle-etal-2019-neural
%X Supervised training of neural models to duplicate question detection in community Question Answering (CQA) requires large amounts of labeled question pairs, which can be costly to obtain. To minimize this cost, recent works thus often used alternative methods, e.g., adversarial domain adaptation. In this work, we propose two novel methods—weak supervision using the title and body of a question, and the automatic generation of duplicate questions—and show that both can achieve improved performances even though they do not require any labeled data. We provide a comparison of popular training strategies and show that our proposed approaches are more effective in many cases because they can utilize larger amounts of data from the CQA forums. Finally, we show that weak supervision with question title and body information is also an effective method to train CQA answer selection models without direct answer supervision.
%R 10.18653/v1/D19-1171
%U https://aclanthology.org/D19-1171
%U https://doi.org/10.18653/v1/D19-1171
%P 1607-1617
Markdown (Informal)
[Neural Duplicate Question Detection without Labeled Training Data](https://aclanthology.org/D19-1171) (Rücklé et al., EMNLP-IJCNLP 2019)
ACL
- Andreas Rücklé, Nafise Sadat Moosavi, and Iryna Gurevych. 2019. Neural Duplicate Question Detection without Labeled Training Data. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 1607–1617, Hong Kong, China. Association for Computational Linguistics.