% Conference paper record, EMNLP-IJCNLP 2019 proceedings (DOI 10.18653/v1/D19-1397).
% Fields are grouped as: who / what / where and when / identifiers / abstract.
@inproceedings{ye-etal-2019-looking,
  author    = {Ye, Qinyuan and Liu, Liyuan and Zhang, Maosen and Ren, Xiang},
  editor    = {Inui, Kentaro and Jiang, Jing and Ng, Vincent and Wan, Xiaojun},
  title     = {Looking Beyond Label Noise: Shifted Label Distribution Matters in Distantly Supervised Relation Extraction},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  year      = {2019},
  month     = nov,
  pages     = {3841--3850},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  doi       = {10.18653/v1/D19-1397},
  url       = {https://aclanthology.org/D19-1397/},
  abstract  = {In recent years there is a surge of interest in applying distant supervision (DS) to automatically generate training data for relation extraction (RE). In this paper, we study the problem what limits the performance of DS-trained neural models, conduct thorough analyses, and identify a factor that can influence the performance greatly, shifted label distribution. Specifically, we found this problem commonly exists in real-world DS datasets, and without special handing, typical DS-RE models cannot automatically adapt to this shift, thus achieving deteriorated performance. To further validate our intuition, we develop a simple yet effective adaptation method for DS-trained models, bias adjustment, which updates models learned over the source domain (i.e., DS training set) with a label distribution estimated on the target domain (i.e., test set). Experiments demonstrate that bias adjustment achieves consistent performance gains on DS-trained models, especially on neural models, with an up to 23{\%} relative F1 improvement, which verifies our assumptions. Our code and data can be found at \url{https://github.com/INK-USC/shifted-label-distribution}.},
}<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (citekey ye-etal-2019-looking). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ye-etal-2019-looking">
    <titleInfo>
        <title>Looking Beyond Label Noise: Shifted Label Distribution Matters in Distantly Supervised Relation Extraction</title>
    </titleInfo>
    <!-- Authors of the paper: one <name> element per person, each with role "author". -->
    <name type="personal">
        <namePart type="given">Qinyuan</namePart>
        <namePart type="family">Ye</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Liyuan</namePart>
        <namePart type="family">Liu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Maosen</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Xiang</namePart>
        <namePart type="family">Ren</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2019-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors, publisher and place. -->
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Kentaro</namePart>
            <namePart type="family">Inui</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jing</namePart>
            <namePart type="family">Jiang</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Vincent</namePart>
            <namePart type="family">Ng</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Xiaojun</namePart>
            <namePart type="family">Wan</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Hong Kong, China</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In recent years there is a surge of interest in applying distant supervision (DS) to automatically generate training data for relation extraction (RE). In this paper, we study the problem what limits the performance of DS-trained neural models, conduct thorough analyses, and identify a factor that can influence the performance greatly, shifted label distribution. Specifically, we found this problem commonly exists in real-world DS datasets, and without special handing, typical DS-RE models cannot automatically adapt to this shift, thus achieving deteriorated performance. To further validate our intuition, we develop a simple yet effective adaptation method for DS-trained models, bias adjustment, which updates models learned over the source domain (i.e., DS training set) with a label distribution estimated on the target domain (i.e., test set). Experiments demonstrate that bias adjustment achieves consistent performance gains on DS-trained models, especially on neural models, with an up to 23% relative F1 improvement, which verifies our assumptions. Our code and data can be found at https://github.com/INK-USC/shifted-label-distribution.</abstract>
    <!-- Identifiers (citation key, DOI) and the URL recorded for this paper. -->
    <identifier type="citekey">ye-etal-2019-looking</identifier>
    <identifier type="doi">10.18653/v1/D19-1397</identifier>
    <location>
        <url>https://aclanthology.org/D19-1397/</url>
    </location>
    <!-- Date and page extent of the paper (first and last page). -->
    <part>
        <date>2019-11</date>
        <extent unit="page">
            <start>3841</start>
            <end>3850</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Looking Beyond Label Noise: Shifted Label Distribution Matters in Distantly Supervised Relation Extraction
%A Ye, Qinyuan
%A Liu, Liyuan
%A Zhang, Maosen
%A Ren, Xiang
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F ye-etal-2019-looking
%X In recent years there is a surge of interest in applying distant supervision (DS) to automatically generate training data for relation extraction (RE). In this paper, we study the problem what limits the performance of DS-trained neural models, conduct thorough analyses, and identify a factor that can influence the performance greatly, shifted label distribution. Specifically, we found this problem commonly exists in real-world DS datasets, and without special handing, typical DS-RE models cannot automatically adapt to this shift, thus achieving deteriorated performance. To further validate our intuition, we develop a simple yet effective adaptation method for DS-trained models, bias adjustment, which updates models learned over the source domain (i.e., DS training set) with a label distribution estimated on the target domain (i.e., test set). Experiments demonstrate that bias adjustment achieves consistent performance gains on DS-trained models, especially on neural models, with an up to 23% relative F1 improvement, which verifies our assumptions. Our code and data can be found at https://github.com/INK-USC/shifted-label-distribution.
%R 10.18653/v1/D19-1397
%U https://aclanthology.org/D19-1397/
%U https://doi.org/10.18653/v1/D19-1397
%P 3841-3850
Markdown (Informal)
[Looking Beyond Label Noise: Shifted Label Distribution Matters in Distantly Supervised Relation Extraction](https://aclanthology.org/D19-1397/) (Ye et al., EMNLP-IJCNLP 2019)
ACL