@inproceedings{do-etal-2017-need,
title = "What do we need to know about an unknown word when parsing {G}erman",
author = "Do, Bich-Ngoc and
Rehbein, Ines and
Frank, Anette",
editor = "Faruqui, Manaal and
Schuetze, Hinrich and
Trancoso, Isabel and
Yaghoobzadeh, Yadollah",
booktitle = "Proceedings of the First Workshop on Subword and Character Level Models in {NLP}",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4117",
doi = "10.18653/v1/W17-4117",
pages = "117--123",
abstract = "We propose a new type of subword embedding designed to provide more information about unknown compounds, a major source for OOV words in German. We present an extrinsic evaluation where we use the compound embeddings as input to a neural dependency parser and compare the results to the ones obtained with other types of embeddings. Our evaluation shows that adding compound embeddings yields a significant improvement of 2{\%} LAS over using word embeddings when no POS information is available. When adding POS embeddings to the input, however, the effect levels out. This suggests that it is not the missing information about the semantics of the unknown words that causes problems for parsing German, but the lack of morphological information for unknown words. To augment our evaluation, we also test the new embeddings in a language modelling task that requires both syntactic and semantic information.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="do-etal-2017-need">
<titleInfo>
<title>What do we need to know about an unknown word when parsing German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bich-Ngoc</namePart>
<namePart type="family">Do</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ines</namePart>
<namePart type="family">Rehbein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anette</namePart>
<namePart type="family">Frank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Subword and Character Level Models in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manaal</namePart>
<namePart type="family">Faruqui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schuetze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="family">Trancoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yadollah</namePart>
<namePart type="family">Yaghoobzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a new type of subword embedding designed to provide more information about unknown compounds, a major source for OOV words in German. We present an extrinsic evaluation where we use the compound embeddings as input to a neural dependency parser and compare the results to the ones obtained with other types of embeddings. Our evaluation shows that adding compound embeddings yields a significant improvement of 2% LAS over using word embeddings when no POS information is available. When adding POS embeddings to the input, however, the effect levels out. This suggests that it is not the missing information about the semantics of the unknown words that causes problems for parsing German, but the lack of morphological information for unknown words. To augment our evaluation, we also test the new embeddings in a language modelling task that requires both syntactic and semantic information.</abstract>
<identifier type="citekey">do-etal-2017-need</identifier>
<identifier type="doi">10.18653/v1/W17-4117</identifier>
<location>
<url>https://aclanthology.org/W17-4117</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>117</start>
<end>123</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What do we need to know about an unknown word when parsing German
%A Do, Bich-Ngoc
%A Rehbein, Ines
%A Frank, Anette
%Y Faruqui, Manaal
%Y Schuetze, Hinrich
%Y Trancoso, Isabel
%Y Yaghoobzadeh, Yadollah
%S Proceedings of the First Workshop on Subword and Character Level Models in NLP
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F do-etal-2017-need
%X We propose a new type of subword embedding designed to provide more information about unknown compounds, a major source for OOV words in German. We present an extrinsic evaluation where we use the compound embeddings as input to a neural dependency parser and compare the results to the ones obtained with other types of embeddings. Our evaluation shows that adding compound embeddings yields a significant improvement of 2% LAS over using word embeddings when no POS information is available. When adding POS embeddings to the input, however, the effect levels out. This suggests that it is not the missing information about the semantics of the unknown words that causes problems for parsing German, but the lack of morphological information for unknown words. To augment our evaluation, we also test the new embeddings in a language modelling task that requires both syntactic and semantic information.
%R 10.18653/v1/W17-4117
%U https://aclanthology.org/W17-4117
%U https://doi.org/10.18653/v1/W17-4117
%P 117-123
Markdown (Informal)
[What do we need to know about an unknown word when parsing German](https://aclanthology.org/W17-4117) (Do et al., SCLeM 2017)
ACL