% Conference paper from the *SEM 2021 proceedings (ACL Anthology export).
% The compound surname "Schulte im Walde" is braced so BibTeX keeps it as one
% last name instead of splitting it into von = "Schulte im" / Last = "Walde".
@inproceedings{hatty-etal-2021-compound,
    title = "Compound or Term Features? Analyzing Salience in Predicting the Difficulty of {German} Noun Compounds across Domains",
    author = {H{\"a}tty, Anna and
      Bettinger, Julia and
      Dorna, Michael and
      Kuhn, Jonas and
      {Schulte im Walde}, Sabine},
    editor = "Ku, Lun-Wei and
      Nastase, Vivi and
      Vuli{\'c}, Ivan",
    booktitle = "Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.starsem-1.24",
    doi = "10.18653/v1/2021.starsem-1.24",
    pages = "252--262",
    abstract = "Predicting the difficulty of domain-specific vocabulary is an important task towards a better understanding of a domain, and to enhance the communication between lay people and experts. We investigate German closed noun compounds and focus on the interaction of compound-based lexical features (such as frequency and productivity) and terminology-based features (contrasting domain-specific and general language) across word representations and classifiers. Our prediction experiments complement insights from classification using (a) manually designed features to characterise termhood and compound formation and (b) compound and constituent word embeddings. We find that for a broad binary distinction into {`}easy{'} vs. {`}difficult{'} general-language compound frequency is sufficient, but for a more fine-grained four-class distinction it is crucial to include contrastive termhood features and compound and constituent features.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citation with ID hatty-etal-2021-compound. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hatty-etal-2021-compound">
    <!-- Title of the paper -->
    <titleInfo>
      <title>Compound or Term Features? Analyzing Salience in Predicting the Difficulty of German Noun Compounds across Domains</title>
    </titleInfo>
    <!-- Authors, in the order listed -->
    <name type="personal">
      <namePart type="given">Anna</namePart>
      <namePart type="family">Hätty</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Julia</namePart>
      <namePart type="family">Bettinger</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Dorna</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jonas</namePart>
      <namePart type="family">Kuhn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sabine</namePart>
      <namePart type="family">Schulte im Walde</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vivi</namePart>
        <namePart type="family">Nastase</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="family">Vulić</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Predicting the difficulty of domain-specific vocabulary is an important task towards a better understanding of a domain, and to enhance the communication between lay people and experts. We investigate German closed noun compounds and focus on the interaction of compound-based lexical features (such as frequency and productivity) and terminology-based features (contrasting domain-specific and general language) across word representations and classifiers. Our prediction experiments complement insights from classification using (a) manually designed features to characterise termhood and compound formation and (b) compound and constituent word embeddings. We find that for a broad binary distinction into ‘easy’ vs. ‘difficult’ general-language compound frequency is sufficient, but for a more fine-grained four-class distinction it is crucial to include contrastive termhood features and compound and constituent features.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">hatty-etal-2021-compound</identifier>
    <identifier type="doi">10.18653/v1/2021.starsem-1.24</identifier>
    <location>
      <url>https://aclanthology.org/2021.starsem-1.24</url>
    </location>
    <!-- Date and page extent within the host volume -->
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>252</start>
        <end>262</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Compound or Term Features? Analyzing Salience in Predicting the Difficulty of German Noun Compounds across Domains
%A Hätty, Anna
%A Bettinger, Julia
%A Dorna, Michael
%A Kuhn, Jonas
%A Schulte im Walde, Sabine
%Y Ku, Lun-Wei
%Y Nastase, Vivi
%Y Vulić, Ivan
%S Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F hatty-etal-2021-compound
%X Predicting the difficulty of domain-specific vocabulary is an important task towards a better understanding of a domain, and to enhance the communication between lay people and experts. We investigate German closed noun compounds and focus on the interaction of compound-based lexical features (such as frequency and productivity) and terminology-based features (contrasting domain-specific and general language) across word representations and classifiers. Our prediction experiments complement insights from classification using (a) manually designed features to characterise termhood and compound formation and (b) compound and constituent word embeddings. We find that for a broad binary distinction into ‘easy’ vs. ‘difficult’ general-language compound frequency is sufficient, but for a more fine-grained four-class distinction it is crucial to include contrastive termhood features and compound and constituent features.
%R 10.18653/v1/2021.starsem-1.24
%U https://aclanthology.org/2021.starsem-1.24
%U https://doi.org/10.18653/v1/2021.starsem-1.24
%P 252-262
Markdown (Informal)
[Compound or Term Features? Analyzing Salience in Predicting the Difficulty of German Noun Compounds across Domains](https://aclanthology.org/2021.starsem-1.24) (Hätty et al., *SEM 2021)
ACL