@inproceedings{kammer-etal-2023-resolving,
title = "Resolving Elliptical Compounds in {G}erman Medical Text",
author = "Kammer, Niklas and
Borchert, Florian and
Winkler, Silvia and
de Melo, Gerard and
Schapranow, Matthieu-P.",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Cohen, Kevin",
booktitle = "The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bionlp-1.26",
doi = "10.18653/v1/2023.bionlp-1.26",
pages = "292--305",
abstract = "Elliptical coordinated compound noun phrases (ECCNPs), a special kind of coordination ellipsis, are a common phenomenon in German medical texts. As their presence is known to affect the performance in downstream tasks such as entity extraction and disambiguation, their resolution can be a useful preprocessing step in information extraction pipelines. In this work, we present a new comprehensive dataset of more than 4,000 manually annotated ECCNPs in German medical text, along with the respective ground truth resolutions. Based on this data, we propose a generative encoder-decoder Transformer model, allowing for a simple end-to-end resolution of ECCNPs from raw input strings with very high accuracy (90.5{\%} exact match score). We compare our approach to an elaborate rule-based baseline, which the generative model outperforms by a large margin. We further investigate different scenarios for prompting large language models (LLM) to resolve ECCNPs. In a zero-shot setting, performance is remarkably poor (21.6{\%} exact matches), as the LLM tends to apply complex changes to the inputs unrelated to our specific task. We also find no improvement over the generative model when using the LLM for post-filtering of generated candidate resolutions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kammer-etal-2023-resolving">
<titleInfo>
<title>Resolving Elliptical Compounds in German Medical Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Niklas</namePart>
<namePart type="family">Kammer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florian</namePart>
<namePart type="family">Borchert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Silvia</namePart>
<namePart type="family">Winkler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerard</namePart>
<namePart type="family">de Melo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthieu-P.</namePart>
<namePart type="family">Schapranow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Elliptical coordinated compound noun phrases (ECCNPs), a special kind of coordination ellipsis, are a common phenomenon in German medical texts. As their presence is known to affect the performance in downstream tasks such as entity extraction and disambiguation, their resolution can be a useful preprocessing step in information extraction pipelines. In this work, we present a new comprehensive dataset of more than 4,000 manually annotated ECCNPs in German medical text, along with the respective ground truth resolutions. Based on this data, we propose a generative encoder-decoder Transformer model, allowing for a simple end-to-end resolution of ECCNPs from raw input strings with very high accuracy (90.5% exact match score). We compare our approach to an elaborate rule-based baseline, which the generative model outperforms by a large margin. We further investigate different scenarios for prompting large language models (LLM) to resolve ECCNPs. In a zero-shot setting, performance is remarkably poor (21.6% exact matches), as the LLM tends to apply complex changes to the inputs unrelated to our specific task. We also find no improvement over the generative model when using the LLM for post-filtering of generated candidate resolutions.</abstract>
<identifier type="citekey">kammer-etal-2023-resolving</identifier>
<identifier type="doi">10.18653/v1/2023.bionlp-1.26</identifier>
<location>
<url>https://aclanthology.org/2023.bionlp-1.26</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>292</start>
<end>305</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Resolving Elliptical Compounds in German Medical Text
%A Kammer, Niklas
%A Borchert, Florian
%A Winkler, Silvia
%A de Melo, Gerard
%A Schapranow, Matthieu-P.
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Cohen, Kevin
%S The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F kammer-etal-2023-resolving
%X Elliptical coordinated compound noun phrases (ECCNPs), a special kind of coordination ellipsis, are a common phenomenon in German medical texts. As their presence is known to affect the performance in downstream tasks such as entity extraction and disambiguation, their resolution can be a useful preprocessing step in information extraction pipelines. In this work, we present a new comprehensive dataset of more than 4,000 manually annotated ECCNPs in German medical text, along with the respective ground truth resolutions. Based on this data, we propose a generative encoder-decoder Transformer model, allowing for a simple end-to-end resolution of ECCNPs from raw input strings with very high accuracy (90.5% exact match score). We compare our approach to an elaborate rule-based baseline, which the generative model outperforms by a large margin. We further investigate different scenarios for prompting large language models (LLM) to resolve ECCNPs. In a zero-shot setting, performance is remarkably poor (21.6% exact matches), as the LLM tends to apply complex changes to the inputs unrelated to our specific task. We also find no improvement over the generative model when using the LLM for post-filtering of generated candidate resolutions.
%R 10.18653/v1/2023.bionlp-1.26
%U https://aclanthology.org/2023.bionlp-1.26
%U https://doi.org/10.18653/v1/2023.bionlp-1.26
%P 292-305
Markdown (Informal)
[Resolving Elliptical Compounds in German Medical Text](https://aclanthology.org/2023.bionlp-1.26) (Kammer et al., BioNLP 2023)
ACL
- Niklas Kammer, Florian Borchert, Silvia Winkler, Gerard de Melo, and Matthieu-P. Schapranow. 2023. Resolving Elliptical Compounds in German Medical Text. In The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks, pages 292–305, Toronto, Canada. Association for Computational Linguistics.