@inproceedings{dalla-serra-etal-2022-multimodal,
title = "Multimodal Generation of Radiology Reports using Knowledge-Grounded Extraction of Entities and Relations",
author = "Dalla Serra, Francesco and
Clackett, William and
MacKinnon, Hamish and
Wang, Chaoyang and
Deligianni, Fani and
Dalton, Jeff and
O{'}Neil, Alison Q.",
editor = "He, Yulan and
Ji, Heng and
Li, Sujian and
Liu, Yang and
Chang, Chua-Hui",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = nov,
year = "2022",
address = "Online only",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.aacl-main.47",
pages = "615--624",
abstract = "Automated reporting has the potential to assist radiologists with the time-consuming procedure of generating text radiology reports. Most existing approaches generate the report directly from the radiology image, however we observe that the resulting reports exhibit realistic style but lack clinical accuracy. Therefore, we propose a two-step pipeline that subdivides the problem into factual triple extraction followed by free-text report generation. The first step comprises supervised extraction of clinically relevant structured information from the image, expressed as triples of the form (entity1, relation, entity2). In the second step, these triples are input to condition the generation of the radiology report. In particular, we focus our work on Chest X-Ray (CXR) radiology report generation. The proposed framework shows state-of-the-art results on the MIMIC-CXR dataset according to most of the standard text generation metrics that we employ (BLEU, METEOR, ROUGE) and to clinical accuracy metrics (recall, precision and F1 assessed using the CheXpert labeler), also giving a 23{\%} reduction in the total number of errors and a 29{\%} reduction in critical clinical errors as assessed by expert human evaluation. In future, this solution can easily integrate more advanced model architectures - to both improve the triple extraction and the report generation - and can be applied to other complex image captioning tasks, such as those found in the medical domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dalla-serra-etal-2022-multimodal">
<titleInfo>
<title>Multimodal Generation of Radiology Reports using Knowledge-Grounded Extraction of Entities and Relations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">Dalla Serra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Clackett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamish</namePart>
<namePart type="family">MacKinnon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaoyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fani</namePart>
<namePart type="family">Deligianni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeff</namePart>
<namePart type="family">Dalton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alison</namePart>
<namePart type="given">Q</namePart>
<namePart type="family">O’Neil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chua-Hui</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online only</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automated reporting has the potential to assist radiologists with the time-consuming procedure of generating text radiology reports. Most existing approaches generate the report directly from the radiology image, however we observe that the resulting reports exhibit realistic style but lack clinical accuracy. Therefore, we propose a two-step pipeline that subdivides the problem into factual triple extraction followed by free-text report generation. The first step comprises supervised extraction of clinically relevant structured information from the image, expressed as triples of the form (entity1, relation, entity2). In the second step, these triples are input to condition the generation of the radiology report. In particular, we focus our work on Chest X-Ray (CXR) radiology report generation. The proposed framework shows state-of-the-art results on the MIMIC-CXR dataset according to most of the standard text generation metrics that we employ (BLEU, METEOR, ROUGE) and to clinical accuracy metrics (recall, precision and F1 assessed using the CheXpert labeler), also giving a 23% reduction in the total number of errors and a 29% reduction in critical clinical errors as assessed by expert human evaluation. In future, this solution can easily integrate more advanced model architectures - to both improve the triple extraction and the report generation - and can be applied to other complex image captioning tasks, such as those found in the medical domain.</abstract>
<identifier type="citekey">dalla-serra-etal-2022-multimodal</identifier>
<location>
<url>https://aclanthology.org/2022.aacl-main.47</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>615</start>
<end>624</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Generation of Radiology Reports using Knowledge-Grounded Extraction of Entities and Relations
%A Dalla Serra, Francesco
%A Clackett, William
%A MacKinnon, Hamish
%A Wang, Chaoyang
%A Deligianni, Fani
%A Dalton, Jeff
%A O’Neil, Alison Q.
%Y He, Yulan
%Y Ji, Heng
%Y Li, Sujian
%Y Liu, Yang
%Y Chang, Chua-Hui
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F dalla-serra-etal-2022-multimodal
%X Automated reporting has the potential to assist radiologists with the time-consuming procedure of generating text radiology reports. Most existing approaches generate the report directly from the radiology image, however we observe that the resulting reports exhibit realistic style but lack clinical accuracy. Therefore, we propose a two-step pipeline that subdivides the problem into factual triple extraction followed by free-text report generation. The first step comprises supervised extraction of clinically relevant structured information from the image, expressed as triples of the form (entity1, relation, entity2). In the second step, these triples are input to condition the generation of the radiology report. In particular, we focus our work on Chest X-Ray (CXR) radiology report generation. The proposed framework shows state-of-the-art results on the MIMIC-CXR dataset according to most of the standard text generation metrics that we employ (BLEU, METEOR, ROUGE) and to clinical accuracy metrics (recall, precision and F1 assessed using the CheXpert labeler), also giving a 23% reduction in the total number of errors and a 29% reduction in critical clinical errors as assessed by expert human evaluation. In future, this solution can easily integrate more advanced model architectures - to both improve the triple extraction and the report generation - and can be applied to other complex image captioning tasks, such as those found in the medical domain.
%U https://aclanthology.org/2022.aacl-main.47
%P 615-624
Markdown (Informal)
[Multimodal Generation of Radiology Reports using Knowledge-Grounded Extraction of Entities and Relations](https://aclanthology.org/2022.aacl-main.47) (Dalla Serra et al., AACL-IJCNLP 2022)
ACL
- Francesco Dalla Serra, William Clackett, Hamish MacKinnon, Chaoyang Wang, Fani Deligianni, Jeff Dalton, and Alison Q. O’Neil. 2022. Multimodal Generation of Radiology Reports using Knowledge-Grounded Extraction of Entities and Relations. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pages 615–624, Online only. Association for Computational Linguistics.