@inproceedings{arcan-etal-2022-towards,
title = "Towards Bootstrapping a Chatbot on Industrial Heritage through Term and Relation Extraction",
author = "Arcan, Mihael and
O{'}Halloran, Rory and
Robin, C{\'e}cile and
Buitelaar, Paul",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
Alnajjar, Khalid and
Partanen, Niko and
Rueter, Jack},
booktitle = "Proceedings of the 2nd International Workshop on Natural Language Processing for Digital Humanities",
month = nov,
year = "2022",
address = "Taipei, Taiwan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.nlp4dh-1.15",
doi = "10.18653/v1/2022.nlp4dh-1.15",
pages = "108--122",
abstract = "We describe initial work in developing a methodology for the automatic generation of a conversational agent or {`}chatbot{'} through term and relation extraction from a relevant corpus of language data. We develop our approach in the domain of industrial heritage in the 18th and 19th centuries, and more specifically on the industrial history of canals and mills in Ireland. We collected a corpus of relevant newspaper reports and Wikipedia articles, which we deemed representative of a layman{'}s understanding of this topic. We used the Saffron toolkit to extract relevant terms and relations between the terms from the corpus and leveraged the extracted knowledge to query the British Library Digital Collection and the Project Gutenberg library. We leveraged the extracted terms and relations in identifying possible answers for a constructed set of questions based on the extracted terms, by matching them with sentences in the British Library Digital Collection and the Project Gutenberg library. In a final step, we then took this data set of question-answer pairs to train a chatbot. We evaluate our approach by manually assessing the appropriateness of the generated answers for a random sample, each of which is judged by four annotators.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="arcan-etal-2022-towards">
<titleInfo>
<title>Towards Bootstrapping a Chatbot on Industrial Heritage through Term and Relation Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mihael</namePart>
<namePart type="family">Arcan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rory</namePart>
<namePart type="family">O’Halloran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cécile</namePart>
<namePart type="family">Robin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd International Workshop on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niko</namePart>
<namePart type="family">Partanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jack</namePart>
<namePart type="family">Rueter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe initial work in developing a methodology for the automatic generation of a conversational agent or ‘chatbot’ through term and relation extraction from a relevant corpus of language data. We develop our approach in the domain of industrial heritage in the 18th and 19th centuries, and more specifically on the industrial history of canals and mills in Ireland. We collected a corpus of relevant newspaper reports and Wikipedia articles, which we deemed representative of a layman’s understanding of this topic. We used the Saffron toolkit to extract relevant terms and relations between the terms from the corpus and leveraged the extracted knowledge to query the British Library Digital Collection and the Project Gutenberg library. We leveraged the extracted terms and relations in identifying possible answers for a constructed set of questions based on the extracted terms, by matching them with sentences in the British Library Digital Collection and the Project Gutenberg library. In a final step, we then took this data set of question-answer pairs to train a chatbot. We evaluate our approach by manually assessing the appropriateness of the generated answers for a random sample, each of which is judged by four annotators.</abstract>
<identifier type="citekey">arcan-etal-2022-towards</identifier>
<identifier type="doi">10.18653/v1/2022.nlp4dh-1.15</identifier>
<location>
<url>https://aclanthology.org/2022.nlp4dh-1.15</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>108</start>
<end>122</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Bootstrapping a Chatbot on Industrial Heritage through Term and Relation Extraction
%A Arcan, Mihael
%A O’Halloran, Rory
%A Robin, Cécile
%A Buitelaar, Paul
%Y Hämäläinen, Mika
%Y Alnajjar, Khalid
%Y Partanen, Niko
%Y Rueter, Jack
%S Proceedings of the 2nd International Workshop on Natural Language Processing for Digital Humanities
%D 2022
%8 November
%I Association for Computational Linguistics
%C Taipei, Taiwan
%F arcan-etal-2022-towards
%X We describe initial work in developing a methodology for the automatic generation of a conversational agent or ‘chatbot’ through term and relation extraction from a relevant corpus of language data. We develop our approach in the domain of industrial heritage in the 18th and 19th centuries, and more specifically on the industrial history of canals and mills in Ireland. We collected a corpus of relevant newspaper reports and Wikipedia articles, which we deemed representative of a layman’s understanding of this topic. We used the Saffron toolkit to extract relevant terms and relations between the terms from the corpus and leveraged the extracted knowledge to query the British Library Digital Collection and the Project Gutenberg library. We leveraged the extracted terms and relations in identifying possible answers for a constructed set of questions based on the extracted terms, by matching them with sentences in the British Library Digital Collection and the Project Gutenberg library. In a final step, we then took this data set of question-answer pairs to train a chatbot. We evaluate our approach by manually assessing the appropriateness of the generated answers for a random sample, each of which is judged by four annotators.
%R 10.18653/v1/2022.nlp4dh-1.15
%U https://aclanthology.org/2022.nlp4dh-1.15
%U https://doi.org/10.18653/v1/2022.nlp4dh-1.15
%P 108-122
Markdown (Informal)
[Towards Bootstrapping a Chatbot on Industrial Heritage through Term and Relation Extraction](https://aclanthology.org/2022.nlp4dh-1.15) (Arcan et al., NLP4DH 2022)
ACL