@inproceedings{vasicek-etal-2024-truth,
title = "It is a Truth Individually Acknowledged: Cross-references On Demand",
author = "Vasicek, Piper and
Byun, Courtni and
Seppi, Kevin",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Miyagawa, So and
Alnajjar, Khalid and
Bizzoni, Yuri},
booktitle = "Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities",
month = nov,
year = "2024",
address = "Miami, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlp4dh-1.7",
pages = "63--74",
abstract = "Cross-references link source passages of text to other passages that elucidate the source passage in some way and can deepen human understanding. Despite their usefulness, however, good cross-references are hard to find, and extensive sets of cross-references only exist for the few most highly studied books such as the Bible, for which scholars have been collecting cross-references for hundreds of years. Therefore, we propose a new task: generate cross-references for user-selected text on demand. We define a metric, coverage, to evaluate task performance. We adapt several models to generate cross references, including an Anchor Words topic model, SBERT SentenceTransformers, and ChatGPT, and evaluate their coverage in both English and German on existing cross-reference datasets. While ChatGPT outperforms other models on these datasets, this is likely due to data contamination. We hand-evaluate performance on the well-known works of Jane Austen and a less-known science fiction series Sons of the Starfarers by Joe Vasicek, finding that ChatGPT does not perform as well on these works; sentence embeddings perform best. We experiment with newer LLMs and large context windows, and suggest that future work should focus on deploying cross-references on-demand with readers to determine their effectiveness in the wild.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vasicek-etal-2024-truth">
<titleInfo>
<title>It is a Truth Individually Acknowledged: Cross-references On Demand</title>
</titleInfo>
<name type="personal">
<namePart type="given">Piper</namePart>
<namePart type="family">Vasicek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Courtni</namePart>
<namePart type="family">Byun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Seppi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Cross-references link source passages of text to other passages that elucidate the source passage in some way and can deepen human understanding. Despite their usefulness, however, good cross-references are hard to find, and extensive sets of cross-references only exist for the few most highly studied books such as the Bible, for which scholars have been collecting cross-references for hundreds of years. Therefore, we propose a new task: generate cross-references for user-selected text on demand. We define a metric, coverage, to evaluate task performance. We adapt several models to generate cross references, including an Anchor Words topic model, SBERT SentenceTransformers, and ChatGPT, and evaluate their coverage in both English and German on existing cross-reference datasets. While ChatGPT outperforms other models on these datasets, this is likely due to data contamination. We hand-evaluate performance on the well-known works of Jane Austen and a less-known science fiction series Sons of the Starfarers by Joe Vasicek, finding that ChatGPT does not perform as well on these works; sentence embeddings perform best. We experiment with newer LLMs and large context windows, and suggest that future work should focus on deploying cross-references on-demand with readers to determine their effectiveness in the wild.</abstract>
<identifier type="citekey">vasicek-etal-2024-truth</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4dh-1.7</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>63</start>
<end>74</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T It is a Truth Individually Acknowledged: Cross-references On Demand
%A Vasicek, Piper
%A Byun, Courtni
%A Seppi, Kevin
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F vasicek-etal-2024-truth
%X Cross-references link source passages of text to other passages that elucidate the source passage in some way and can deepen human understanding. Despite their usefulness, however, good cross-references are hard to find, and extensive sets of cross-references only exist for the few most highly studied books such as the Bible, for which scholars have been collecting cross-references for hundreds of years. Therefore, we propose a new task: generate cross-references for user-selected text on demand. We define a metric, coverage, to evaluate task performance. We adapt several models to generate cross references, including an Anchor Words topic model, SBERT SentenceTransformers, and ChatGPT, and evaluate their coverage in both English and German on existing cross-reference datasets. While ChatGPT outperforms other models on these datasets, this is likely due to data contamination. We hand-evaluate performance on the well-known works of Jane Austen and a less-known science fiction series Sons of the Starfarers by Joe Vasicek, finding that ChatGPT does not perform as well on these works; sentence embeddings perform best. We experiment with newer LLMs and large context windows, and suggest that future work should focus on deploying cross-references on-demand with readers to determine their effectiveness in the wild.
%U https://aclanthology.org/2024.nlp4dh-1.7
%P 63-74
Markdown (Informal)
[It is a Truth Individually Acknowledged: Cross-references On Demand](https://aclanthology.org/2024.nlp4dh-1.7) (Vasicek et al., NLP4DH 2024)
ACL