@inproceedings{miyagawa-etal-2024-exploring,
title = "Exploring Similarity Measures and Intertextuality in {V}edic {S}anskrit Literature",
author = "Miyagawa, So and
Kyogoku, Yuki and
Tsukagoshi, Yuzuki and
Amano, Kyoko",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Miyagawa, So and
Alnajjar, Khalid and
Bizzoni, Yuri},
booktitle = "Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities",
month = nov,
year = "2024",
address = "Miami, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlp4dh-1.12",
pages = "123--131",
abstract = "This paper examines semantic similarity and intertextuality in selected texts from the Vedic Sanskrit corpus, specifically the Maitr{\=a}yaṇ{\=\i} Saṃhit{\=a} (MS) and K{\=a}ṭhaka-Saṃhit{\=a} (KS). Three computational methods are employed: Word2Vec for word embeddings, stylo package for stylometric analysis, and TRACER for text reuse detection. By comparing various sections of the texts at different granularities, patterns of similarity and structural alignment are uncovered, providing insights into textual relationships and chronology. Word embeddings capture semantic similarities, while stylometric analysis reveals clusters and components that differentiate the texts. TRACER identifies parallel passages, indicating probable instances of text reuse. The computational analysis corroborates previous philological studies, suggesting a shared period of composition between MS.1.9 and MS.1.7. This research highlights the potential of computational methods in studying ancient Sanskrit literature, complementing traditional approaches. The agreement among the methods strengthens the validity of the findings, and the visualizations offer a nuanced understanding of textual connections. The study demonstrates that smaller chunk sizes are more effective for detecting intertextual parallels, showcasing the power of these techniques in unraveling the complexities of ancient texts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="miyagawa-etal-2024-exploring">
<titleInfo>
<title>Exploring Similarity Measures and Intertextuality in Vedic Sanskrit Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Kyogoku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuzuki</namePart>
<namePart type="family">Tsukagoshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyoko</namePart>
<namePart type="family">Amano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper examines semantic similarity and intertextuality in selected texts from the Vedic Sanskrit corpus, specifically the Maitrāyaṇī Saṃhitā (MS) and Kāṭhaka-Saṃhitā (KS). Three computational methods are employed: Word2Vec for word embeddings, stylo package for stylometric analysis, and TRACER for text reuse detection. By comparing various sections of the texts at different granularities, patterns of similarity and structural alignment are uncovered, providing insights into textual relationships and chronology. Word embeddings capture semantic similarities, while stylometric analysis reveals clusters and components that differentiate the texts. TRACER identifies parallel passages, indicating probable instances of text reuse. The computational analysis corroborates previous philological studies, suggesting a shared period of composition between MS.1.9 and MS.1.7. This research highlights the potential of computational methods in studying ancient Sanskrit literature, complementing traditional approaches. The agreement among the methods strengthens the validity of the findings, and the visualizations offer a nuanced understanding of textual connections. The study demonstrates that smaller chunk sizes are more effective for detecting intertextual parallels, showcasing the power of these techniques in unraveling the complexities of ancient texts.</abstract>
<identifier type="citekey">miyagawa-etal-2024-exploring</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4dh-1.12</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>123</start>
<end>131</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Similarity Measures and Intertextuality in Vedic Sanskrit Literature
%A Miyagawa, So
%A Kyogoku, Yuki
%A Tsukagoshi, Yuzuki
%A Amano, Kyoko
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F miyagawa-etal-2024-exploring
%X This paper examines semantic similarity and intertextuality in selected texts from the Vedic Sanskrit corpus, specifically the Maitrāyaṇī Saṃhitā (MS) and Kāṭhaka-Saṃhitā (KS). Three computational methods are employed: Word2Vec for word embeddings, stylo package for stylometric analysis, and TRACER for text reuse detection. By comparing various sections of the texts at different granularities, patterns of similarity and structural alignment are uncovered, providing insights into textual relationships and chronology. Word embeddings capture semantic similarities, while stylometric analysis reveals clusters and components that differentiate the texts. TRACER identifies parallel passages, indicating probable instances of text reuse. The computational analysis corroborates previous philological studies, suggesting a shared period of composition between MS.1.9 and MS.1.7. This research highlights the potential of computational methods in studying ancient Sanskrit literature, complementing traditional approaches. The agreement among the methods strengthens the validity of the findings, and the visualizations offer a nuanced understanding of textual connections. The study demonstrates that smaller chunk sizes are more effective for detecting intertextual parallels, showcasing the power of these techniques in unraveling the complexities of ancient texts.
%U https://aclanthology.org/2024.nlp4dh-1.12
%P 123-131
Markdown (Informal)
[Exploring Similarity Measures and Intertextuality in Vedic Sanskrit Literature](https://aclanthology.org/2024.nlp4dh-1.12) (Miyagawa et al., NLP4DH 2024)
ACL