@inproceedings{ishihara-etal-2022-estimating,
title = "Estimating the Strength of Authorship Evidence with a Deep-Learning-Based Approach",
author = "Ishihara, Shunichi and
Tsuge, Satoru and
Inaba, Mitsuyuki and
Zaitsu, Wataru",
editor = "Parameswaran, Pradeesh and
Biggs, Jennifer and
Powers, David",
booktitle = "Proceedings of the 20th Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2022",
address = "Adelaide, Australia",
publisher = "Australasian Language Technology Association",
url = "https://aclanthology.org/2022.alta-1.25",
pages = "183--187",
abstract = "This study is the first likelihood ratio (LR)-based forensic text comparison study in which each text is mapped onto an embedding vector using RoBERTa as the pre-trained model. The scores obtained with Cosine distance and probabilistic linear discriminant analysis (PLDA) were calibrated to LRs with logistic regression; the quality of the LRs was assessed by log LR cost (Cllr). Although the documents in the experiments were very short (maximum 100 words), the systems reached the Cllr values of 0.55595 and 0.71591 for the Cosine and PLDA systems, respectively. The effectiveness of deep-learning-based text representation is discussed by comparing the results of the current study to those of the previous studies of systems based on conventional feature engineering tested with longer documents.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ishihara-etal-2022-estimating">
<titleInfo>
<title>Estimating the Strength of Authorship Evidence with a Deep-Learning-Based Approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shunichi</namePart>
<namePart type="family">Ishihara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoru</namePart>
<namePart type="family">Tsuge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mitsuyuki</namePart>
<namePart type="family">Inaba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wataru</namePart>
<namePart type="family">Zaitsu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pradeesh</namePart>
<namePart type="family">Parameswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jennifer</namePart>
<namePart type="family">Biggs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Powers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Australasian Language Technology Association</publisher>
<place>
<placeTerm type="text">Adelaide, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study is the first likelihood ratio (LR)-based forensic text comparison study in which each text is mapped onto an embedding vector using RoBERTa as the pre-trained model. The scores obtained with Cosine distance and probabilistic linear discriminant analysis (PLDA) were calibrated to LRs with logistic regression; the quality of the LRs was assessed by log LR cost (Cllr). Although the documents in the experiments were very short (maximum 100 words), the systems reached the Cllr values of 0.55595 and 0.71591 for the Cosine and PLDA systems, respectively. The effectiveness of deep-learning-based text representation is discussed by comparing the results of the current study to those of the previous studies of systems based on conventional feature engineering tested with longer documents.</abstract>
<identifier type="citekey">ishihara-etal-2022-estimating</identifier>
<location>
<url>https://aclanthology.org/2022.alta-1.25</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>183</start>
<end>187</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Estimating the Strength of Authorship Evidence with a Deep-Learning-Based Approach
%A Ishihara, Shunichi
%A Tsuge, Satoru
%A Inaba, Mitsuyuki
%A Zaitsu, Wataru
%Y Parameswaran, Pradeesh
%Y Biggs, Jennifer
%Y Powers, David
%S Proceedings of the 20th Annual Workshop of the Australasian Language Technology Association
%D 2022
%8 December
%I Australasian Language Technology Association
%C Adelaide, Australia
%F ishihara-etal-2022-estimating
%X This study is the first likelihood ratio (LR)-based forensic text comparison study in which each text is mapped onto an embedding vector using RoBERTa as the pre-trained model. The scores obtained with Cosine distance and probabilistic linear discriminant analysis (PLDA) were calibrated to LRs with logistic regression; the quality of the LRs was assessed by log LR cost (Cllr). Although the documents in the experiments were very short (maximum 100 words), the systems reached the Cllr values of 0.55595 and 0.71591 for the Cosine and PLDA systems, respectively. The effectiveness of deep-learning-based text representation is discussed by comparing the results of the current study to those of the previous studies of systems based on conventional feature engineering tested with longer documents.
%U https://aclanthology.org/2022.alta-1.25
%P 183-187
Markdown (Informal)
[Estimating the Strength of Authorship Evidence with a Deep-Learning-Based Approach](https://aclanthology.org/2022.alta-1.25) (Ishihara et al., ALTA 2022)
ACL