@inproceedings{carne-ishihara-2020-feature,
title = "Feature-Based Forensic Text Comparison Using a {P}oisson Model for Likelihood Ratio Estimation",
author = "Carne, Michael and
Ishihara, Shunichi",
editor = "Kim, Maria and
Beck, Daniel and
Mistica, Meladel",
booktitle = "Proceedings of the 18th Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2020",
address = "Virtual Workshop",
publisher = "Australasian Language Technology Association",
url = "https://aclanthology.org/2020.alta-1.4",
pages = "32--42",
abstract = "Score- and feature-based methods are the two main ones for estimating a forensic likelihood ratio (LR) quantifying the strength of evidence. In this forensic text comparison (FTC) study, a score-based method using the Cosine distance is compared with a feature-based method built on a Poisson model with texts collected from 2,157 authors. Distance measures (e.g. Burrows{'}s Delta, Cosine distance) are a standard tool in authorship attribution studies. Thus, the implementation of a score-based method using a distance measure is naturally the first step for estimating LRs for textual evidence. However, textual data often violates the statistical assumptions underlying distance-based models. Furthermore, such models only assess the similarity, not the typicality, of the objects (i.e. documents) under comparison. A Poisson model is theoretically more appropriate than distance-based measures for authorship attribution, but it has never been tested with linguistic text evidence within the LR framework. The log-LR cost (Cllr) was used to assess the performance of the two methods. This study demonstrates that: (1) the feature-based method outperforms the score-based method by a Cllr value of ca. 0.09 under the best-performing settings; and (2) the performance of the feature-based method can be further improved by feature selection.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="carne-ishihara-2020-feature">
<titleInfo>
<title>Feature-Based Forensic Text Comparison Using a Poisson Model for Likelihood Ratio Estimation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Carne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shunichi</namePart>
<namePart type="family">Ishihara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Beck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meladel</namePart>
<namePart type="family">Mistica</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Australasian Language Technology Association</publisher>
<place>
<placeTerm type="text">Virtual Workshop</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Score- and feature-based methods are the two main ones for estimating a forensic likelihood ratio (LR) quantifying the strength of evidence. In this forensic text comparison (FTC) study, a score-based method using the Cosine distance is compared with a feature-based method built on a Poisson model with texts collected from 2,157 authors. Distance measures (e.g. Burrows’s Delta, Cosine distance) are a standard tool in authorship attribution studies. Thus, the implementation of a score-based method using a distance measure is naturally the first step for estimating LRs for textual evidence. However, textual data often violates the statistical assumptions underlying distance-based models. Furthermore, such models only assess the similarity, not the typicality, of the objects (i.e. documents) under comparison. A Poisson model is theoretically more appropriate than distance-based measures for authorship attribution, but it has never been tested with linguistic text evidence within the LR framework. The log-LR cost (Cllr) was used to assess the performance of the two methods. This study demonstrates that: (1) the feature-based method outperforms the score-based method by a Cllr value of ca. 0.09 under the best-performing settings; and (2) the performance of the feature-based method can be further improved by feature selection.</abstract>
<identifier type="citekey">carne-ishihara-2020-feature</identifier>
<location>
<url>https://aclanthology.org/2020.alta-1.4</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>32</start>
<end>42</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Feature-Based Forensic Text Comparison Using a Poisson Model for Likelihood Ratio Estimation
%A Carne, Michael
%A Ishihara, Shunichi
%Y Kim, Maria
%Y Beck, Daniel
%Y Mistica, Meladel
%S Proceedings of the 18th Annual Workshop of the Australasian Language Technology Association
%D 2020
%8 December
%I Australasian Language Technology Association
%C Virtual Workshop
%F carne-ishihara-2020-feature
%X Score- and feature-based methods are the two main ones for estimating a forensic likelihood ratio (LR) quantifying the strength of evidence. In this forensic text comparison (FTC) study, a score-based method using the Cosine distance is compared with a feature-based method built on a Poisson model with texts collected from 2,157 authors. Distance measures (e.g. Burrows’s Delta, Cosine distance) are a standard tool in authorship attribution studies. Thus, the implementation of a score-based method using a distance measure is naturally the first step for estimating LRs for textual evidence. However, textual data often violates the statistical assumptions underlying distance-based models. Furthermore, such models only assess the similarity, not the typicality, of the objects (i.e. documents) under comparison. A Poisson model is theoretically more appropriate than distance-based measures for authorship attribution, but it has never been tested with linguistic text evidence within the LR framework. The log-LR cost (Cllr) was used to assess the performance of the two methods. This study demonstrates that: (1) the feature-based method outperforms the score-based method by a Cllr value of ca. 0.09 under the best-performing settings; and (2) the performance of the feature-based method can be further improved by feature selection.
%U https://aclanthology.org/2020.alta-1.4
%P 32-42
Markdown (Informal)
[Feature-Based Forensic Text Comparison Using a Poisson Model for Likelihood Ratio Estimation](https://aclanthology.org/2020.alta-1.4) (Carne & Ishihara, ALTA 2020)
ACL