@inproceedings{tsuge-ishihara-2018-text,
title = "Text-dependent Forensic Voice Comparison: Likelihood Ratio Estimation with the Hidden {M}arkov Model ({HMM}) and {G}aussian Mixture Model",
author = "Tsuge, Satoru and
Ishihara, Shunichi",
editor = "Kim, Sunghwan Mac and
Zhang, Xiuzhen (Jenny)",
booktitle = "Proceedings of the Australasian Language Technology Association Workshop 2018",
month = dec,
year = "2018",
address = "Dunedin, New Zealand",
url = "https://aclanthology.org/U18-1002",
pages = "17--25",
abstract = "Among the more typical forensic voice comparison (FVC) approaches, the acoustic-phonetic statistical approach is suitable for text-dependent FVC, but it does not fully exploit available time-varying information of speech in its modelling. The automatic approach, on the other hand, essentially deals with text-independent cases, which means temporal information is not explicitly incorporated in the modelling. Text-dependent likelihood ratio (LR)-based FVC studies, in particular those that adopt the automatic approach, are few. This preliminary LR-based FVC study compares two statistical models, the Hidden Markov Model (HMM) and the Gaussian Mixture Model (GMM), for the calculation of forensic LRs using the same speech data. FVC experiments were carried out using different lengths of Japanese short words under a forensically realistic, but challenging condition: only two speech tokens for model training and LR estimation. Log-likelihood-ratio cost (Cllr) was used as the assessment metric. The study demonstrates that the HMM system constantly outperforms the GMM system in terms of average Cllr values. However, words longer than three mora are needed if the advantage of the HMM is to become evident. With a seven-mora word, for example, the HMM outperformed the GMM by a Cllr value of 0.073.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tsuge-ishihara-2018-text">
<titleInfo>
<title>Text-dependent Forensic Voice Comparison: Likelihood Ratio Estimation with the Hidden Markov Model (HMM) and Gaussian Mixture Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Satoru</namePart>
<namePart type="family">Tsuge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shunichi</namePart>
<namePart type="family">Ishihara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Australasian Language Technology Association Workshop 2018</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunghwan</namePart>
<namePart type="given">Mac</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiuzhen</namePart>
<namePart type="given">(Jenny)</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Dunedin, New Zealand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Among the more typical forensic voice comparison (FVC) approaches, the acoustic-phonetic statistical approach is suitable for text-dependent FVC, but it does not fully exploit available time-varying information of speech in its modelling. The automatic approach, on the other hand, essentially deals with text-independent cases, which means temporal information is not explicitly incorporated in the modelling. Text-dependent likelihood ratio (LR)-based FVC studies, in particular those that adopt the automatic approach, are few. This preliminary LR-based FVC study compares two statistical models, the Hidden Markov Model (HMM) and the Gaussian Mixture Model (GMM), for the calculation of forensic LRs using the same speech data. FVC experiments were carried out using different lengths of Japanese short words under a forensically realistic, but challenging condition: only two speech tokens for model training and LR estimation. Log-likelihood-ratio cost (Cllr) was used as the assessment metric. The study demonstrates that the HMM system constantly outperforms the GMM system in terms of average Cllr values. However, words longer than three mora are needed if the advantage of the HMM is to become evident. With a seven-mora word, for example, the HMM outperformed the GMM by a Cllr value of 0.073.</abstract>
<identifier type="citekey">tsuge-ishihara-2018-text</identifier>
<location>
<url>https://aclanthology.org/U18-1002</url>
</location>
<part>
<date>2018-12</date>
<extent unit="page">
<start>17</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Text-dependent Forensic Voice Comparison: Likelihood Ratio Estimation with the Hidden Markov Model (HMM) and Gaussian Mixture Model
%A Tsuge, Satoru
%A Ishihara, Shunichi
%Y Kim, Sunghwan Mac
%Y Zhang, Xiuzhen (Jenny)
%S Proceedings of the Australasian Language Technology Association Workshop 2018
%D 2018
%8 December
%C Dunedin, New Zealand
%F tsuge-ishihara-2018-text
%X Among the more typical forensic voice comparison (FVC) approaches, the acoustic-phonetic statistical approach is suitable for text-dependent FVC, but it does not fully exploit available time-varying information of speech in its modelling. The automatic approach, on the other hand, essentially deals with text-independent cases, which means temporal information is not explicitly incorporated in the modelling. Text-dependent likelihood ratio (LR)-based FVC studies, in particular those that adopt the automatic approach, are few. This preliminary LR-based FVC study compares two statistical models, the Hidden Markov Model (HMM) and the Gaussian Mixture Model (GMM), for the calculation of forensic LRs using the same speech data. FVC experiments were carried out using different lengths of Japanese short words under a forensically realistic, but challenging condition: only two speech tokens for model training and LR estimation. Log-likelihood-ratio cost (Cllr) was used as the assessment metric. The study demonstrates that the HMM system constantly outperforms the GMM system in terms of average Cllr values. However, words longer than three mora are needed if the advantage of the HMM is to become evident. With a seven-mora word, for example, the HMM outperformed the GMM by a Cllr value of 0.073.
%U https://aclanthology.org/U18-1002
%P 17-25
Markdown (Informal)
[Text-dependent Forensic Voice Comparison: Likelihood Ratio Estimation with the Hidden Markov Model (HMM) and Gaussian Mixture Model](https://aclanthology.org/U18-1002) (Tsuge & Ishihara, ALTA 2018)
ACL