@inproceedings{noguchi-etal-2014-japanese,
title = "{J}apanese conversation corpus for training and evaluation of backchannel prediction model.",
author = "Noguchi, Hiroaki and
Katagiri, Yasuhiro and
Den, Yasuharu",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/717_Paper.pdf",
pages = "4429--4433",
abstract = "In this paper, we propose an experimental method for building a specialized corpus for training and evaluating backchannel prediction models of spoken dialogue. To develop a backchannel prediction model using a machine learning technique, it is necessary to discriminate between the timings of the interlocutor s speech when more listeners commonly respond with backchannels and the timings when fewer listeners do so. The proposed corpus indicates the normative timings for backchannels in each speech with millisecond accuracy. In the proposed method, we first extracted each speech comprising a single turn from recorded conversation. Second, we presented these speeches as stimuli to 89 participants and asked them to respond by key hitting whenever they thought it appropriate to respond with a backchannel. In this way, we collected 28983 responses. Third, we applied the Gaussian mixture model to the temporal distribution of the responses and estimated the center of Gaussian distribution, that is, the backchannel relevance place (BRP), in each case. Finally, we synthesized 10 pairs of stereo speech stimuli and asked 19 participants to rate each on a 7-point scale of naturalness. The results show that backchannels inserted at BRPs were significantly higher than those in the original condition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="noguchi-etal-2014-japanese">
<titleInfo>
<title>Japanese conversation corpus for training and evaluation of backchannel prediction model.</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hiroaki</namePart>
<namePart type="family">Noguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasuhiro</namePart>
<namePart type="family">Katagiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasuharu</namePart>
<namePart type="family">Den</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we propose an experimental method for building a specialized corpus for training and evaluating backchannel prediction models of spoken dialogue. To develop a backchannel prediction model using a machine learning technique, it is necessary to discriminate between the timings of the interlocutor s speech when more listeners commonly respond with backchannels and the timings when fewer listeners do so. The proposed corpus indicates the normative timings for backchannels in each speech with millisecond accuracy. In the proposed method, we first extracted each speech comprising a single turn from recorded conversation. Second, we presented these speeches as stimuli to 89 participants and asked them to respond by key hitting whenever they thought it appropriate to respond with a backchannel. In this way, we collected 28983 responses. Third, we applied the Gaussian mixture model to the temporal distribution of the responses and estimated the center of Gaussian distribution, that is, the backchannel relevance place (BRP), in each case. Finally, we synthesized 10 pairs of stereo speech stimuli and asked 19 participants to rate each on a 7-point scale of naturalness. The results show that backchannels inserted at BRPs were significantly higher than those in the original condition.</abstract>
<identifier type="citekey">noguchi-etal-2014-japanese</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/717_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>4429</start>
<end>4433</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Japanese conversation corpus for training and evaluation of backchannel prediction model.
%A Noguchi, Hiroaki
%A Katagiri, Yasuhiro
%A Den, Yasuharu
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F noguchi-etal-2014-japanese
%X In this paper, we propose an experimental method for building a specialized corpus for training and evaluating backchannel prediction models of spoken dialogue. To develop a backchannel prediction model using a machine learning technique, it is necessary to discriminate between the timings of the interlocutor s speech when more listeners commonly respond with backchannels and the timings when fewer listeners do so. The proposed corpus indicates the normative timings for backchannels in each speech with millisecond accuracy. In the proposed method, we first extracted each speech comprising a single turn from recorded conversation. Second, we presented these speeches as stimuli to 89 participants and asked them to respond by key hitting whenever they thought it appropriate to respond with a backchannel. In this way, we collected 28983 responses. Third, we applied the Gaussian mixture model to the temporal distribution of the responses and estimated the center of Gaussian distribution, that is, the backchannel relevance place (BRP), in each case. Finally, we synthesized 10 pairs of stereo speech stimuli and asked 19 participants to rate each on a 7-point scale of naturalness. The results show that backchannels inserted at BRPs were significantly higher than those in the original condition.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/717_Paper.pdf
%P 4429-4433
Markdown (Informal)
[Japanese conversation corpus for training and evaluation of backchannel prediction model.](http://www.lrec-conf.org/proceedings/lrec2014/pdf/717_Paper.pdf) (Noguchi et al., LREC 2014)
ACL