@inproceedings{paggio-etal-2017-automatic,
title = "Automatic identification of head movements in video-recorded conversations: can words help?",
author = "Paggio, Patrizia and
Navarretta, Costanza and
Jongejan, Bart",
editor = "Belz, Anya and
Erdem, Erkut and
Pastra, Katerina and
Mikolajczyk, Krystian",
booktitle = "Proceedings of the Sixth Workshop on Vision and Language",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2006/",
doi = "10.18653/v1/W17-2006",
pages = "40--42",
abstract = "We present an approach where an SVM classifier learns to classify head movements based on measurements of velocity, acceleration, and the third derivative of position with respect to time, jerk. Consequently, annotations of head movements are added to new video data. The results of the automatic annotation are evaluated against manual annotations in the same data and show an accuracy of 68{\%} with respect to these. The results also show that using jerk improves accuracy. We then conduct an investigation of the overlap between temporal sequences classified as either movement or non-movement and the speech stream of the person performing the gesture. The statistics derived from this analysis show that using word features may help increase the accuracy of the model."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="paggio-etal-2017-automatic">
<titleInfo>
<title>Automatic identification of head movements in video-recorded conversations: can words help?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Costanza</namePart>
<namePart type="family">Navarretta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bart</namePart>
<namePart type="family">Jongejan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Vision and Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erkut</namePart>
<namePart type="family">Erdem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katerina</namePart>
<namePart type="family">Pastra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krystian</namePart>
<namePart type="family">Mikolajczyk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present an approach where an SVM classifier learns to classify head movements based on measurements of velocity, acceleration, and the third derivative of position with respect to time, jerk. Consequently, annotations of head movements are added to new video data. The results of the automatic annotation are evaluated against manual annotations in the same data and show an accuracy of 68% with respect to these. The results also show that using jerk improves accuracy. We then conduct an investigation of the overlap between temporal sequences classified as either movement or non-movement and the speech stream of the person performing the gesture. The statistics derived from this analysis show that using word features may help increase the accuracy of the model.</abstract>
<identifier type="citekey">paggio-etal-2017-automatic</identifier>
<identifier type="doi">10.18653/v1/W17-2006</identifier>
<location>
<url>https://aclanthology.org/W17-2006/</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>40</start>
<end>42</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic identification of head movements in video-recorded conversations: can words help?
%A Paggio, Patrizia
%A Navarretta, Costanza
%A Jongejan, Bart
%Y Belz, Anya
%Y Erdem, Erkut
%Y Pastra, Katerina
%Y Mikolajczyk, Krystian
%S Proceedings of the Sixth Workshop on Vision and Language
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F paggio-etal-2017-automatic
%X We present an approach where an SVM classifier learns to classify head movements based on measurements of velocity, acceleration, and the third derivative of position with respect to time, jerk. Consequently, annotations of head movements are added to new video data. The results of the automatic annotation are evaluated against manual annotations in the same data and show an accuracy of 68% with respect to these. The results also show that using jerk improves accuracy. We then conduct an investigation of the overlap between temporal sequences classified as either movement or non-movement and the speech stream of the person performing the gesture. The statistics derived from this analysis show that using word features may help increase the accuracy of the model.
%R 10.18653/v1/W17-2006
%U https://aclanthology.org/W17-2006/
%U https://doi.org/10.18653/v1/W17-2006
%P 40-42
Markdown (Informal)
[Automatic identification of head movements in video-recorded conversations: can words help?](https://aclanthology.org/W17-2006/) (Paggio et al., VL 2017)
ACL