@inproceedings{chang-hu-2022-study,
title = "A Study on Using Different Audio Lengths in Transfer Learning for Improving Chainsaw Sound Recognition",
author = "Chang, Jia-Wei and
Hu, Zhong-Yun",
editor = "Chang, Yung-Chun and
Huang, Yi-Chin",
booktitle = "Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)",
month = nov,
year = "2022",
address = "Taipei, Taiwan",
publisher = "The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)",
url = "https://aclanthology.org/2022.rocling-1.9/",
pages = "67--74",
language = "zho",
abstract = "Chainsaw sound recognition is a challenging task because of the complexity of sound and the excessive noises in mountain environments. This study aims to discuss the influence of different sound lengths on the accuracy of model training. Therefore, this study used LeNet, a simple model with few parameters, and adopted the design of average pooling to enable the proposed models to receive audio of any length. In performance comparison, we mainly compared the influence of different audio lengths and further tested the transfer learning from short-to-long and long-to-short audio. In experiments, we used the ESC-10 dataset for training models and validated their performance via the self-collected chainsaw-audio dataset. The experimental results show that (a) the models trained with different audio lengths (1s, 3s, and 5s) have accuracy from 74{\%} 78{\%}, 74{\%} 77{\%}, and 79{\%} 83{\%} on the self-collected dataset. (b) The generalization of the previous models is significantly improved by transfer learning, the models achieved 85.28{\%}, 88.67{\%}, and 91.8{\%} of accuracy. (c) In transfer learning, the model learned from short-to-long audios can achieve better results than that learned from long-to-short audios, especially being differed 14{\%} of accuracy on 5s chainsaw-audios."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chang-hu-2022-study">
<titleInfo>
<title>A Study on Using Different Audio Lengths in Transfer Learning for Improving Chainsaw Sound Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jia-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong-Yun</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">zho</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yung-Chun</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi-Chin</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Chainsaw sound recognition is a challenging task because of the complexity of the sound and the excessive noise in mountain environments. This study aims to discuss the influence of different audio lengths on the accuracy of model training. Therefore, this study used LeNet, a simple model with few parameters, and adopted average pooling so that the proposed models can receive audio of any length. In the performance comparison, we mainly compared the influence of different audio lengths and further tested transfer learning from short-to-long and long-to-short audio. In the experiments, we used the ESC-10 dataset to train the models and validated their performance on a self-collected chainsaw-audio dataset. The experimental results show that (a) the models trained with different audio lengths (1s, 3s, and 5s) achieve accuracies of 74%–78%, 74%–77%, and 79%–83% on the self-collected dataset; (b) the generalization of these models is significantly improved by transfer learning, with the models achieving accuracies of 85.28%, 88.67%, and 91.8%; and (c) in transfer learning, the model trained from short-to-long audio achieves better results than the one trained from long-to-short audio, with a difference of 14% in accuracy on 5s chainsaw audio.</abstract>
<identifier type="citekey">chang-hu-2022-study</identifier>
<location>
<url>https://aclanthology.org/2022.rocling-1.9/</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>67</start>
<end>74</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Study on Using Different Audio Lengths in Transfer Learning for Improving Chainsaw Sound Recognition
%A Chang, Jia-Wei
%A Hu, Zhong-Yun
%Y Chang, Yung-Chun
%Y Huang, Yi-Chin
%S Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)
%D 2022
%8 November
%I The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
%C Taipei, Taiwan
%G zho
%F chang-hu-2022-study
%X Chainsaw sound recognition is a challenging task because of the complexity of the sound and the excessive noise in mountain environments. This study aims to discuss the influence of different audio lengths on the accuracy of model training. Therefore, this study used LeNet, a simple model with few parameters, and adopted average pooling so that the proposed models can receive audio of any length. In the performance comparison, we mainly compared the influence of different audio lengths and further tested transfer learning from short-to-long and long-to-short audio. In the experiments, we used the ESC-10 dataset to train the models and validated their performance on a self-collected chainsaw-audio dataset. The experimental results show that (a) the models trained with different audio lengths (1s, 3s, and 5s) achieve accuracies of 74%–78%, 74%–77%, and 79%–83% on the self-collected dataset; (b) the generalization of these models is significantly improved by transfer learning, with the models achieving accuracies of 85.28%, 88.67%, and 91.8%; and (c) in transfer learning, the model trained from short-to-long audio achieves better results than the one trained from long-to-short audio, with a difference of 14% in accuracy on 5s chainsaw audio.
%U https://aclanthology.org/2022.rocling-1.9/
%P 67-74
Markdown (Informal)
[A Study on Using Different Audio Lengths in Transfer Learning for Improving Chainsaw Sound Recognition](https://aclanthology.org/2022.rocling-1.9/) (Chang & Hu, ROCLING 2022)
ACL
Jia-Wei Chang and Zhong-Yun Hu. 2022. A Study on Using Different Audio Lengths in Transfer Learning for Improving Chainsaw Sound Recognition. In Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022), pages 67–74, Taipei, Taiwan. The Association for Computational Linguistics and Chinese Language Processing (ACLCLP).
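The abstract's key design point is that average pooling lets one small LeNet-style network accept audio of any length, which in turn makes short-to-long transfer learning straightforward. Below is a minimal sketch of that idea, not the authors' released code: a small CNN over log-mel spectrograms whose global average pooling collapses the variable time axis to a fixed-size vector. The layer sizes, mel settings, and 10-class head (for ESC-10) are assumptions for illustration.

import torch
import torch.nn as nn
import torchaudio

class LeNetAudio(nn.Module):
    """LeNet-style CNN; adaptive average pooling makes it length-agnostic."""
    def __init__(self, n_classes: int = 10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(6, 16, kernel_size=5, padding=2), nn.ReLU(), nn.MaxPool2d(2),
        )
        # Global average pooling collapses the (frequency, time) grid to 1x1,
        # so spectrograms of 1 s, 3 s, or 5 s clips all yield a 16-dim vector.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(16, n_classes)

    def forward(self, mel: torch.Tensor) -> torch.Tensor:
        # mel: (batch, 1, n_mels, n_frames), where n_frames varies with clip length
        x = self.features(mel)
        x = self.pool(x).flatten(1)
        return self.classifier(x)

mel_spec = torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=64)
model = LeNetAudio()
for seconds in (1, 3, 5):
    wav = torch.randn(1, 16000 * seconds)           # dummy clip of varying length
    logits = model(mel_spec(wav).log1p().unsqueeze(1))
    print(seconds, logits.shape)                    # torch.Size([1, 10]) every time

Because the same weights apply to any clip length, the short-to-long transfer described in the paper amounts to training such a model on short clips and then fine-tuning it on longer ones; the pooling layer is what makes that possible without architectural changes.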