@inproceedings{liu-etal-2023-dopplerbas,
title = "{D}oppler{BAS}: Binaural Audio Synthesis Addressing Doppler Effect",
author = "Liu, Jinglin and
Ye, Zhenhui and
Chen, Qian and
Zheng, Siqi and
Wang, Wen and
Qinglin, Zhang and
Zhao, Zhou",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.753",
doi = "10.18653/v1/2023.findings-acl.753",
pages = "11905--11912",
abstract = "Recently, binaural audio synthesis (BAS) has emerged as a promising research field for its applications in augmented and virtual realities. Binaural audio helps ususers orient themselves and establish immersion by providing the brain with interaural time differences reflecting spatial information. However, existing BAS methods are limited in terms of phase estimation, which is crucial for spatial hearing. In this paper, we propose the DopplerBAS method to explicitly address the Doppler effect of the moving sound source. Specifically, we calculate the radial relative velocity of the moving speaker in spherical coordinates, which further guides the synthesis of binaural audio. This simple method introduces no additional hyper-parameters and does not modify the loss functions, and is plug-and-play: it scales well to different types of backbones. DopperBAS distinctly improves the representative WarpNet and BinauralGrad backbones in the phase error metric and reaches a new state of the art (SOTA): 0.780 (versus the current SOTA 0.807). Experiments and ablation studies demonstrate the effectiveness of our method.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2023-dopplerbas">
<titleInfo>
<title>DopplerBAS: Binaural Audio Synthesis Addressing Doppler Effect</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jinglin</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenhui</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qian</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siqi</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhang</namePart>
<namePart type="family">Qinglin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhou</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, binaural audio synthesis (BAS) has emerged as a promising research field for its applications in augmented and virtual realities. Binaural audio helps ususers orient themselves and establish immersion by providing the brain with interaural time differences reflecting spatial information. However, existing BAS methods are limited in terms of phase estimation, which is crucial for spatial hearing. In this paper, we propose the DopplerBAS method to explicitly address the Doppler effect of the moving sound source. Specifically, we calculate the radial relative velocity of the moving speaker in spherical coordinates, which further guides the synthesis of binaural audio. This simple method introduces no additional hyper-parameters and does not modify the loss functions, and is plug-and-play: it scales well to different types of backbones. DopperBAS distinctly improves the representative WarpNet and BinauralGrad backbones in the phase error metric and reaches a new state of the art (SOTA): 0.780 (versus the current SOTA 0.807). Experiments and ablation studies demonstrate the effectiveness of our method.</abstract>
<identifier type="citekey">liu-etal-2023-dopplerbas</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.753</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.753</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>11905</start>
<end>11912</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DopplerBAS: Binaural Audio Synthesis Addressing Doppler Effect
%A Liu, Jinglin
%A Ye, Zhenhui
%A Chen, Qian
%A Zheng, Siqi
%A Wang, Wen
%A Qinglin, Zhang
%A Zhao, Zhou
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F liu-etal-2023-dopplerbas
%X Recently, binaural audio synthesis (BAS) has emerged as a promising research field for its applications in augmented and virtual realities. Binaural audio helps ususers orient themselves and establish immersion by providing the brain with interaural time differences reflecting spatial information. However, existing BAS methods are limited in terms of phase estimation, which is crucial for spatial hearing. In this paper, we propose the DopplerBAS method to explicitly address the Doppler effect of the moving sound source. Specifically, we calculate the radial relative velocity of the moving speaker in spherical coordinates, which further guides the synthesis of binaural audio. This simple method introduces no additional hyper-parameters and does not modify the loss functions, and is plug-and-play: it scales well to different types of backbones. DopperBAS distinctly improves the representative WarpNet and BinauralGrad backbones in the phase error metric and reaches a new state of the art (SOTA): 0.780 (versus the current SOTA 0.807). Experiments and ablation studies demonstrate the effectiveness of our method.
%R 10.18653/v1/2023.findings-acl.753
%U https://aclanthology.org/2023.findings-acl.753
%U https://doi.org/10.18653/v1/2023.findings-acl.753
%P 11905-11912
Markdown (Informal)
[DopplerBAS: Binaural Audio Synthesis Addressing Doppler Effect](https://aclanthology.org/2023.findings-acl.753) (Liu et al., Findings 2023)
ACL