@inproceedings{rao-etal-2025-sign,
title = "Sign Language Video Segmentation Using Temporal Boundary Identification",
author = "Rao, Kavu Maithri and
Hamidullah, Yasser and
Avramidis, Eleftherios",
editor = "Zhao, Jin and
Wang, Mingyang and
Liu, Zhu",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-srw.93/",
doi = "10.18653/v1/2025.acl-srw.93",
pages = "1213--1224",
ISBN = "979-8-89176-254-1",
abstract = "Sign language segmentation focuses on identifying temporal boundaries within sign language videos. As compared to previous segmentation techniques that have depended on frame-level and phrase-level segmentation, our study emphasizes subtitle-level segmentation, using synchronized subtitle data to facilitate temporal boundary recognition. Based on Beginning-Inside-Outside (BIO) tagging for subtitle unit delineation, we train a sequence-to-sequence (Seq2Seq) model with and without attention for subtitle boundary identification. Training on optical flow data and aligned subtitles from BOBSL and YouTube-ASL, we show that the Seq2Seq model with attention outperforms baseline models, achieving improved percentage of segments, F1 and IoU score. An additional contribution is the development of a method for subtitle temporal resolution, aiming to facilitate manual annotation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rao-etal-2025-sign">
<titleInfo>
<title>Sign Language Video Segmentation Using Temporal Boundary Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kavu</namePart>
<namePart type="given">Maithri</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasser</namePart>
<namePart type="family">Hamidullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleftherios</namePart>
<namePart type="family">Avramidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-254-1</identifier>
</relatedItem>
<abstract>Sign language segmentation focuses on identifying temporal boundaries within sign language videos. As compared to previous segmentation techniques that have depended on frame-level and phrase-level segmentation, our study emphasizes subtitle-level segmentation, using synchronized subtitle data to facilitate temporal boundary recognition. Based on Beginning-Inside-Outside (BIO) tagging for subtitle unit delineation, we train a sequence-to-sequence (Seq2Seq) model with and without attention for subtitle boundary identification. Training on optical flow data and aligned subtitles from BOBSL and YouTube-ASL, we show that the Seq2Seq model with attention outperforms baseline models, achieving improved percentage of segments, F1 and IoU score. An additional contribution is the development of a method for subtitle temporal resolution, aiming to facilitate manual annotation.</abstract>
<identifier type="citekey">rao-etal-2025-sign</identifier>
<identifier type="doi">10.18653/v1/2025.acl-srw.93</identifier>
<location>
<url>https://aclanthology.org/2025.acl-srw.93/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1213</start>
<end>1224</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sign Language Video Segmentation Using Temporal Boundary Identification
%A Rao, Kavu Maithri
%A Hamidullah, Yasser
%A Avramidis, Eleftherios
%Y Zhao, Jin
%Y Wang, Mingyang
%Y Liu, Zhu
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-254-1
%F rao-etal-2025-sign
%X Sign language segmentation focuses on identifying temporal boundaries within sign language videos. As compared to previous segmentation techniques that have depended on frame-level and phrase-level segmentation, our study emphasizes subtitle-level segmentation, using synchronized subtitle data to facilitate temporal boundary recognition. Based on Beginning-Inside-Outside (BIO) tagging for subtitle unit delineation, we train a sequence-to-sequence (Seq2Seq) model with and without attention for subtitle boundary identification. Training on optical flow data and aligned subtitles from BOBSL and YouTube-ASL, we show that the Seq2Seq model with attention outperforms baseline models, achieving improved percentage of segments, F1 and IoU score. An additional contribution is the development of a method for subtitle temporal resolution, aiming to facilitate manual annotation.
%R 10.18653/v1/2025.acl-srw.93
%U https://aclanthology.org/2025.acl-srw.93/
%U https://doi.org/10.18653/v1/2025.acl-srw.93
%P 1213-1224
Markdown (Informal)
[Sign Language Video Segmentation Using Temporal Boundary Identification](https://aclanthology.org/2025.acl-srw.93/) (Rao et al., ACL 2025)
ACL