@inproceedings{zhang-etal-2025-eng,
title = "{ENG}-{DRB}: {PDTB}-style Discourse Relation Bank on Engineering Tutorial Video Scripts",
author = "Zhang, Cheng and
Kakarla, Rajasekhar and
Wei, Kangda and
Huang, Ruihong",
editor = "Inui, Kentaro and
Sakti, Sakriani and
Wang, Haofen and
Wong, Derek F. and
Bhattacharyya, Pushpak and
Banerjee, Biplab and
Ekbal, Asif and
Chakraborty, Tanmoy and
Singh, Dhirendra Pratap",
booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-ijcnlp.81/",
pages = "1318--1330",
ISBN = "979-8-89176-303-6",
abstract = "Discourse relation parsing plays a crucial role in uncovering the logical structure of text, yet existing corpora focus almost exclusively on general-domain genres, leaving specialized fields like engineering under-resourced. We introduce ENG{-}DRB, the first PDTB{-}style discourse relation corpus derived from transcripts of hands{-}on engineering tutorial videos. ENG{-}DRB comprises 11 tutorials spanning civil, mechanical, and electrical/electronics engineering (155 minutes total) with 1,215 annotated relations. Compared to general{-}domain benchmarks, this dataset features a high proportion of explicit senses, dense causal and temporal relations, and frequent overlapping and embedded senses. Our benchmarking experiments underscore the dataset{'}s difficulty. A top parser (HITS) detects segment boundaries well (98.6{\%} F1), but its relation classification is more than 11 F1 percentages lower than on the standard PDTB. In addition, state{-}of{-}the{-}art LLMs (OpenAI o4{-}mini, Claude 3.7, LLaMA{-}3.1) achieve at best 41{\%} F1 on explicit relations and less than 9{\%} F1 on implicit relations, revealing systematic errors in temporal and causal sense detection. The dataset can be accessed at: https://doi.org/10.57967/hf/6895. Code to reproduce our results is available at: https://github.com/chengzhangedu/ENG-DRB."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-eng">
<titleInfo>
<title>ENG-DRB: PDTB-style Discourse Relation Bank on Engineering Tutorial Video Scripts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajasekhar</namePart>
<namePart type="family">Kakarla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kangda</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haofen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derek</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Biplab</namePart>
<namePart type="family">Banerjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhirendra</namePart>
<namePart type="given">Pratap</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The Asian Federation of Natural Language Processing and The Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-303-6</identifier>
</relatedItem>
<abstract>Discourse relation parsing plays a crucial role in uncovering the logical structure of text, yet existing corpora focus almost exclusively on general-domain genres, leaving specialized fields like engineering under-resourced. We introduce ENG-DRB, the first PDTB-style discourse relation corpus derived from transcripts of hands-on engineering tutorial videos. ENG-DRB comprises 11 tutorials spanning civil, mechanical, and electrical/electronics engineering (155 minutes total) with 1,215 annotated relations. Compared to general-domain benchmarks, this dataset features a high proportion of explicit senses, dense causal and temporal relations, and frequent overlapping and embedded senses. Our benchmarking experiments underscore the dataset’s difficulty. A top parser (HITS) detects segment boundaries well (98.6% F1), but its relation classification is more than 11 F1 percentages lower than on the standard PDTB. In addition, state-of-the-art LLMs (OpenAI o4-mini, Claude 3.7, LLaMA-3.1) achieve at best 41% F1 on explicit relations and less than 9% F1 on implicit relations, revealing systematic errors in temporal and causal sense detection. The dataset can be accessed at: https://doi.org/10.57967/hf/6895. Code to reproduce our results is available at: https://github.com/chengzhangedu/ENG-DRB.</abstract>
<identifier type="citekey">zhang-etal-2025-eng</identifier>
<location>
<url>https://aclanthology.org/2025.findings-ijcnlp.81/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>1318</start>
<end>1330</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ENG-DRB: PDTB-style Discourse Relation Bank on Engineering Tutorial Video Scripts
%A Zhang, Cheng
%A Kakarla, Rajasekhar
%A Wei, Kangda
%A Huang, Ruihong
%Y Inui, Kentaro
%Y Sakti, Sakriani
%Y Wang, Haofen
%Y Wong, Derek F.
%Y Bhattacharyya, Pushpak
%Y Banerjee, Biplab
%Y Ekbal, Asif
%Y Chakraborty, Tanmoy
%Y Singh, Dhirendra Pratap
%S Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics
%D 2025
%8 December
%I The Asian Federation of Natural Language Processing and The Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-303-6
%F zhang-etal-2025-eng
%X Discourse relation parsing plays a crucial role in uncovering the logical structure of text, yet existing corpora focus almost exclusively on general-domain genres, leaving specialized fields like engineering under-resourced. We introduce ENG-DRB, the first PDTB-style discourse relation corpus derived from transcripts of hands-on engineering tutorial videos. ENG-DRB comprises 11 tutorials spanning civil, mechanical, and electrical/electronics engineering (155 minutes total) with 1,215 annotated relations. Compared to general-domain benchmarks, this dataset features a high proportion of explicit senses, dense causal and temporal relations, and frequent overlapping and embedded senses. Our benchmarking experiments underscore the dataset’s difficulty. A top parser (HITS) detects segment boundaries well (98.6% F1), but its relation classification is more than 11 F1 percentages lower than on the standard PDTB. In addition, state-of-the-art LLMs (OpenAI o4-mini, Claude 3.7, LLaMA-3.1) achieve at best 41% F1 on explicit relations and less than 9% F1 on implicit relations, revealing systematic errors in temporal and causal sense detection. The dataset can be accessed at: https://doi.org/10.57967/hf/6895. Code to reproduce our results is available at: https://github.com/chengzhangedu/ENG-DRB.
%U https://aclanthology.org/2025.findings-ijcnlp.81/
%P 1318-1330
Markdown (Informal)
[ENG-DRB: PDTB-style Discourse Relation Bank on Engineering Tutorial Video Scripts](https://aclanthology.org/2025.findings-ijcnlp.81/) (Zhang et al., Findings 2025)
ACL
- Cheng Zhang, Rajasekhar Kakarla, Kangda Wei, and Ruihong Huang. 2025. ENG-DRB: PDTB-style Discourse Relation Bank on Engineering Tutorial Video Scripts. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 1318–1330, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.