@inproceedings{ocampo-diaz-ouyang-2022-alignment,
title = "An Alignment-based Approach to Text Segmentation Similarity Scoring",
author = "Ocampo Diaz, Gerardo and
Ouyang, Jessica",
editor = "Fokkens, Antske and
Srikumar, Vivek",
booktitle = "Proceedings of the 26th Conference on Computational Natural Language Learning (CoNLL)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.conll-1.26",
doi = "10.18653/v1/2022.conll-1.26",
pages = "374--383",
abstract = "Text segmentation is a natural language processing task with popular applications, such as topic segmentation, element discourse extraction, and sentence tokenization. Much work has been done to develop accurate segmentation similarity metrics, but even the most advanced metrics used today, B, and WindowDiff, exhibit incorrect behavior due to their evaluation of boundaries in isolation. In this paper, we present a new segment-alignment based approach to segmentation similarity scoring and a new similarity metric A. We show that A does not exhibit the erratic behavior of B and WindowDiff, quantify the likelihood of B and WindowDiff misbehaving through simulation, and discuss the versatility of alignment-based approaches for segmentation similarity scoring. We make our implementation of A publicly available and encourage the community to explore more sophisticated approaches to text segmentation similarity scoring.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ocampo-diaz-ouyang-2022-alignment">
<titleInfo>
<title>An Alignment-based Approach to Text Segmentation Similarity Scoring</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gerardo</namePart>
<namePart type="family">Ocampo Diaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jessica</namePart>
<namePart type="family">Ouyang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 26th Conference on Computational Natural Language Learning (CoNLL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antske</namePart>
<namePart type="family">Fokkens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text segmentation is a natural language processing task with popular applications, such as topic segmentation, element discourse extraction, and sentence tokenization. Much work has been done to develop accurate segmentation similarity metrics, but even the most advanced metrics used today, B, and WindowDiff, exhibit incorrect behavior due to their evaluation of boundaries in isolation. In this paper, we present a new segment-alignment based approach to segmentation similarity scoring and a new similarity metric A. We show that A does not exhibit the erratic behavior of B and WindowDiff, quantify the likelihood of B and WindowDiff misbehaving through simulation, and discuss the versatility of alignment-based approaches for segmentation similarity scoring. We make our implementation of A publicly available and encourage the community to explore more sophisticated approaches to text segmentation similarity scoring.</abstract>
<identifier type="citekey">ocampo-diaz-ouyang-2022-alignment</identifier>
<identifier type="doi">10.18653/v1/2022.conll-1.26</identifier>
<location>
<url>https://aclanthology.org/2022.conll-1.26</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>374</start>
<end>383</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Alignment-based Approach to Text Segmentation Similarity Scoring
%A Ocampo Diaz, Gerardo
%A Ouyang, Jessica
%Y Fokkens, Antske
%Y Srikumar, Vivek
%S Proceedings of the 26th Conference on Computational Natural Language Learning (CoNLL)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F ocampo-diaz-ouyang-2022-alignment
%X Text segmentation is a natural language processing task with popular applications, such as topic segmentation, element discourse extraction, and sentence tokenization. Much work has been done to develop accurate segmentation similarity metrics, but even the most advanced metrics used today, B, and WindowDiff, exhibit incorrect behavior due to their evaluation of boundaries in isolation. In this paper, we present a new segment-alignment based approach to segmentation similarity scoring and a new similarity metric A. We show that A does not exhibit the erratic behavior of B and WindowDiff, quantify the likelihood of B and WindowDiff misbehaving through simulation, and discuss the versatility of alignment-based approaches for segmentation similarity scoring. We make our implementation of A publicly available and encourage the community to explore more sophisticated approaches to text segmentation similarity scoring.
%R 10.18653/v1/2022.conll-1.26
%U https://aclanthology.org/2022.conll-1.26
%U https://doi.org/10.18653/v1/2022.conll-1.26
%P 374-383
Markdown (Informal)
[An Alignment-based Approach to Text Segmentation Similarity Scoring](https://aclanthology.org/2022.conll-1.26) (Ocampo Diaz & Ouyang, CoNLL 2022)
ACL