% Conference paper record: "SentAlign" in the EMNLP 2023 System Demonstrations proceedings.
% The tool name is brace-protected as a whole word so sentence-casing styles keep its capitals.
@inproceedings{steingrimsson-etal-2023-sentalign,
    title     = {{SentAlign}: Accurate and Scalable Sentence Alignment},
    author    = {Steingrimsson, Steinthor and
                 Loftsson, Hrafn and
                 Way, Andy},
    editor    = {Feng, Yansong and
                 Lefever, Els},
    booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
    month     = dec,
    year      = {2023},
    address   = {Singapore},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.emnlp-demo.22},
    doi       = {10.18653/v1/2023.emnlp-demo.22},
    pages     = {256--263},
    abstract  = {We present SentAlign, an accurate sentence alignment tool designed to handle very large parallel document pairs. Given user-defined parameters, the alignment algorithm evaluates all possible alignment paths in fairly large documents of thousands of sentences and uses a divide-and-conquer approach to align documents containing tens of thousands of sentences. The scoring function is based on LaBSE bilingual sentence representations. SentAlign outperforms five other sentence alignment tools when evaluated on two different evaluation sets, German-French and English-Icelandic, and on a downstream machine translation task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey steingrimsson-etal-2023-sentalign:
     three authors, the host proceedings with two editors, abstract, identifiers, and page extent. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="steingrimsson-etal-2023-sentalign">
    <titleInfo>
      <title>SentAlign: Accurate and Scalable Sentence Alignment</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Steinthor</namePart>
      <namePart type="family">Steingrimsson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hrafn</namePart>
      <namePart type="family">Loftsson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andy</namePart>
      <namePart type="family">Way</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing this paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yansong</namePart>
        <namePart type="family">Feng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Els</namePart>
        <namePart type="family">Lefever</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present SentAlign, an accurate sentence alignment tool designed to handle very large parallel document pairs. Given user-defined parameters, the alignment algorithm evaluates all possible alignment paths in fairly large documents of thousands of sentences and uses a divide-and-conquer approach to align documents containing tens of thousands of sentences. The scoring function is based on LaBSE bilingual sentence representations. SentAlign outperforms five other sentence alignment tools when evaluated on two different evaluation sets, German-French and English-Icelandic, and on a downstream machine translation task.</abstract>
    <identifier type="citekey">steingrimsson-etal-2023-sentalign</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-demo.22</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-demo.22</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>256</start>
        <end>263</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SentAlign: Accurate and Scalable Sentence Alignment
%A Steingrimsson, Steinthor
%A Loftsson, Hrafn
%A Way, Andy
%Y Feng, Yansong
%Y Lefever, Els
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F steingrimsson-etal-2023-sentalign
%X We present SentAlign, an accurate sentence alignment tool designed to handle very large parallel document pairs. Given user-defined parameters, the alignment algorithm evaluates all possible alignment paths in fairly large documents of thousands of sentences and uses a divide-and-conquer approach to align documents containing tens of thousands of sentences. The scoring function is based on LaBSE bilingual sentence representations. SentAlign outperforms five other sentence alignment tools when evaluated on two different evaluation sets, German-French and English-Icelandic, and on a downstream machine translation task.
%R 10.18653/v1/2023.emnlp-demo.22
%U https://aclanthology.org/2023.emnlp-demo.22
%U https://doi.org/10.18653/v1/2023.emnlp-demo.22
%P 256-263
Markdown (Informal)
[SentAlign: Accurate and Scalable Sentence Alignment](https://aclanthology.org/2023.emnlp-demo.22) (Steingrimsson et al., EMNLP 2023)
ACL
- Steinthor Steingrimsson, Hrafn Loftsson, and Andy Way. 2023. SentAlign: Accurate and Scalable Sentence Alignment. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 256–263, Singapore. Association for Computational Linguistics.