@inproceedings{santos-etal-2012-structural,
title = "Structural alignment of plain text books",
author = "Santos, Andr{\'e} and
Almeida, Jos{\'e} Jo{\~a}o and
Carvalho, Nuno",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf",
pages = "2069--2074",
abstract = "Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections ― sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs books synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative methods for splitting bitexts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="santos-etal-2012-structural">
<titleInfo>
<title>Structural alignment of plain text books</title>
</titleInfo>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">João</namePart>
<namePart type="family">Almeida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuno</namePart>
<namePart type="family">Carvalho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections ― sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs books synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative methods for splitting bitexts.</abstract>
<identifier type="citekey">santos-etal-2012-structural</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>2069</start>
<end>2074</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Structural alignment of plain text books
%A Santos, André
%A Almeida, José João
%A Carvalho, Nuno
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F santos-etal-2012-structural
%X Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections ― sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs books synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative methods for splitting bitexts.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf
%P 2069-2074
Markdown (Informal)
[Structural alignment of plain text books](http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf) (Santos et al., LREC 2012)
ACL
- André Santos, José João Almeida, and Nuno Carvalho. 2012. Structural alignment of plain text books. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 2069–2074, Istanbul, Turkey. European Language Resources Association (ELRA).