@inproceedings{basile-sangati-2016-h,
title = "{D}({H})ante: A New Set of Tools for {XIII} Century {I}talian",
author = "Basile, Angelo and
Sangati, Federico",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1450",
pages = "2825--2828",
abstract = "In this paper we describe 1) the process of converting a corpus of Dante Alighieri from a TEI XML format into a pseudo-CoNLL format; 2) how a pos-tagger trained on modern Italian performs on Dante{'}s Italian; 3) the performance of two different pos-taggers trained on the given corpus. We are making our conversion scripts and models available to the community. The two other models trained on the corpus perform reasonably well. The tool used for the conversion process might turn out to be useful for bridging the gap between traditional digital humanities and modern NLP applications since the TEI original format is not usually suitable for being processed with standard NLP tools. We believe our work will serve both communities: the DH community will be able to tag new documents and the NLP world will have an easier way of converting existing documents to a standardized machine-readable format.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="basile-sangati-2016-h">
<titleInfo>
<title>D(H)ante: A New Set of Tools for XIII Century Italian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Angelo</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Sangati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we describe 1) the process of converting a corpus of Dante Alighieri from a TEI XML format into a pseudo-CoNLL format; 2) how a pos-tagger trained on modern Italian performs on Dante’s Italian; 3) the performance of two different pos-taggers trained on the given corpus. We are making our conversion scripts and models available to the community. The two other models trained on the corpus perform reasonably well. The tool used for the conversion process might turn out to be useful for bridging the gap between traditional digital humanities and modern NLP applications since the TEI original format is not usually suitable for being processed with standard NLP tools. We believe our work will serve both communities: the DH community will be able to tag new documents and the NLP world will have an easier way of converting existing documents to a standardized machine-readable format.</abstract>
<identifier type="citekey">basile-sangati-2016-h</identifier>
<location>
<url>https://aclanthology.org/L16-1450</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>2825</start>
<end>2828</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T D(H)ante: A New Set of Tools for XIII Century Italian
%A Basile, Angelo
%A Sangati, Federico
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F basile-sangati-2016-h
%X In this paper we describe 1) the process of converting a corpus of Dante Alighieri from a TEI XML format into a pseudo-CoNLL format; 2) how a pos-tagger trained on modern Italian performs on Dante’s Italian; 3) the performance of two different pos-taggers trained on the given corpus. We are making our conversion scripts and models available to the community. The two other models trained on the corpus perform reasonably well. The tool used for the conversion process might turn out to be useful for bridging the gap between traditional digital humanities and modern NLP applications since the TEI original format is not usually suitable for being processed with standard NLP tools. We believe our work will serve both communities: the DH community will be able to tag new documents and the NLP world will have an easier way of converting existing documents to a standardized machine-readable format.
%U https://aclanthology.org/L16-1450
%P 2825-2828
Markdown (Informal)
[D(H)ante: A New Set of Tools for XIII Century Italian](https://aclanthology.org/L16-1450) (Basile & Sangati, LREC 2016)
ACL
- Angelo Basile and Federico Sangati. 2016. D(H)ante: A New Set of Tools for XIII Century Italian. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 2825–2828, Portorož, Slovenia. European Language Resources Association (ELRA).