@inproceedings{sajous-etal-2020-englawi,
title = "{ENGLAWI}: From Human- to Machine-Readable {W}iktionary",
author = "Sajous, Franck and
Calderone, Basilio and
Hathout, Nabil",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.369",
pages = "3016--3026",
abstract = "This paper introduces ENGLAWI, a large, versatile, XML-encoded machine-readable dictionary extracted from Wiktionary. ENGLAWI contains 752,769 articles encoding the full body of information included in Wiktionary: simple words, compounds and multiword expressions, lemmas and inflectional paradigms, etymologies, phonemic transcriptions in IPA, definition glosses and usage examples, translations, semantic and morphological relations, spelling variants, etc. It is fully documented, released under a free license and supplied with G-PeTo, a series of scripts allowing easy information extraction from ENGLAWI. Additional resources extracted from ENGLAWI, such as an inflectional lexicon, a lexicon of diatopic variants and the inclusion dates of headwords in Wiktionary{'}s nomenclature are also provided. The paper describes the content of the resource and illustrates how it can be - and has been - used in previous studies. We finally introduce an ongoing work that computes lexicographic word embeddings from ENGLAWI{'}s definitions.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sajous-etal-2020-englawi">
<titleInfo>
<title>ENGLAWI: From Human- to Machine-Readable Wiktionary</title>
</titleInfo>
<name type="personal">
<namePart type="given">Franck</namePart>
<namePart type="family">Sajous</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Basilio</namePart>
<namePart type="family">Calderone</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nabil</namePart>
<namePart type="family">Hathout</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>This paper introduces ENGLAWI, a large, versatile, XML-encoded machine-readable dictionary extracted from Wiktionary. ENGLAWI contains 752,769 articles encoding the full body of information included in Wiktionary: simple words, compounds and multiword expressions, lemmas and inflectional paradigms, etymologies, phonemic transcriptions in IPA, definition glosses and usage examples, translations, semantic and morphological relations, spelling variants, etc. It is fully documented, released under a free license and supplied with G-PeTo, a series of scripts allowing easy information extraction from ENGLAWI. Additional resources extracted from ENGLAWI, such as an inflectional lexicon, a lexicon of diatopic variants and the inclusion dates of headwords in Wiktionary’s nomenclature are also provided. The paper describes the content of the resource and illustrates how it can be - and has been - used in previous studies. We finally introduce an ongoing work that computes lexicographic word embeddings from ENGLAWI’s definitions.</abstract>
<identifier type="citekey">sajous-etal-2020-englawi</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.369</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>3016</start>
<end>3026</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ENGLAWI: From Human- to Machine-Readable Wiktionary
%A Sajous, Franck
%A Calderone, Basilio
%A Hathout, Nabil
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F sajous-etal-2020-englawi
%X This paper introduces ENGLAWI, a large, versatile, XML-encoded machine-readable dictionary extracted from Wiktionary. ENGLAWI contains 752,769 articles encoding the full body of information included in Wiktionary: simple words, compounds and multiword expressions, lemmas and inflectional paradigms, etymologies, phonemic transcriptions in IPA, definition glosses and usage examples, translations, semantic and morphological relations, spelling variants, etc. It is fully documented, released under a free license and supplied with G-PeTo, a series of scripts allowing easy information extraction from ENGLAWI. Additional resources extracted from ENGLAWI, such as an inflectional lexicon, a lexicon of diatopic variants and the inclusion dates of headwords in Wiktionary’s nomenclature are also provided. The paper describes the content of the resource and illustrates how it can be - and has been - used in previous studies. We finally introduce an ongoing work that computes lexicographic word embeddings from ENGLAWI’s definitions.
%U https://aclanthology.org/2020.lrec-1.369
%P 3016-3026
Markdown (Informal)
[ENGLAWI: From Human- to Machine-Readable Wiktionary](https://aclanthology.org/2020.lrec-1.369) (Sajous et al., LREC 2020)
ACL
- Franck Sajous, Basilio Calderone, and Nabil Hathout. 2020. ENGLAWI: From Human- to Machine-Readable Wiktionary. In Proceedings of the Twelfth Language Resources and Evaluation Conference, pages 3016–3026, Marseille, France. European Language Resources Association.