@inproceedings{boitet-etal-2018-towards,
title = "Towards an Automatic Classification of Illustrative Examples in a Large {J}apanese-{F}rench Dictionary Obtained by {OCR}",
author = "Boitet, Christian and
Mangeot, Mathieu and
Tomokiyo, Mutsuko",
editor = "Machonis, Peter and
Barreiro, Anabela and
Kocijan, Kristina and
Silberztein, Max",
booktitle = "Proceedings of the First Workshop on Linguistic Resources for Natural Language Processing",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3815",
pages = "112--121",
abstract = "We work on improving the Cesselin, a large and open source Japanese-French bilingual dictionary digitalized by OCR, available on the web, and contributively improvable online. Labelling its examples (about 226000) would significantly enhance their usefulness for language learners. Examples are proverbs, idiomatic constructions, normal usage examples, and, for nouns, phrases containing a quantifier. Proverbs are easy to spot, but not examples of other types. To find a method for automatically or at least semi-automatically annotating them, we have studied many entries, and hypothesized that the degree of lexical similarity between results of MT into a third language might give good cues. To confirm that hypothesis, we sampled 500 examples and used Google Translate to translate into English their Japanese expressions and their French translations. The hypothesis holds well, in particular for distinguishing examples of normal usage from idiomatic examples. Finally, we propose a detailed annotation procedure and discuss its future automatization.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="boitet-etal-2018-towards">
<titleInfo>
<title>Towards an Automatic Classification of Illustrative Examples in a Large Japanese-French Dictionary Obtained by OCR</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Boitet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Mangeot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mutsuko</namePart>
<namePart type="family">Tomokiyo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Linguistic Resources for Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Machonis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anabela</namePart>
<namePart type="family">Barreiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Kocijan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Max</namePart>
<namePart type="family">Silberztein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We work on improving the Cesselin, a large and open source Japanese-French bilingual dictionary digitalized by OCR, available on the web, and contributively improvable online. Labelling its examples (about 226000) would significantly enhance their usefulness for language learners. Examples are proverbs, idiomatic constructions, normal usage examples, and, for nouns, phrases containing a quantifier. Proverbs are easy to spot, but not examples of other types. To find a method for automatically or at least semi-automatically annotating them, we have studied many entries, and hypothesized that the degree of lexical similarity between results of MT into a third language might give good cues. To confirm that hypothesis, we sampled 500 examples and used Google Translate to translate into English their Japanese expressions and their French translations. The hypothesis holds well, in particular for distinguishing examples of normal usage from idiomatic examples. Finally, we propose a detailed annotation procedure and discuss its future automatization.</abstract>
<identifier type="citekey">boitet-etal-2018-towards</identifier>
<location>
<url>https://aclanthology.org/W18-3815</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>112</start>
<end>121</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards an Automatic Classification of Illustrative Examples in a Large Japanese-French Dictionary Obtained by OCR
%A Boitet, Christian
%A Mangeot, Mathieu
%A Tomokiyo, Mutsuko
%Y Machonis, Peter
%Y Barreiro, Anabela
%Y Kocijan, Kristina
%Y Silberztein, Max
%S Proceedings of the First Workshop on Linguistic Resources for Natural Language Processing
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F boitet-etal-2018-towards
%X We work on improving the Cesselin, a large and open source Japanese-French bilingual dictionary digitalized by OCR, available on the web, and contributively improvable online. Labelling its examples (about 226000) would significantly enhance their usefulness for language learners. Examples are proverbs, idiomatic constructions, normal usage examples, and, for nouns, phrases containing a quantifier. Proverbs are easy to spot, but not examples of other types. To find a method for automatically or at least semi-automatically annotating them, we have studied many entries, and hypothesized that the degree of lexical similarity between results of MT into a third language might give good cues. To confirm that hypothesis, we sampled 500 examples and used Google Translate to translate into English their Japanese expressions and their French translations. The hypothesis holds well, in particular for distinguishing examples of normal usage from idiomatic examples. Finally, we propose a detailed annotation procedure and discuss its future automatization.
%U https://aclanthology.org/W18-3815
%P 112-121
Markdown (Informal)
[Towards an Automatic Classification of Illustrative Examples in a Large Japanese-French Dictionary Obtained by OCR](https://aclanthology.org/W18-3815) (Boitet et al., LR4NLP 2018)
ACL