@inproceedings{dai-cai-2017-glyph,
title = "Glyph-aware Embedding of {C}hinese Characters",
author = "Dai, Falcon and
Cai, Zheng",
editor = "Faruqui, Manaal and
Schuetze, Hinrich and
Trancoso, Isabel and
Yaghoobzadeh, Yadollah",
booktitle = "Proceedings of the First Workshop on Subword and Character Level Models in {NLP}",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4109/",
doi = "10.18653/v1/W17-4109",
pages = "64--69",
abstract = "Given the advantage and recent success of English character-level and subword-unit models in several NLP tasks, we consider the equivalent modeling problem for Chinese. Chinese script is logographic and many Chinese logograms are composed of common substructures that provide semantic, phonetic and syntactic hints. In this work, we propose to explicitly incorporate the visual appearance of a character`s glyph in its representation, resulting in a novel glyph-aware embedding of Chinese characters. Being inspired by the success of convolutional neural networks in computer vision, we use them to incorporate the spatio-structural patterns of Chinese glyphs as rendered in raw pixels. In the context of two basic Chinese NLP tasks of language modeling and word segmentation, the model learns to represent each character`s task-relevant semantic and syntactic information in the character-level embedding."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dai-cai-2017-glyph">
<titleInfo>
<title>Glyph-aware Embedding of Chinese Characters</title>
</titleInfo>
<name type="personal">
<namePart type="given">Falcon</namePart>
<namePart type="family">Dai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Subword and Character Level Models in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manaal</namePart>
<namePart type="family">Faruqui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schuetze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="family">Trancoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yadollah</namePart>
<namePart type="family">Yaghoobzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Given the advantage and recent success of English character-level and subword-unit models in several NLP tasks, we consider the equivalent modeling problem for Chinese. Chinese script is logographic and many Chinese logograms are composed of common substructures that provide semantic, phonetic and syntactic hints. In this work, we propose to explicitly incorporate the visual appearance of a character‘s glyph in its representation, resulting in a novel glyph-aware embedding of Chinese characters. Being inspired by the success of convolutional neural networks in computer vision, we use them to incorporate the spatio-structural patterns of Chinese glyphs as rendered in raw pixels. In the context of two basic Chinese NLP tasks of language modeling and word segmentation, the model learns to represent each character‘s task-relevant semantic and syntactic information in the character-level embedding.</abstract>
<identifier type="citekey">dai-cai-2017-glyph</identifier>
<identifier type="doi">10.18653/v1/W17-4109</identifier>
<location>
<url>https://aclanthology.org/W17-4109/</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>64</start>
<end>69</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Glyph-aware Embedding of Chinese Characters
%A Dai, Falcon
%A Cai, Zheng
%Y Faruqui, Manaal
%Y Schuetze, Hinrich
%Y Trancoso, Isabel
%Y Yaghoobzadeh, Yadollah
%S Proceedings of the First Workshop on Subword and Character Level Models in NLP
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F dai-cai-2017-glyph
%X Given the advantage and recent success of English character-level and subword-unit models in several NLP tasks, we consider the equivalent modeling problem for Chinese. Chinese script is logographic and many Chinese logograms are composed of common substructures that provide semantic, phonetic and syntactic hints. In this work, we propose to explicitly incorporate the visual appearance of a character‘s glyph in its representation, resulting in a novel glyph-aware embedding of Chinese characters. Being inspired by the success of convolutional neural networks in computer vision, we use them to incorporate the spatio-structural patterns of Chinese glyphs as rendered in raw pixels. In the context of two basic Chinese NLP tasks of language modeling and word segmentation, the model learns to represent each character‘s task-relevant semantic and syntactic information in the character-level embedding.
%R 10.18653/v1/W17-4109
%U https://aclanthology.org/W17-4109/
%U https://doi.org/10.18653/v1/W17-4109
%P 64-69
Markdown (Informal)
[Glyph-aware Embedding of Chinese Characters](https://aclanthology.org/W17-4109/) (Dai & Cai, SCLeM 2017)
ACL
- Falcon Dai and Zheng Cai. 2017. Glyph-aware Embedding of Chinese Characters. In Proceedings of the First Workshop on Subword and Character Level Models in NLP, pages 64–69, Copenhagen, Denmark. Association for Computational Linguistics.