@inproceedings{nguyen-etal-2017-sub,
title = "Sub-character Neural Language Modelling in {J}apanese",
author = "Nguyen, Viet and
Brooke, Julian and
Baldwin, Timothy",
editor = "Faruqui, Manaal and
Schuetze, Hinrich and
Trancoso, Isabel and
Yaghoobzadeh, Yadollah",
booktitle = "Proceedings of the First Workshop on Subword and Character Level Models in {NLP}",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4122",
doi = "10.18653/v1/W17-4122",
pages = "148--153",
abstract = "In East Asian languages such as Japanese and Chinese, the semantics of a character are (somewhat) reflected in its sub-character elements. This paper examines the effect of using sub-characters for language modeling in Japanese. This is achieved by decomposing characters according to a range of character decomposition datasets, and training a neural language model over variously decomposed character representations. Our results indicate that language modelling can be improved through the inclusion of sub-characters, though this result depends on a good choice of decomposition dataset and the appropriate granularity of decomposition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-etal-2017-sub">
<titleInfo>
<title>Sub-character Neural Language Modelling in Japanese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Viet</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Brooke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Subword and Character Level Models in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manaal</namePart>
<namePart type="family">Faruqui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schuetze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="family">Trancoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yadollah</namePart>
<namePart type="family">Yaghoobzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In East Asian languages such as Japanese and Chinese, the semantics of a character are (somewhat) reflected in its sub-character elements. This paper examines the effect of using sub-characters for language modeling in Japanese. This is achieved by decomposing characters according to a range of character decomposition datasets, and training a neural language model over variously decomposed character representations. Our results indicate that language modelling can be improved through the inclusion of sub-characters, though this result depends on a good choice of decomposition dataset and the appropriate granularity of decomposition.</abstract>
<identifier type="citekey">nguyen-etal-2017-sub</identifier>
<identifier type="doi">10.18653/v1/W17-4122</identifier>
<location>
<url>https://aclanthology.org/W17-4122</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>148</start>
<end>153</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sub-character Neural Language Modelling in Japanese
%A Nguyen, Viet
%A Brooke, Julian
%A Baldwin, Timothy
%Y Faruqui, Manaal
%Y Schuetze, Hinrich
%Y Trancoso, Isabel
%Y Yaghoobzadeh, Yadollah
%S Proceedings of the First Workshop on Subword and Character Level Models in NLP
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F nguyen-etal-2017-sub
%X In East Asian languages such as Japanese and Chinese, the semantics of a character are (somewhat) reflected in its sub-character elements. This paper examines the effect of using sub-characters for language modeling in Japanese. This is achieved by decomposing characters according to a range of character decomposition datasets, and training a neural language model over variously decomposed character representations. Our results indicate that language modelling can be improved through the inclusion of sub-characters, though this result depends on a good choice of decomposition dataset and the appropriate granularity of decomposition.
%R 10.18653/v1/W17-4122
%U https://aclanthology.org/W17-4122
%U https://doi.org/10.18653/v1/W17-4122
%P 148-153
Markdown (Informal)
[Sub-character Neural Language Modelling in Japanese](https://aclanthology.org/W17-4122) (Nguyen et al., SCLeM 2017)
ACL
- Viet Nguyen, Julian Brooke, and Timothy Baldwin. 2017. Sub-character Neural Language Modelling in Japanese. In Proceedings of the First Workshop on Subword and Character Level Models in NLP, pages 148–153, Copenhagen, Denmark. Association for Computational Linguistics.