@inproceedings{ustun-etal-2018-characters,
title = "Characters or Morphemes: How to Represent Words?",
author = {{\"U}st{\"u}n, Ahmet and
Kurfal{\i}, Murathan and
Can, Burcu},
editor = "Augenstein, Isabelle and
Cao, Kris and
He, He and
Hill, Felix and
Gella, Spandana and
Kiros, Jamie and
Mei, Hongyuan and
Misra, Dipendra",
booktitle = "Proceedings of the Third Workshop on Representation Learning for {NLP}",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3019",
doi = "10.18653/v1/W18-3019",
pages = "144--153",
abstract = "In this paper, we investigate the effects of using subword information in representation learning. We argue that using syntactic subword units effects the quality of the word representations positively. We introduce a morpheme-based model and compare it against to word-based, character-based, and character n-gram level models. Our model takes a list of candidate segmentations of a word and learns the representation of the word based on different segmentations that are weighted by an attention mechanism. We performed experiments on Turkish as a morphologically rich language and English with a comparably poorer morphology. The results show that morpheme-based models are better at learning word representations of morphologically complex languages compared to character-based and character n-gram level models since the morphemes help to incorporate more syntactic knowledge in learning, that makes morpheme-based models better at syntactic tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ustun-etal-2018-characters">
<titleInfo>
<title>Characters or Morphemes: How to Represent Words?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahmet</namePart>
<namePart type="family">Üstün</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Murathan</namePart>
<namePart type="family">Kurfalı</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Burcu</namePart>
<namePart type="family">Can</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Representation Learning for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kris</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">He</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Hill</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Spandana</namePart>
<namePart type="family">Gella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jamie</namePart>
<namePart type="family">Kiros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongyuan</namePart>
<namePart type="family">Mei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipendra</namePart>
<namePart type="family">Misra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we investigate the effects of using subword information in representation learning. We argue that using syntactic subword units effects the quality of the word representations positively. We introduce a morpheme-based model and compare it against to word-based, character-based, and character n-gram level models. Our model takes a list of candidate segmentations of a word and learns the representation of the word based on different segmentations that are weighted by an attention mechanism. We performed experiments on Turkish as a morphologically rich language and English with a comparably poorer morphology. The results show that morpheme-based models are better at learning word representations of morphologically complex languages compared to character-based and character n-gram level models since the morphemes help to incorporate more syntactic knowledge in learning, that makes morpheme-based models better at syntactic tasks.</abstract>
<identifier type="citekey">ustun-etal-2018-characters</identifier>
<identifier type="doi">10.18653/v1/W18-3019</identifier>
<location>
<url>https://aclanthology.org/W18-3019</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>144</start>
<end>153</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Characters or Morphemes: How to Represent Words?
%A Üstün, Ahmet
%A Kurfalı, Murathan
%A Can, Burcu
%Y Augenstein, Isabelle
%Y Cao, Kris
%Y He, He
%Y Hill, Felix
%Y Gella, Spandana
%Y Kiros, Jamie
%Y Mei, Hongyuan
%Y Misra, Dipendra
%S Proceedings of the Third Workshop on Representation Learning for NLP
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F ustun-etal-2018-characters
%X In this paper, we investigate the effects of using subword information in representation learning. We argue that using syntactic subword units effects the quality of the word representations positively. We introduce a morpheme-based model and compare it against to word-based, character-based, and character n-gram level models. Our model takes a list of candidate segmentations of a word and learns the representation of the word based on different segmentations that are weighted by an attention mechanism. We performed experiments on Turkish as a morphologically rich language and English with a comparably poorer morphology. The results show that morpheme-based models are better at learning word representations of morphologically complex languages compared to character-based and character n-gram level models since the morphemes help to incorporate more syntactic knowledge in learning, that makes morpheme-based models better at syntactic tasks.
%R 10.18653/v1/W18-3019
%U https://aclanthology.org/W18-3019
%U https://doi.org/10.18653/v1/W18-3019
%P 144-153
Markdown (Informal)
[Characters or Morphemes: How to Represent Words?](https://aclanthology.org/W18-3019) (Üstün et al., RepL4NLP 2018)
ACL
- Ahmet Üstün, Murathan Kurfalı, and Burcu Can. 2018. Characters or Morphemes: How to Represent Words?. In Proceedings of the Third Workshop on Representation Learning for NLP, pages 144–153, Melbourne, Australia. Association for Computational Linguistics.