% CLIB 2014 conference paper (ACL Anthology record 2014.clib-1.6).
% Proper nouns and acronyms in title/booktitle are braced as whole words so
% that sentence-casing bibliography styles keep their capitalisation.
@inproceedings{stoyanova-2014-automatic,
    author    = {Stoyanova, Ivelina},
    title     = {Automatic Categorisation of Multiword Expressions and Named Entities in {Bulgarian}},
    booktitle = {Proceedings of the First International Conference on Computational Linguistics in {Bulgaria} ({CLIB} 2014)},
    month     = sep,
    year      = {2014},
    address   = {Sofia, Bulgaria},
    publisher = {Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences},
    pages     = {40--48},
    url       = {https://aclanthology.org/2014.clib-1.6},
    abstract  = {This paper describes an approach for automatic categorisation of various types of multiword expressions (MWEs) with a focus on multiword named entities (MNEs), which compose a large portion of MWEs in general. The proposed algorithm is based on a refined classification of MWEs according to their idiomaticity. While MWE categorisation can be considered as a separate and independent task, it complements the general task of MWE recognition. After outlining the method, we set up an experiment to demonstrate its performance. We use the corpus Wiki1000+ that comprises 6,311 annotated Wikipedia articles of 1,000 or more words each, amounting to 13.4 million words in total. The study also employs a large dictionary of 59,369 MWEs noun phrases (out of more than 85,000 MWEs), labelled with their respective types. The dictionary is compiled automatically and verified semi-automatically. The research presented here is based on Bulgarian although most of the ideas, the methodology and the analysis are applicable to other Slavic and possibly other European languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by the citekey "stoyanova-2014-automatic". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stoyanova-2014-automatic">
<!-- Title of the paper itself (the host proceedings title is given under relatedItem below). -->
<titleInfo>
<title>Automatic Categorisation of Multiword Expressions and Named Entities in Bulgarian</title>
</titleInfo>
<!-- Sole contributor, with the MARC relator role "author". -->
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Stoyanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date at year-month precision. -->
<originInfo>
<dateIssued>2014-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume this paper belongs to, with its publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First International Conference on Computational Linguistics in Bulgaria (CLIB 2014)</title>
</titleInfo>
<originInfo>
<publisher>Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences</publisher>
<place>
<placeTerm type="text">Sofia, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes an approach for automatic categorisation of various types of multiword expressions (MWEs) with a focus on multiword named entities (MNEs), which compose a large portion of MWEs in general. The proposed algorithm is based on a refined classification of MWEs according to their idiomaticity. While MWE categorisation can be considered as a separate and independent task, it complements the general task of MWE recognition. After outlining the method, we set up an experiment to demonstrate its performance. We use the corpus Wiki1000+ that comprises 6,311 annotated Wikipedia articles of 1,000 or more words each, amounting to 13.4 million words in total. The study also employs a large dictionary of 59,369 MWEs noun phrases (out of more than 85,000 MWEs), labelled with their respective types. The dictionary is compiled automatically and verified semi-automatically. The research presented here is based on Bulgarian although most of the ideas, the methodology and the analysis are applicable to other Slavic and possibly other European languages.</abstract>
<!-- Same value as the mods ID attribute above. -->
<identifier type="citekey">stoyanova-2014-automatic</identifier>
<location>
<url>https://aclanthology.org/2014.clib-1.6</url>
</location>
<!-- Date and page extent (first and last page) of the paper. -->
<part>
<date>2014-09</date>
<extent unit="page">
<start>40</start>
<end>48</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Categorisation of Multiword Expressions and Named Entities in Bulgarian
%A Stoyanova, Ivelina
%S Proceedings of the First International Conference on Computational Linguistics in Bulgaria (CLIB 2014)
%D 2014
%8 September
%I Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences
%C Sofia, Bulgaria
%F stoyanova-2014-automatic
%X This paper describes an approach for automatic categorisation of various types of multiword expressions (MWEs) with a focus on multiword named entities (MNEs), which compose a large portion of MWEs in general. The proposed algorithm is based on a refined classification of MWEs according to their idiomaticity. While MWE categorisation can be considered as a separate and independent task, it complements the general task of MWE recognition. After outlining the method, we set up an experiment to demonstrate its performance. We use the corpus Wiki1000+ that comprises 6,311 annotated Wikipedia articles of 1,000 or more words each, amounting to 13.4 million words in total. The study also employs a large dictionary of 59,369 MWEs noun phrases (out of more than 85,000 MWEs), labelled with their respective types. The dictionary is compiled automatically and verified semi-automatically. The research presented here is based on Bulgarian although most of the ideas, the methodology and the analysis are applicable to other Slavic and possibly other European languages.
%U https://aclanthology.org/2014.clib-1.6
%P 40-48
Markdown (Informal)
[Automatic Categorisation of Multiword Expressions and Named Entities in Bulgarian](https://aclanthology.org/2014.clib-1.6) (Stoyanova, CLIB 2014)
ACL