% Conference paper record (BibTeX form).
% Case-sensitive words are braced as whole words so that sentence-casing
% styles keep their capitals without splitting a word at a brace boundary.
@inproceedings{marszalek-kowalewska-2021-impact,
  title     = {The Impact of Text Normalization on Multiword Expressions Discovery in {Persian}},
  author    = {Marsza{\l}ek-Kowalewska, Katarzyna},
  editor    = {Mitkov, Ruslan and Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2021)},
  month     = sep,
  year      = {2021},
  address   = {Held Online},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/2021.ranlp-1.106},
  pages     = {929--939},
  abstract  = {This paper evaluates normalization procedures of Persian text for a downstream NLP task - multiword expressions (MWEs) discovery. We discuss the challenges the Persian language poses for NLP and evaluate open-source tools that try to address these difficulties. The best-performing tool is later used in the main task - MWEs discovery. In order to discover MWEs, we use association measures and a subpart of the MirasText corpus. The results show that an F-score is 26{\%} higher in the case of normalized input data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record: a conference paper and its host proceedings. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="marszalek-kowalewska-2021-impact">
    <!-- The paper itself: title, author, issue date. -->
    <titleInfo>
      <title>The Impact of Text Normalization on Multiword Expressions Discovery in Persian</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Katarzyna</namePart>
      <namePart type="family">Marszałek-Kowalewska</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Held Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper evaluates normalization procedures of Persian text for a downstream NLP task - multiword expressions (MWEs) discovery. We discuss the challenges the Persian language poses for NLP and evaluate open-source tools that try to address these difficulties. The best-performing tool is later used in the main task - MWEs discovery. In order to discover MWEs, we use association measures and a subpart of the MirasText corpus. The results show that an F-score is 26% higher in the case of normalized input data.</abstract>
    <!-- Identifiers and location of the paper within the host volume. -->
    <identifier type="citekey">marszalek-kowalewska-2021-impact</identifier>
    <location>
      <url>https://aclanthology.org/2021.ranlp-1.106</url>
    </location>
    <part>
      <date>2021-09</date>
      <extent unit="page">
        <start>929</start>
        <end>939</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Impact of Text Normalization on Multiword Expressions Discovery in Persian
%A Marszałek-Kowalewska, Katarzyna
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F marszalek-kowalewska-2021-impact
%X This paper evaluates normalization procedures of Persian text for a downstream NLP task - multiword expressions (MWEs) discovery. We discuss the challenges the Persian language poses for NLP and evaluate open-source tools that try to address these difficulties. The best-performing tool is later used in the main task - MWEs discovery. In order to discover MWEs, we use association measures and a subpart of the MirasText corpus. The results show that an F-score is 26% higher in the case of normalized input data.
%U https://aclanthology.org/2021.ranlp-1.106
%P 929-939
Markdown (Informal)
[The Impact of Text Normalization on Multiword Expressions Discovery in Persian](https://aclanthology.org/2021.ranlp-1.106) (Marszałek-Kowalewska, RANLP 2021)
ACL