@inproceedings{tajalli-etal-2025-developing,
title = "Developing an Informal-Formal {P}ersian Corpus: Highlighting the Differences between Two Writing Styles",
author = "Tajalli, Vahide and
Shamsfard, Mehrnoush and
Kalantari, Fateme",
editor = "El-Haj, Mo",
booktitle = "Proceedings of the 1st Workshop on NLP for Languages Using Arabic Script",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.abjadnlp-1.6/",
pages = "44--53",
abstract = "Informal language is a style of spoken or written language frequently used in casual conversations, social media, weblogs, emails and text messages. In informal writing, the language undergoes some lexical and/or syntactic changes varying among different languages. Persian is one of the languages with many differences between its formal and informal styles of writing, thus developing informal language processing tools for this language seems necessary. In the present paper, the methodology in building a parallel corpus of 50,000 sentence pairs with alignments in the word/phrase level is described. The resulting corpus has about 530,000 alignments and a dictionary containing 49,397 word and phrase pairs. The observed differences between formal and informal writing are explained in detail."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tajalli-etal-2025-developing">
<titleInfo>
<title>Developing an Informal-Formal Persian Corpus: Highlighting the Differences between Two Writing Styles</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vahide</namePart>
<namePart type="family">Tajalli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehrnoush</namePart>
<namePart type="family">Shamsfard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fateme</namePart>
<namePart type="family">Kalantari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Informal language is a style of spoken or written language frequently used in casual conversations, social media, weblogs, emails and text messages. In informal writing, the language undergoes some lexical and/or syntactic changes varying among different languages. Persian is one of the languages with many differences between its formal and informal styles of writing, thus developing informal language processing tools for this language seems necessary. In the present paper, the methodology in building a parallel corpus of 50,000 sentence pairs with alignments in the word/phrase level is described. The resulting corpus has about 530,000 alignments and a dictionary containing 49,397 word and phrase pairs. The observed differences between formal and informal writing are explained in detail.</abstract>
<identifier type="citekey">tajalli-etal-2025-developing</identifier>
<location>
<url>https://aclanthology.org/2025.abjadnlp-1.6/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>44</start>
<end>53</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing an Informal-Formal Persian Corpus: Highlighting the Differences between Two Writing Styles
%A Tajalli, Vahide
%A Shamsfard, Mehrnoush
%A Kalantari, Fateme
%Y El-Haj, Mo
%S Proceedings of the 1st Workshop on NLP for Languages Using Arabic Script
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F tajalli-etal-2025-developing
%X Informal language is a style of spoken or written language frequently used in casual conversations, social media, weblogs, emails and text messages. In informal writing, the language undergoes some lexical and/or syntactic changes varying among different languages. Persian is one of the languages with many differences between its formal and informal styles of writing, thus developing informal language processing tools for this language seems necessary. In the present paper, the methodology in building a parallel corpus of 50,000 sentence pairs with alignments in the word/phrase level is described. The resulting corpus has about 530,000 alignments and a dictionary containing 49,397 word and phrase pairs. The observed differences between formal and informal writing are explained in detail.
%U https://aclanthology.org/2025.abjadnlp-1.6/
%P 44-53
Markdown (Informal)
[Developing an Informal-Formal Persian Corpus: Highlighting the Differences between Two Writing Styles](https://aclanthology.org/2025.abjadnlp-1.6/) (Tajalli et al., AbjadNLP 2025)
ACL