@inproceedings{razzaghi-etal-2026-parscore,
title = "{P}ars{CORE}: The {P}ersian Corpus of Online Registers",
author = "Razzaghi, Alireza and
Henriksson, Erik and
Laippala, Veronika",
editor = "Merchant, Rayyan and
Megerdoomian, Karine",
booktitle = "The Proceedings of the First Workshop on {NLP} and {LLM}s for the {I}ranian Language Family",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.silkroadnlp-1.7/",
pages = "60--73",
ISBN = "979-8-89176-371-5",
abstract = "Despite recent advances in automatic web register (genre) labeling and its applications to web-scale datasets and LLM development, the effectiveness of these tools for digitally low-resource languages remains unclear. This study introduces ParsCORE, the first large-scale collection of Persian web registers (genres), and evaluates deep learning models for register classification and keyword analysis across major registers. Using 2,000 human-annotated documents, the models achieved a micro F1-score of 0.76. The findings provide a foundation for future research on the linguistic and cultural specificities of Persian registers."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="razzaghi-etal-2026-parscore">
<titleInfo>
<title>ParsCORE: The Persian Corpus of Online Registers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alireza</namePart>
<namePart type="family">Razzaghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erik</namePart>
<namePart type="family">Henriksson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronika</namePart>
<namePart type="family">Laippala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rayyan</namePart>
<namePart type="family">Merchant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karine</namePart>
<namePart type="family">Megerdoomian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-371-5</identifier>
</relatedItem>
<abstract>Despite recent advances in automatic web register (genre) labeling and its applications to web-scale datasets and LLM development, the effectiveness of these tools for digitally low-resource languages remains unclear. This study introduces ParsCORE, the first large-scale collection of Persian web registers (genres), and evaluates deep learning models for register classification and keyword analysis across major registers. Using 2,000 human-annotated documents, the models achieved a micro F1-score of 0.76. The findings provide a foundation for future research on the linguistic and cultural specificities of Persian registers.</abstract>
<identifier type="citekey">razzaghi-etal-2026-parscore</identifier>
<location>
<url>https://aclanthology.org/2026.silkroadnlp-1.7/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>60</start>
<end>73</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ParsCORE: The Persian Corpus of Online Registers
%A Razzaghi, Alireza
%A Henriksson, Erik
%A Laippala, Veronika
%Y Merchant, Rayyan
%Y Megerdoomian, Karine
%S The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-371-5
%F razzaghi-etal-2026-parscore
%X Despite recent advances in automatic web register (genre) labeling and its applications to web-scale datasets and LLM development, the effectiveness of these tools for digitally low-resource languages remains unclear. This study introduces ParsCORE, the first large-scale collection of Persian web registers (genres), and evaluates deep learning models for register classification and keyword analysis across major registers. Using 2,000 human-annotated documents, the models achieved a micro F1-score of 0.76. The findings provide a foundation for future research on the linguistic and cultural specificities of Persian registers.
%U https://aclanthology.org/2026.silkroadnlp-1.7/
%P 60-73
Markdown (Informal)
[ParsCORE: The Persian Corpus of Online Registers](https://aclanthology.org/2026.silkroadnlp-1.7/) (Razzaghi et al., SilkRoadNLP 2026)
ACL
- Alireza Razzaghi, Erik Henriksson, and Veronika Laippala. 2026. ParsCORE: The Persian Corpus of Online Registers. In The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family, pages 60–73, Rabat, Morocco. Association for Computational Linguistics.