% Workshop paper in the CODI 2025 proceedings; key: churin-etal-2025-long.
% Capitalised words that must survive style recasing are brace-protected as whole words.
@inproceedings{churin-etal-2025-long,
  title     = {Long Context Benchmark for the {Russian} Language},
  author    = {Churin, Igor and
               Apishev, Murat and
               Tikhonova, Maria and
               Shevelev, Denis and
               Bulatov, Aydar and
               Kuratov, Yuri and
               Averkiev, Sergei and
               Fenogenova, Alena},
  editor    = {Strube, Michael and
               Braud, Chloe and
               Hardmeier, Christian and
               Li, Junyi Jessy and
               Loaiciga, Sharid and
               Zeldes, Amir and
               Li, Chuyuan},
  booktitle = {Proceedings of the 6th Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences ({CODI} 2025)},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.codi-1.1/},
  pages     = {1--13},
  isbn      = {979-8-89176-343-2},
  abstract  = {Recent progress in Natural Language Processing (NLP) has driven the creation of Large Language Models (LLMs) capable of tackling a vast range of tasks. A critical property of these models is their ability to handle large documents and process long token sequences, which has fostered the need for a robust evaluation methodology for long-text scenarios. To meet this requirement in the context of the Russian language, we present our benchmark consisting of 18 datasets designed to assess LLM performance in tasks such as information retrieval, knowledge extraction, machine reading, question answering, and reasoning. These datasets are categorized into four levels of complexity, enabling model evaluation across context lengths up to 128k tokens. To facilitate further research, we provide open-source datasets, a codebase, and a public leaderboard associated with the benchmark.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper; ID matches the citekey identifier below. -->
  <mods ID="churin-etal-2025-long">
    <titleInfo>
      <title>Long Context Benchmark for the Russian Language</title>
    </titleInfo>

    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Igor</namePart>
      <namePart type="family">Churin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Murat</namePart>
      <namePart type="family">Apishev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maria</namePart>
      <namePart type="family">Tikhonova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Denis</namePart>
      <namePart type="family">Shevelev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aydar</namePart>
      <namePart type="family">Bulatov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yuri</namePart>
      <namePart type="family">Kuratov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sergei</namePart>
      <namePart type="family">Averkiev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alena</namePart>
      <namePart type="family">Fenogenova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 6th Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences (CODI 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Strube</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chloe</namePart>
        <namePart type="family">Braud</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christian</namePart>
        <namePart type="family">Hardmeier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Junyi</namePart>
        <namePart type="given">Jessy</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sharid</namePart>
        <namePart type="family">Loaiciga</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Amir</namePart>
        <namePart type="family">Zeldes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chuyuan</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-343-2</identifier>
    </relatedItem>

    <abstract>Recent progress in Natural Language Processing (NLP) has driven the creation of Large Language Models (LLMs) capable of tackling a vast range of tasks. A critical property of these models is their ability to handle large documents and process long token sequences, which has fostered the need for a robust evaluation methodology for long-text scenarios. To meet this requirement in the context of the Russian language, we present our benchmark consisting of 18 datasets designed to assess LLM performance in tasks such as information retrieval, knowledge extraction, machine reading, question answering, and reasoning. These datasets are categorized into four levels of complexity, enabling model evaluation across context lengths up to 128k tokens. To facilitate further research, we provide open-source datasets, a codebase, and a public leaderboard associated with the benchmark.</abstract>
    <identifier type="citekey">churin-etal-2025-long</identifier>
    <location>
      <url>https://aclanthology.org/2025.codi-1.1/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>1</start>
        <end>13</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Long Context Benchmark for the Russian Language
%A Churin, Igor
%A Apishev, Murat
%A Tikhonova, Maria
%A Shevelev, Denis
%A Bulatov, Aydar
%A Kuratov, Yuri
%A Averkiev, Sergei
%A Fenogenova, Alena
%Y Strube, Michael
%Y Braud, Chloe
%Y Hardmeier, Christian
%Y Li, Junyi Jessy
%Y Loaiciga, Sharid
%Y Zeldes, Amir
%Y Li, Chuyuan
%S Proceedings of the 6th Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences (CODI 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-343-2
%F churin-etal-2025-long
%X Recent progress in Natural Language Processing (NLP) has driven the creation of Large Language Models (LLMs) capable of tackling a vast range of tasks. A critical property of these models is their ability to handle large documents and process long token sequences, which has fostered the need for a robust evaluation methodology for long-text scenarios. To meet this requirement in the context of the Russian language, we present our benchmark consisting of 18 datasets designed to assess LLM performance in tasks such as information retrieval, knowledge extraction, machine reading, question answering, and reasoning. These datasets are categorized into four levels of complexity, enabling model evaluation across context lengths up to 128k tokens. To facilitate further research, we provide open-source datasets, a codebase, and a public leaderboard associated with the benchmark.
%U https://aclanthology.org/2025.codi-1.1/
%P 1-13
Markdown (Informal)
[Long Context Benchmark for the Russian Language](https://aclanthology.org/2025.codi-1.1/) (Churin et al., CODI 2025)
ACL
- Igor Churin, Murat Apishev, Maria Tikhonova, Denis Shevelev, Aydar Bulatov, Yuri Kuratov, Sergei Averkiev, and Alena Fenogenova. 2025. Long Context Benchmark for the Russian Language. In Proceedings of the 6th Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences (CODI 2025), pages 1–13, Suzhou, China. Association for Computational Linguistics.