% Zhou et al. (2025): LLM x MapReduce, in the ACL 2025 proceedings (Volume 1: Long Papers).
% The product name in the title is brace-protected as a whole word so styles
% that sentence-case titles keep its capitalisation and the math intact.
@inproceedings{zhou-etal-2025-llmxmapreduce,
  title     = {{LLM$\times$MapReduce}: Simplified Long-Sequence Processing using Large Language Models},
  author    = {Zhou, Zihan and
               Li, Chong and
               Chen, Xinyi and
               Wang, Shuo and
               Chao, Yu and
               Li, Zhili and
               Wang, Haoyu and
               Shi, Qi and
               Tan, Zhixing and
               Han, Xu and
               Shi, Xiaodong and
               Liu, Zhiyuan and
               Sun, Maosong},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.1341/},
  doi       = {10.18653/v1/2025.acl-long.1341},
  pages     = {27664--27678},
  isbn      = {979-8-89176-251-0},
  abstract  = {We propose a training-free framework that enables large language models (LLMs) to effectively process long texts, using a divide-and-conquer strategy for comprehensive document understanding. The proposed LLM$\times$MapReduce framework splits the entire document into several chunks for LLMs to read and then aggregates the intermediate outputs to produce the final response. The main challenge for divide-and-conquer long text processing frameworks lies in the risk of losing essential long-range information due to document splitting, which can lead the model to produce incomplete or incorrect answers based on the segmented texts. Disrupted long-range information can be classified into two categories: inter-chunk dependency and inter-chunk conflict. We design a structured information protocol to better cope with inter-chunk dependency and an in-context confidence calibration mechanism to resolve inter-chunk conflicts. Experiments demonstrate that LLM$\times$MapReduce outperforms representative open-source and commercial long-context LLMs and is compatible with several models. Our framework can also function as a data synthesis engine, capable of generating high-quality long-alignment data using only short-context LLMs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2025-llmxmapreduce">
<titleInfo>
<title>LLM×MapReduce: Simplified Long-Sequence Processing using Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zihan</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chong</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinyi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuo</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Chao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhili</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haoyu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhixing</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xu</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodong</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>We propose a training-free framework that enables large language models (LLMs) to effectively process long texts, using a divide-and-conquer strategy for comprehensive document understanding. The proposed LLM×MapReduce framework splits the entire document into several chunks for LLMs to read and then aggregates the intermediate outputs to produce the final response. The main challenge for divide-and-conquer long text processing frameworks lies in the risk of losing essential long-range information due to document splitting, which can lead the model to produce incomplete or incorrect answers based on the segmented texts. Disrupted long-range information can be classified into two categories: inter-chunk dependency and inter-chunk conflict. We design a structured information protocol to better cope with inter-chunk dependency and an in-context confidence calibration mechanism to resolve inter-chunk conflicts. Experiments demonstrate that LLM×MapReduce outperforms representative open-source and commercial long-context LLMs and is compatible with several models. Our framework can also function as a data synthesis engine, capable of generating high-quality long-alignment data using only short-context LLMs.</abstract>
<identifier type="citekey">zhou-etal-2025-llmxmapreduce</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1341</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1341/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>27664</start>
<end>27678</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LLM×MapReduce: Simplified Long-Sequence Processing using Large Language Models
%A Zhou, Zihan
%A Li, Chong
%A Chen, Xinyi
%A Wang, Shuo
%A Chao, Yu
%A Li, Zhili
%A Wang, Haoyu
%A Shi, Qi
%A Tan, Zhixing
%A Han, Xu
%A Shi, Xiaodong
%A Liu, Zhiyuan
%A Sun, Maosong
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F zhou-etal-2025-llmxmapreduce
%X We propose a training-free framework that enables large language models (LLMs) to effectively process long texts, using a divide-and-conquer strategy for comprehensive document understanding. The proposed LLM×MapReduce framework splits the entire document into several chunks for LLMs to read and then aggregates the intermediate outputs to produce the final response. The main challenge for divide-and-conquer long text processing frameworks lies in the risk of losing essential long-range information due to document splitting, which can lead the model to produce incomplete or incorrect answers based on the segmented texts. Disrupted long-range information can be classified into two categories: inter-chunk dependency and inter-chunk conflict. We design a structured information protocol to better cope with inter-chunk dependency and an in-context confidence calibration mechanism to resolve inter-chunk conflicts. Experiments demonstrate that LLM×MapReduce outperforms representative open-source and commercial long-context LLMs and is compatible with several models. Our framework can also function as a data synthesis engine, capable of generating high-quality long-alignment data using only short-context LLMs.
%R 10.18653/v1/2025.acl-long.1341
%U https://aclanthology.org/2025.acl-long.1341/
%U https://doi.org/10.18653/v1/2025.acl-long.1341
%P 27664-27678
Markdown (Informal)
[LLM×MapReduce: Simplified Long-Sequence Processing using Large Language Models](https://aclanthology.org/2025.acl-long.1341/) (Zhou et al., ACL 2025)
ACL
- Zihan Zhou, Chong Li, Xinyi Chen, Shuo Wang, Yu Chao, Zhili Li, Haoyu Wang, Qi Shi, Zhixing Tan, Xu Han, Xiaodong Shi, Zhiyuan Liu, and Maosong Sun. 2025. LLM×MapReduce: Simplified Long-Sequence Processing using Large Language Models. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 27664–27678, Vienna, Austria. Association for Computational Linguistics.