% ACL Anthology record: Si et al., Findings of the ACL: EMNLP 2023 (instruction-tuning LLMs in Chinese).
@inproceedings{si-etal-2023-empirical,
    title = "An Empirical Study of Instruction-tuning Large Language Models in {Chinese}",
    author = "Si, Qingyi and
      Wang, Tong and
      Lin, Zheng and
      Zhang, Xu and
      Cao, Yanan and
      Wang, Weiping",
    editor = "Bouamor, Houda and
      Pino, Juan and
      Bali, Kalika",
    booktitle = "Findings of the Association for Computational Linguistics: {EMNLP} 2023",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-emnlp.269",
    doi = "10.18653/v1/2023.findings-emnlp.269",
    pages = "4086--4107",
    abstract = "The success of ChatGPT validates the potential of large language models (LLMs) in artificial general intelligence (AGI). Subsequently, the release of LLMs has sparked the open-source community{'}s interest in instruction-tuning, which is deemed to accelerate ChatGPT{'}s replication process. However, research on instruction-tuning LLMs in Chinese, the world{'}s most spoken language, is still in its early stages. Therefore, this paper makes an in-depth empirical study of instruction-tuning LLMs in Chinese, which can serve as a cookbook that provides valuable findings for effectively customizing LLMs that can better respond to Chinese instructions. Specifically, we systematically explore the impact of LLM bases, parameter-efficient methods, instruction data types, which are the three most important elements for instruction-tuning. Besides, we also conduct experiment to study the impact of other factors, e.g., chain-of-thought data and human-value alignment. We hope that this empirical study can make a modest contribution to the open Chinese version of ChatGPT. This paper will release a powerful Chinese LLM that is comparable to ChatGLM. The code and data are available at https://github.com/PhoebusSi/Alpaca-CoT.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="si-etal-2023-empirical">
<titleInfo>
<title>An Empirical Study of Instruction-tuning Large Language Models in Chinese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qingyi</namePart>
<namePart type="family">Si</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanan</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weiping</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The success of ChatGPT validates the potential of large language models (LLMs) in artificial general intelligence (AGI). Subsequently, the release of LLMs has sparked the open-source community’s interest in instruction-tuning, which is deemed to accelerate ChatGPT’s replication process. However, research on instruction-tuning LLMs in Chinese, the world’s most spoken language, is still in its early stages. Therefore, this paper makes an in-depth empirical study of instruction-tuning LLMs in Chinese, which can serve as a cookbook that provides valuable findings for effectively customizing LLMs that can better respond to Chinese instructions. Specifically, we systematically explore the impact of LLM bases, parameter-efficient methods, instruction data types, which are the three most important elements for instruction-tuning. Besides, we also conduct experiment to study the impact of other factors, e.g., chain-of-thought data and human-value alignment. We hope that this empirical study can make a modest contribution to the open Chinese version of ChatGPT. This paper will release a powerful Chinese LLM that is comparable to ChatGLM. The code and data are available at https://github.com/PhoebusSi/Alpaca-CoT.</abstract>
<identifier type="citekey">si-etal-2023-empirical</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.269</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.269</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>4086</start>
<end>4107</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Empirical Study of Instruction-tuning Large Language Models in Chinese
%A Si, Qingyi
%A Wang, Tong
%A Lin, Zheng
%A Zhang, Xu
%A Cao, Yanan
%A Wang, Weiping
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F si-etal-2023-empirical
%X The success of ChatGPT validates the potential of large language models (LLMs) in artificial general intelligence (AGI). Subsequently, the release of LLMs has sparked the open-source community’s interest in instruction-tuning, which is deemed to accelerate ChatGPT’s replication process. However, research on instruction-tuning LLMs in Chinese, the world’s most spoken language, is still in its early stages. Therefore, this paper makes an in-depth empirical study of instruction-tuning LLMs in Chinese, which can serve as a cookbook that provides valuable findings for effectively customizing LLMs that can better respond to Chinese instructions. Specifically, we systematically explore the impact of LLM bases, parameter-efficient methods, instruction data types, which are the three most important elements for instruction-tuning. Besides, we also conduct experiment to study the impact of other factors, e.g., chain-of-thought data and human-value alignment. We hope that this empirical study can make a modest contribution to the open Chinese version of ChatGPT. This paper will release a powerful Chinese LLM that is comparable to ChatGLM. The code and data are available at https://github.com/PhoebusSi/Alpaca-CoT.
%R 10.18653/v1/2023.findings-emnlp.269
%U https://aclanthology.org/2023.findings-emnlp.269
%U https://doi.org/10.18653/v1/2023.findings-emnlp.269
%P 4086-4107
Markdown (Informal)
[An Empirical Study of Instruction-tuning Large Language Models in Chinese](https://aclanthology.org/2023.findings-emnlp.269) (Si et al., Findings 2023)
ACL