% FineWeb-zhtw paper in the ROCLING 2024 proceedings (ACL Anthology 2024.rocling-1.16).
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing styles keep their capitalisation.
@inproceedings{lin-etal-2024-fineweb,
  title     = {{FineWeb-zhtw}: Scalable Curation of Traditional {Chinese} Text Data from the Web},
  author    = {Lin, Cheng-Wen and
               Hsieh, Wan-Hsuan and
               Guan, Kai-Xin and
               Hsu, Chan-Jan and
               Kuo, Chia-Chen and
               Lai, Chuan-Lin and
               Chung, Chung-Wei and
               Wang, Ming-Jen and
               Shiu, Da-Shan},
  editor    = {Tseng, Shu-Chuan and
               Tsao, Yu and
               Huang, Hen-Hsen and
               Fan, Yao-Chung and
               Chang, Chia-Hui},
  booktitle = {Proceedings of the 36th Conference on Computational Linguistics and Speech Processing ({ROCLING} 2024)},
  month     = nov,
  year      = {2024},
  address   = {Taipei City, Taiwan},
  publisher = {The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)},
  url       = {https://aclanthology.org/2024.rocling-1.16/},
  pages     = {129--136},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citation with citekey lin-etal-2024-fineweb. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lin-etal-2024-fineweb">
    <titleInfo>
      <title>FineWeb-zhtw: Scalable Curation of Traditional Chinese Text Data from the Web</title>
    </titleInfo>

    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Cheng-Wen</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wan-Hsuan</namePart>
      <namePart type="family">Hsieh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kai-Xin</namePart>
      <namePart type="family">Guan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chan-Jan</namePart>
      <namePart type="family">Hsu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chia-Chen</namePart>
      <namePart type="family">Kuo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chuan-Lin</namePart>
      <namePart type="family">Lai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chung-Wei</namePart>
      <namePart type="family">Chung</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ming-Jen</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Da-Shan</namePart>
      <namePart type="family">Shiu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 36th Conference on Computational Linguistics and Speech Processing (ROCLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Shu-Chuan</namePart>
        <namePart type="family">Tseng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yu</namePart>
        <namePart type="family">Tsao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hen-Hsen</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yao-Chung</namePart>
        <namePart type="family">Fan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chia-Hui</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
        <place>
          <placeTerm type="text">Taipei City, Taiwan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <!-- Identifiers, location, and page extent within the host item. -->
    <identifier type="citekey">lin-etal-2024-fineweb</identifier>
    <location>
      <url>https://aclanthology.org/2024.rocling-1.16/</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>129</start>
        <end>136</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T FineWeb-zhtw: Scalable Curation of Traditional Chinese Text Data from the Web
%A Lin, Cheng-Wen
%A Hsieh, Wan-Hsuan
%A Guan, Kai-Xin
%A Hsu, Chan-Jan
%A Kuo, Chia-Chen
%A Lai, Chuan-Lin
%A Chung, Chung-Wei
%A Wang, Ming-Jen
%A Shiu, Da-Shan
%Y Tseng, Shu-Chuan
%Y Tsao, Yu
%Y Huang, Hen-Hsen
%Y Fan, Yao-Chung
%Y Chang, Chia-Hui
%S Proceedings of the 36th Conference on Computational Linguistics and Speech Processing (ROCLING 2024)
%D 2024
%8 November
%I The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
%C Taipei City, Taiwan
%F lin-etal-2024-fineweb
%U https://aclanthology.org/2024.rocling-1.16/
%P 129-136
Markdown (Informal)
[FineWeb-zhtw: Scalable Curation of Traditional Chinese Text Data from the Web](https://aclanthology.org/2024.rocling-1.16/) (Lin et al., ROCLING 2024)
ACL
- Cheng-Wen Lin, Wan-Hsuan Hsieh, Kai-Xin Guan, Chan-Jan Hsu, Chia-Chen Kuo, Chuan-Lin Lai, Chung-Wei Chung, Ming-Jen Wang, and Da-Shan Shiu. 2024. FineWeb-zhtw: Scalable Curation of Traditional Chinese Text Data from the Web. In Proceedings of the 36th Conference on Computational Linguistics and Speech Processing (ROCLING 2024), pages 129–136, Taipei City, Taiwan. The Association for Computational Linguistics and Chinese Language Processing (ACLCLP).