@inproceedings{fei-etal-2024-mtls,
title = "{MTLS}: Making Texts into Linguistic Symbols",
author = "Fei, Wenlong and
Wang, Xiaohua and
Hu, Min and
Zhang, Qingyu and
Li, Hongbo",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.206/",
doi = "10.18653/v1/2024.emnlp-main.206",
pages = "3521--3535",
abstract = "In linguistics, all languages can be considered as symbolic systems, with each language relying on symbolic processes to associate specific symbols with meanings. In the same language, there is a fixed correspondence between linguistic symbol and meaning. In different languages, universal meanings follow varying rules of symbolization in one-to-one correspondence with symbols. Most work overlooks the properties of languages as symbol systems. In this paper, we shift the focus to the symbolic properties and introduce MTLS: a pre-training method to improve the multilingual capability of models by Making Texts into Linguistic Symbols. Initially, we replace the vocabulary in pre-trained language models by mapping relations between linguistic symbols and semantics. Subsequently, universal semantics within the symbolic system serve as bridges, linking symbols from different languages to the embedding space of the model, thereby enabling the model to process linguistic symbols. To evaluate the effectiveness of MTLS, we conducted experiments on multilingual tasks using BERT and RoBERTa, respectively, as the backbone. The results indicate that despite having just over 12,000 pieces of English data in pre-training, the improvement that MTLS brings to multilingual capabilities is remarkably significant."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fei-etal-2024-mtls">
<titleInfo>
<title>MTLS: Making Texts into Linguistic Symbols</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenlong</namePart>
<namePart type="family">Fei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaohua</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingyu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongbo</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In linguistics, all languages can be considered as symbolic systems, with each language relying on symbolic processes to associate specific symbols with meanings. In the same language, there is a fixed correspondence between linguistic symbol and meaning. In different languages, universal meanings follow varying rules of symbolization in one-to-one correspondence with symbols. Most work overlooks the properties of languages as symbol systems. In this paper, we shift the focus to the symbolic properties and introduce MTLS: a pre-training method to improve the multilingual capability of models by Making Texts into Linguistic Symbols. Initially, we replace the vocabulary in pre-trained language models by mapping relations between linguistic symbols and semantics. Subsequently, universal semantics within the symbolic system serve as bridges, linking symbols from different languages to the embedding space of the model, thereby enabling the model to process linguistic symbols. To evaluate the effectiveness of MTLS, we conducted experiments on multilingual tasks using BERT and RoBERTa, respectively, as the backbone. The results indicate that despite having just over 12,000 pieces of English data in pre-training, the improvement that MTLS brings to multilingual capabilities is remarkably significant.</abstract>
<identifier type="citekey">fei-etal-2024-mtls</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.206</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.206/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>3521</start>
<end>3535</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MTLS: Making Texts into Linguistic Symbols
%A Fei, Wenlong
%A Wang, Xiaohua
%A Hu, Min
%A Zhang, Qingyu
%A Li, Hongbo
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F fei-etal-2024-mtls
%X In linguistics, all languages can be considered as symbolic systems, with each language relying on symbolic processes to associate specific symbols with meanings. In the same language, there is a fixed correspondence between linguistic symbol and meaning. In different languages, universal meanings follow varying rules of symbolization in one-to-one correspondence with symbols. Most work overlooks the properties of languages as symbol systems. In this paper, we shift the focus to the symbolic properties and introduce MTLS: a pre-training method to improve the multilingual capability of models by Making Texts into Linguistic Symbols. Initially, we replace the vocabulary in pre-trained language models by mapping relations between linguistic symbols and semantics. Subsequently, universal semantics within the symbolic system serve as bridges, linking symbols from different languages to the embedding space of the model, thereby enabling the model to process linguistic symbols. To evaluate the effectiveness of MTLS, we conducted experiments on multilingual tasks using BERT and RoBERTa, respectively, as the backbone. The results indicate that despite having just over 12,000 pieces of English data in pre-training, the improvement that MTLS brings to multilingual capabilities is remarkably significant.
%R 10.18653/v1/2024.emnlp-main.206
%U https://aclanthology.org/2024.emnlp-main.206/
%U https://doi.org/10.18653/v1/2024.emnlp-main.206
%P 3521-3535
Markdown (Informal)
[MTLS: Making Texts into Linguistic Symbols](https://aclanthology.org/2024.emnlp-main.206/) (Fei et al., EMNLP 2024)
ACL
- Wenlong Fei, Xiaohua Wang, Min Hu, Qingyu Zhang, and Hongbo Li. 2024. MTLS: Making Texts into Linguistic Symbols. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 3521–3535, Miami, Florida, USA. Association for Computational Linguistics.