@inproceedings{nguyen-etal-2022-famie,
    title = "{FAMIE}: A Fast Active Learning Framework for Multilingual Information Extraction",
    author = "Nguyen, Minh Van and
      Ngo, Nghia and
      Min, Bonan and
      Nguyen, Thien",
    editor = "Hajishirzi, Hannaneh and
      Ning, Qiang and
      Sil, Avi",
    booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations",
    month = jul,
    year = "2022",
    address = "Hybrid: Seattle, Washington + Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.naacl-demo.14",
    doi = "10.18653/v1/2022.naacl-demo.14",
    pages = "131--139",
    abstract = "This paper presents FAMIE, a comprehensive and efficient active learning (AL) toolkit for multilingual information extraction. FAMIE is designed to address a fundamental problem in existing AL frameworks where annotators need to wait for a long time between annotation batches due to the time-consuming nature of model training and data selection at each AL iteration. This hinders the engagement, productivity, and efficiency of annotators. Based on the idea of using a small proxy network for fast data selection, we introduce a novel knowledge distillation mechanism to synchronize the proxy network with the main large model (i.e., BERT-based) to ensure the appropriateness of the selected annotation examples for the main model. Our AL framework can support multiple languages. The experiments demonstrate the advantages of FAMIE in terms of competitive performance and time efficiency for sequence labeling with AL. We publicly release our code (\url{https://github.com/nlp-uoregon/famie}) and demo website (\url{http://nlp.uoregon.edu:9000/}). A demo video for FAMIE is provided at: \url{https://youtu.be/I2i8n_jAyrY}",
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nguyen-etal-2022-famie">
    <titleInfo>
      <title>FAMIE: A Fast Active Learning Framework for Multilingual Information Extraction</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Minh</namePart>
      <namePart type="given">Van</namePart>
      <namePart type="family">Nguyen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nghia</namePart>
      <namePart type="family">Ngo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bonan</namePart>
      <namePart type="family">Min</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thien</namePart>
      <namePart type="family">Nguyen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Hannaneh</namePart>
        <namePart type="family">Hajishirzi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Qiang</namePart>
        <namePart type="family">Ning</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Avi</namePart>
        <namePart type="family">Sil</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents FAMIE, a comprehensive and efficient active learning (AL) toolkit for multilingual information extraction. FAMIE is designed to address a fundamental problem in existing AL frameworks where annotators need to wait for a long time between annotation batches due to the time-consuming nature of model training and data selection at each AL iteration. This hinders the engagement, productivity, and efficiency of annotators. Based on the idea of using a small proxy network for fast data selection, we introduce a novel knowledge distillation mechanism to synchronize the proxy network with the main large model (i.e., BERT-based) to ensure the appropriateness of the selected annotation examples for the main model. Our AL framework can support multiple languages. The experiments demonstrate the advantages of FAMIE in terms of competitive performance and time efficiency for sequence labeling with AL. We publicly release our code (https://github.com/nlp-uoregon/famie) and demo website (http://nlp.uoregon.edu:9000/). A demo video for FAMIE is provided at: https://youtu.be/I2i8n_jAyrY</abstract>
    <identifier type="citekey">nguyen-etal-2022-famie</identifier>
    <identifier type="doi">10.18653/v1/2022.naacl-demo.14</identifier>
    <location>
      <url>https://aclanthology.org/2022.naacl-demo.14</url>
    </location>
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>131</start>
        <end>139</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T FAMIE: A Fast Active Learning Framework for Multilingual Information Extraction
%A Nguyen, Minh Van
%A Ngo, Nghia
%A Min, Bonan
%A Nguyen, Thien
%Y Hajishirzi, Hannaneh
%Y Ning, Qiang
%Y Sil, Avi
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F nguyen-etal-2022-famie
%X This paper presents FAMIE, a comprehensive and efficient active learning (AL) toolkit for multilingual information extraction. FAMIE is designed to address a fundamental problem in existing AL frameworks where annotators need to wait for a long time between annotation batches due to the time-consuming nature of model training and data selection at each AL iteration. This hinders the engagement, productivity, and efficiency of annotators. Based on the idea of using a small proxy network for fast data selection, we introduce a novel knowledge distillation mechanism to synchronize the proxy network with the main large model (i.e., BERT-based) to ensure the appropriateness of the selected annotation examples for the main model. Our AL framework can support multiple languages. The experiments demonstrate the advantages of FAMIE in terms of competitive performance and time efficiency for sequence labeling with AL. We publicly release our code (https://github.com/nlp-uoregon/famie) and demo website (http://nlp.uoregon.edu:9000/). A demo video for FAMIE is provided at: https://youtu.be/I2i8n_jAyrY
%R 10.18653/v1/2022.naacl-demo.14
%U https://aclanthology.org/2022.naacl-demo.14
%U https://doi.org/10.18653/v1/2022.naacl-demo.14
%P 131-139
Markdown (Informal)

[FAMIE: A Fast Active Learning Framework for Multilingual Information Extraction](https://aclanthology.org/2022.naacl-demo.14) (Nguyen et al., NAACL 2022)

ACL

- Minh Van Nguyen, Nghia Ngo, Bonan Min, and Thien Nguyen. 2022. FAMIE: A Fast Active Learning Framework for Multilingual Information Extraction. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations, pages 131–139, Hybrid: Seattle, Washington + Online. Association for Computational Linguistics.
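
The abstract describes FAMIE's core mechanism: a small proxy network picks the next annotation batch quickly, while a knowledge distillation step keeps that proxy synchronized with the large BERT-based main model so the selected examples stay useful for the main model. The snippet below is a minimal, self-contained sketch of that loop under stated assumptions, not FAMIE's actual code or API: the tiny MLP stand-ins, the entropy acquisition function, the temperature, and all sizes are illustrative placeholders.

```python
# Sketch of proxy-based active learning with knowledge distillation.
# All model classes, sizes, and the acquisition rule are illustrative
# assumptions; they are not FAMIE's implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

def make_model(hidden: int) -> nn.Module:
    # Stand-ins for the BERT-based main model (large) and the proxy (small).
    return nn.Sequential(nn.Linear(32, hidden), nn.ReLU(), nn.Linear(hidden, 5))

def train(model: nn.Module, x: torch.Tensor, y: torch.Tensor, steps: int = 200) -> None:
    # Supervised training on the currently labeled pool.
    opt = torch.optim.Adam(model.parameters(), lr=1e-2)
    for _ in range(steps):
        opt.zero_grad()
        F.cross_entropy(model(x), y).backward()
        opt.step()

def distill(proxy: nn.Module, main: nn.Module, x_pool: torch.Tensor,
            steps: int = 200, temperature: float = 2.0) -> None:
    # Synchronize the proxy with the main model: KL divergence between
    # temperature-softened output distributions on unlabeled data.
    opt = torch.optim.Adam(proxy.parameters(), lr=1e-2)
    with torch.no_grad():
        teacher = F.log_softmax(main(x_pool) / temperature, dim=-1)
    for _ in range(steps):
        opt.zero_grad()
        student = F.log_softmax(proxy(x_pool) / temperature, dim=-1)
        F.kl_div(student, teacher, log_target=True, reduction="batchmean").backward()
        opt.step()

def select_batch(proxy: nn.Module, x_pool: torch.Tensor, k: int = 8) -> torch.Tensor:
    # Entropy-based acquisition with the cheap proxy, so data selection does
    # not require a full pass of the large model.
    with torch.no_grad():
        probs = F.softmax(proxy(x_pool), dim=-1)
    entropy = -(probs * probs.clamp_min(1e-9).log()).sum(dim=-1)
    return entropy.topk(k).indices

# One AL iteration on synthetic data.
x_lab, y_lab = torch.randn(64, 32), torch.randint(0, 5, (64,))
x_pool = torch.randn(512, 32)
main, proxy = make_model(256), make_model(16)
train(main, x_lab, y_lab)            # slow step (BERT-based model in the paper)
distill(proxy, main, x_pool)         # keep the proxy in sync with the main model
print(select_batch(proxy, x_pool))   # indices of examples to annotate next
```

In the paper's setting, the point of the cheap proxy is that data selection no longer waits on the large model, which is what reduces the annotator idle time between batches that the abstract identifies as the bottleneck.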