@inproceedings{jang-etal-2024-driftwatch,
title = "{D}rift{W}atch: A Tool that Automatically Detects Data Drift and Extracts Representative Examples Affected by Drift",
author = "Jang, Myeongjun and
Georgiadis, Antonios and
Zhao, Yiyun and
Silavong, Fran",
editor = "Yang, Yi and
Davani, Aida and
Sil, Avi and
Kumar, Anoop",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-industry.28",
doi = "10.18653/v1/2024.naacl-industry.28",
pages = "335--346",
abstract = "Data drift, which denotes a misalignment between the distribution of reference (i.e., training) and production data, constitutes a significant challenge for AI applications, as it undermines the generalisation capacity of machine learning (ML) models. Therefore, it is imperative to proactively identify data drift before users meet with performance degradation. Moreover, to ensure the successful execution of AI services, endeavours should be directed not only toward detecting the occurrence of drift but also toward effectively addressing this challenge. {\%} considering the limited resources prevalent in practical industrial domains. In this work, we introduce a tool designed to detect data drift in text data. In addition, we propose an unsupervised sampling technique for extracting representative examples from drifted instances. This approach bestows a practical advantage by significantly reducing expenses associated with annotating the labels for drifted instances, an essential prerequisite for retraining the model to sustain its performance on production data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jang-etal-2024-driftwatch">
<titleInfo>
<title>DriftWatch: A Tool that Automatically Detects Data Drift and Extracts Representative Examples Affected by Drift</title>
</titleInfo>
<name type="personal">
<namePart type="given">Myeongjun</namePart>
<namePart type="family">Jang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Georgiadis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiyun</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fran</namePart>
<namePart type="family">Silavong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Avi</namePart>
<namePart type="family">Sil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Data drift, which denotes a misalignment between the distribution of reference (i.e., training) and production data, constitutes a significant challenge for AI applications, as it undermines the generalisation capacity of machine learning (ML) models. Therefore, it is imperative to proactively identify data drift before users meet with performance degradation. Moreover, to ensure the successful execution of AI services, endeavours should be directed not only toward detecting the occurrence of drift but also toward effectively addressing this challenge. % considering the limited resources prevalent in practical industrial domains. In this work, we introduce a tool designed to detect data drift in text data. In addition, we propose an unsupervised sampling technique for extracting representative examples from drifted instances. This approach bestows a practical advantage by significantly reducing expenses associated with annotating the labels for drifted instances, an essential prerequisite for retraining the model to sustain its performance on production data.</abstract>
<identifier type="citekey">jang-etal-2024-driftwatch</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-industry.28</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-industry.28</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>335</start>
<end>346</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DriftWatch: A Tool that Automatically Detects Data Drift and Extracts Representative Examples Affected by Drift
%A Jang, Myeongjun
%A Georgiadis, Antonios
%A Zhao, Yiyun
%A Silavong, Fran
%Y Yang, Yi
%Y Davani, Aida
%Y Sil, Avi
%Y Kumar, Anoop
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F jang-etal-2024-driftwatch
%X Data drift, which denotes a misalignment between the distribution of reference (i.e., training) and production data, constitutes a significant challenge for AI applications, as it undermines the generalisation capacity of machine learning (ML) models. Therefore, it is imperative to proactively identify data drift before users meet with performance degradation. Moreover, to ensure the successful execution of AI services, endeavours should be directed not only toward detecting the occurrence of drift but also toward effectively addressing this challenge. % considering the limited resources prevalent in practical industrial domains. In this work, we introduce a tool designed to detect data drift in text data. In addition, we propose an unsupervised sampling technique for extracting representative examples from drifted instances. This approach bestows a practical advantage by significantly reducing expenses associated with annotating the labels for drifted instances, an essential prerequisite for retraining the model to sustain its performance on production data.
%R 10.18653/v1/2024.naacl-industry.28
%U https://aclanthology.org/2024.naacl-industry.28
%U https://doi.org/10.18653/v1/2024.naacl-industry.28
%P 335-346
Markdown (Informal)
[DriftWatch: A Tool that Automatically Detects Data Drift and Extracts Representative Examples Affected by Drift](https://aclanthology.org/2024.naacl-industry.28) (Jang et al., NAACL 2024)
ACL