@inproceedings{koleva-etal-2022-named,
title = "Named Entity Recognition in Industrial Tables using Tabular Language Models",
author = "Koleva, Aneta and
Ringsquandl, Martin and
Buckley, Mark and
Hasan, Rakeb and
Tresp, Volker",
editor = "Li, Yunyao and
Lazaridou, Angeliki",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-industry.35",
doi = "10.18653/v1/2022.emnlp-industry.35",
pages = "348--356",
abstract = "Specialized transformer-based models for encoding tabular data have gained interest in academia. Although tabular data is omnipresent in industry, applications of table transformers are still missing. In this paper, we study how these models can be applied to an industrial Named Entity Recognition (NER) problem where the entities are mentioned in tabular-structured spreadsheets. The highly technical nature of spreadsheets as well as the lack of labeled data present major challenges for fine-tuning transformer-based models. Therefore, we develop a dedicated table data augmentation strategy based on available domain-specific knowledge graphs. We show that this boosts performance in our low-resource scenario considerably. Further, we investigate the benefits of tabular structure as inductive bias compared to tables as linearized sequences. Our experiments confirm that a table transformer outperforms other baselines and that its tabular inductive bias is vital for convergence of transformer-based models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="koleva-etal-2022-named">
<titleInfo>
<title>Named Entity Recognition in Industrial Tables using Tabular Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aneta</namePart>
<namePart type="family">Koleva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Ringsquandl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Buckley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rakeb</namePart>
<namePart type="family">Hasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Volker</namePart>
<namePart type="family">Tresp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angeliki</namePart>
<namePart type="family">Lazaridou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Specialized transformer-based models for encoding tabular data have gained interest in academia. Although tabular data is omnipresent in industry, applications of table transformers are still missing. In this paper, we study how these models can be applied to an industrial Named Entity Recognition (NER) problem where the entities are mentioned in tabular-structured spreadsheets. The highly technical nature of spreadsheets as well as the lack of labeled data present major challenges for fine-tuning transformer-based models. Therefore, we develop a dedicated table data augmentation strategy based on available domain-specific knowledge graphs. We show that this boosts performance in our low-resource scenario considerably. Further, we investigate the benefits of tabular structure as inductive bias compared to tables as linearized sequences. Our experiments confirm that a table transformer outperforms other baselines and that its tabular inductive bias is vital for convergence of transformer-based models.</abstract>
<identifier type="citekey">koleva-etal-2022-named</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-industry.35</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-industry.35</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>348</start>
<end>356</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Named Entity Recognition in Industrial Tables using Tabular Language Models
%A Koleva, Aneta
%A Ringsquandl, Martin
%A Buckley, Mark
%A Hasan, Rakeb
%A Tresp, Volker
%Y Li, Yunyao
%Y Lazaridou, Angeliki
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F koleva-etal-2022-named
%X Specialized transformer-based models for encoding tabular data have gained interest in academia. Although tabular data is omnipresent in industry, applications of table transformers are still missing. In this paper, we study how these models can be applied to an industrial Named Entity Recognition (NER) problem where the entities are mentioned in tabular-structured spreadsheets. The highly technical nature of spreadsheets as well as the lack of labeled data present major challenges for fine-tuning transformer-based models. Therefore, we develop a dedicated table data augmentation strategy based on available domain-specific knowledge graphs. We show that this boosts performance in our low-resource scenario considerably. Further, we investigate the benefits of tabular structure as inductive bias compared to tables as linearized sequences. Our experiments confirm that a table transformer outperforms other baselines and that its tabular inductive bias is vital for convergence of transformer-based models.
%R 10.18653/v1/2022.emnlp-industry.35
%U https://aclanthology.org/2022.emnlp-industry.35
%U https://doi.org/10.18653/v1/2022.emnlp-industry.35
%P 348-356
Markdown (Informal)
[Named Entity Recognition in Industrial Tables using Tabular Language Models](https://aclanthology.org/2022.emnlp-industry.35) (Koleva et al., EMNLP 2022)
ACL