@inproceedings{noriega-atala-etal-2022-neural,
title = "Neural-Guided Program Synthesis of Information Extraction Rules Using Self-Supervision",
author = "Noriega-Atala, Enrique and
Vacareanu, Robert and
Hahn-Powell, Gus and
Valenzuela-Esc{\'a}rcega, Marco A.",
editor = "Chiticariu, Laura and
Goldberg, Yoav and
Hahn-Powell, Gus and
Morrison, Clayton T. and
Naik, Aakanksha and
Sharp, Rebecca and
Surdeanu, Mihai and
Valenzuela-Esc{\'a}rcega, Marco and
Noriega-Atala, Enrique",
booktitle = "Proceedings of the First Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2022.pandl-1.10",
pages = "85--93",
abstract = "We propose a neural-based approach for rule synthesis designed to help bridge the gap between the interpretability, precision and maintainability exhibited by rule-based information extraction systems with the scalability and convenience of statistical information extraction systems. This is achieved by avoiding placing the burden of learning another specialized language on domain experts and instead asking them to provide a small set of examples in the form of highlighted spans of text. We introduce a transformer-based architecture that drives a rule synthesis system that leverages a self-supervised approach for pre-training a large-scale language model complemented by an analysis of different loss functions and aggregation mechanisms for variable length sequences of user-annotated spans of text. The results are encouraging and point to different desirable properties, such as speed and quality, depending on the choice of loss and aggregation method.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="noriega-atala-etal-2022-neural">
<titleInfo>
<title>Neural-Guided Program Synthesis of Information Extraction Rules Using Self-Supervision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Enrique</namePart>
<namePart type="family">Noriega-Atala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Vacareanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gus</namePart>
<namePart type="family">Hahn-Powell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Valenzuela-Escárcega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Chiticariu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gus</namePart>
<namePart type="family">Hahn-Powell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clayton</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Morrison</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Sharp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Surdeanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Valenzuela-Escárcega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrique</namePart>
<namePart type="family">Noriega-Atala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>We propose a neural-based approach for rule synthesis designed to help bridge the gap between the interpretability, precision, and maintainability exhibited by rule-based information extraction systems and the scalability and convenience of statistical information extraction systems. This is achieved by avoiding placing the burden of learning another specialized language on domain experts, and instead asking them to provide a small set of examples in the form of highlighted spans of text. We introduce a transformer-based architecture that drives a rule synthesis system, leveraging a self-supervised approach for pre-training a large-scale language model, complemented by an analysis of different loss functions and aggregation mechanisms for variable-length sequences of user-annotated spans of text. The results are encouraging and point to different desirable properties, such as speed and quality, depending on the choice of loss and aggregation method.</abstract>
<identifier type="citekey">noriega-atala-etal-2022-neural</identifier>
<location>
<url>https://aclanthology.org/2022.pandl-1.10</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>85</start>
<end>93</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neural-Guided Program Synthesis of Information Extraction Rules Using Self-Supervision
%A Noriega-Atala, Enrique
%A Vacareanu, Robert
%A Hahn-Powell, Gus
%A Valenzuela-Escárcega, Marco A.
%Y Chiticariu, Laura
%Y Goldberg, Yoav
%Y Hahn-Powell, Gus
%Y Morrison, Clayton T.
%Y Naik, Aakanksha
%Y Sharp, Rebecca
%Y Surdeanu, Mihai
%Y Valenzuela-Escárcega, Marco
%Y Noriega-Atala, Enrique
%S Proceedings of the First Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning
%D 2022
%8 October
%I International Conference on Computational Linguistics
%C Gyeongju, Republic of Korea
%F noriega-atala-etal-2022-neural
%X We propose a neural-based approach for rule synthesis designed to help bridge the gap between the interpretability, precision, and maintainability exhibited by rule-based information extraction systems and the scalability and convenience of statistical information extraction systems. This is achieved by avoiding placing the burden of learning another specialized language on domain experts, and instead asking them to provide a small set of examples in the form of highlighted spans of text. We introduce a transformer-based architecture that drives a rule synthesis system, leveraging a self-supervised approach for pre-training a large-scale language model, complemented by an analysis of different loss functions and aggregation mechanisms for variable-length sequences of user-annotated spans of text. The results are encouraging and point to different desirable properties, such as speed and quality, depending on the choice of loss and aggregation method.
%U https://aclanthology.org/2022.pandl-1.10
%P 85-93
Markdown (Informal)
[Neural-Guided Program Synthesis of Information Extraction Rules Using Self-Supervision](https://aclanthology.org/2022.pandl-1.10) (Noriega-Atala et al., PANDL 2022)
ACL
Enrique Noriega-Atala, Robert Vacareanu, Gus Hahn-Powell, and Marco A. Valenzuela-Escárcega. 2022. Neural-Guided Program Synthesis of Information Extraction Rules Using Self-Supervision. In Proceedings of the First Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning, pages 85–93, Gyeongju, Republic of Korea. International Conference on Computational Linguistics.