@inproceedings{ash-etal-2021-machine,
title = "Machine Extraction of Tax Laws from Legislative Texts",
author = "Ash, Elliott and
Guillot, Malka and
Han, Luyang",
editor = "Aletras, Nikolaos and
Androutsopoulos, Ion and
Barrett, Leslie and
Goanta, Catalina and
Preotiuc-Pietro, Daniel",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.nllp-1.7",
doi = "10.18653/v1/2021.nllp-1.7",
pages = "76--85",
abstract = "Using a corpus of compiled codes from U.S. states containing labeled tax law sections, we train text classifiers to automatically tag tax-law documents and, further, to identify the associated revenue source (e.g. income, property, or sales). After evaluating classifier performance in held-out test data, we apply them to an historical corpus of U.S. state legislation to extract the flow of relevant laws over the years 1910 through 2010. We document that the classifiers are effective in the historical corpus, for example by automatically detecting establishments of state personal income taxes. The trained models with replication code are published at \url{https://github.com/luyang521/tax-classification}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ash-etal-2021-machine">
<titleInfo>
<title>Machine Extraction of Tax Laws from Legislative Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elliott</namePart>
<namePart type="family">Ash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malka</namePart>
<namePart type="family">Guillot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luyang</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ion</namePart>
<namePart type="family">Androutsopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catalina</namePart>
<namePart type="family">Goanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preotiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Using a corpus of compiled codes from U.S. states containing labeled tax law sections, we train text classifiers to automatically tag tax-law documents and, further, to identify the associated revenue source (e.g. income, property, or sales). After evaluating classifier performance in held-out test data, we apply them to an historical corpus of U.S. state legislation to extract the flow of relevant laws over the years 1910 through 2010. We document that the classifiers are effective in the historical corpus, for example by automatically detecting establishments of state personal income taxes. The trained models with replication code are published at https://github.com/luyang521/tax-classification.</abstract>
<identifier type="citekey">ash-etal-2021-machine</identifier>
<identifier type="doi">10.18653/v1/2021.nllp-1.7</identifier>
<location>
<url>https://aclanthology.org/2021.nllp-1.7</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>76</start>
<end>85</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Machine Extraction of Tax Laws from Legislative Texts
%A Ash, Elliott
%A Guillot, Malka
%A Han, Luyang
%Y Aletras, Nikolaos
%Y Androutsopoulos, Ion
%Y Barrett, Leslie
%Y Goanta, Catalina
%Y Preotiuc-Pietro, Daniel
%S Proceedings of the Natural Legal Language Processing Workshop 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F ash-etal-2021-machine
%X Using a corpus of compiled codes from U.S. states containing labeled tax law sections, we train text classifiers to automatically tag tax-law documents and, further, to identify the associated revenue source (e.g. income, property, or sales). After evaluating classifier performance in held-out test data, we apply them to an historical corpus of U.S. state legislation to extract the flow of relevant laws over the years 1910 through 2010. We document that the classifiers are effective in the historical corpus, for example by automatically detecting establishments of state personal income taxes. The trained models with replication code are published at https://github.com/luyang521/tax-classification.
%R 10.18653/v1/2021.nllp-1.7
%U https://aclanthology.org/2021.nllp-1.7
%U https://doi.org/10.18653/v1/2021.nllp-1.7
%P 76-85
Markdown (Informal)
[Machine Extraction of Tax Laws from Legislative Texts](https://aclanthology.org/2021.nllp-1.7) (Ash et al., NLLP 2021)
ACL