@inproceedings{wiechetek-etal-2021-rules,
title = "Rules Ruling Neural Networks - Neural vs. Rule-Based Grammar Checking for a Low Resource Language",
author = {Wiechetek, Linda and
Pirinen, Flammie and
H{\"a}m{\"a}l{\"a}inen, Mika and
Argese, Chiara},
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.171",
pages = "1526--1535",
abstract = "We investigate both rule-based and machine learning methods for the task of compound error correction and evaluate their efficiency for North S{\'a}mi, a low resource language. The lack of error-free data needed for a neural approach is a challenge to the development of these tools, which is not shared by bigger languages. In order to compensate for that, we used a rule-based grammar checker to remove erroneous sentences and insert compound errors by splitting correct compounds. We describe how we set up the error detection rules, and how we train a bi-RNN based neural network. The precision of the rule-based model tested on a corpus with real errors (81.0{\%}) is slightly better than the neural model (79.4{\%}). The rule-based model is also more flexible with regard to fixing specific errors requested by the user community. However, the neural model has a better recall (98{\%}). The results suggest that an approach that combines the advantages of both models would be desirable in the future. Our tools and data sets are open-source and freely available on GitHub and Zenodo.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wiechetek-etal-2021-rules">
<titleInfo>
<title>Rules Ruling Neural Networks - Neural vs. Rule-Based Grammar Checking for a Low Resource Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Linda</namePart>
<namePart type="family">Wiechetek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flammie</namePart>
<namePart type="family">Pirinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chiara</namePart>
<namePart type="family">Argese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate both rule-based and machine learning methods for the task of compound error correction and evaluate their efficiency for North Sámi, a low resource language. The lack of error-free data needed for a neural approach is a challenge to the development of these tools, which is not shared by bigger languages. In order to compensate for that, we used a rule-based grammar checker to remove erroneous sentences and insert compound errors by splitting correct compounds. We describe how we set up the error detection rules, and how we train a bi-RNN based neural network. The precision of the rule-based model tested on a corpus with real errors (81.0%) is slightly better than the neural model (79.4%). The rule-based model is also more flexible with regard to fixing specific errors requested by the user community. However, the neural model has a better recall (98%). The results suggest that an approach that combines the advantages of both models would be desirable in the future. Our tools and data sets are open-source and freely available on GitHub and Zenodo.</abstract>
<identifier type="citekey">wiechetek-etal-2021-rules</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.171</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>1526</start>
<end>1535</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Rules Ruling Neural Networks - Neural vs. Rule-Based Grammar Checking for a Low Resource Language
%A Wiechetek, Linda
%A Pirinen, Flammie
%A Hämäläinen, Mika
%A Argese, Chiara
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F wiechetek-etal-2021-rules
%X We investigate both rule-based and machine learning methods for the task of compound error correction and evaluate their efficiency for North Sámi, a low resource language. The lack of error-free data needed for a neural approach is a challenge to the development of these tools, which is not shared by bigger languages. In order to compensate for that, we used a rule-based grammar checker to remove erroneous sentences and insert compound errors by splitting correct compounds. We describe how we set up the error detection rules, and how we train a bi-RNN based neural network. The precision of the rule-based model tested on a corpus with real errors (81.0%) is slightly better than the neural model (79.4%). The rule-based model is also more flexible with regard to fixing specific errors requested by the user community. However, the neural model has a better recall (98%). The results suggest that an approach that combines the advantages of both models would be desirable in the future. Our tools and data sets are open-source and freely available on GitHub and Zenodo.
%U https://aclanthology.org/2021.ranlp-1.171
%P 1526-1535
Markdown (Informal)
[Rules Ruling Neural Networks - Neural vs. Rule-Based Grammar Checking for a Low Resource Language](https://aclanthology.org/2021.ranlp-1.171) (Wiechetek et al., RANLP 2021)
ACL