@inproceedings{olstad-etal-2023-generation,
title = "Generation of Replacement Options in Text Sanitization",
author = "Olstad, Annika Willoch and
Papadopoulou, Anthi and
Lison, Pierre",
editor = {Alum{\"a}e, Tanel and
Fishel, Mark},
booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)",
month = may,
year = "2023",
address = "T{\'o}rshavn, Faroe Islands",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2023.nodalida-1.30",
pages = "292--300",
abstract = "The purpose of text sanitization is to edit text documents to mask text spans that may directly or indirectly reveal personal information. An important problem in text sanitization is to find less specific, yet still informative replacements for each text span to mask. We present an approach to generate possible replacements using a combination of heuristic rules and an ontology derived from Wikidata. Those replacement options are hierarchically structured and cover various types of personal identifiers. Using this approach, we extend a recently released text sanitization dataset with manually selected replacements. The outcome of this data collection shows that the approach is able to suggest appropriate replacement options for most text spans.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="olstad-etal-2023-generation">
<titleInfo>
<title>Generation of Replacement Options in Text Sanitization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annika</namePart>
<namePart type="given">Willoch</namePart>
<namePart type="family">Olstad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anthi</namePart>
<namePart type="family">Papadopoulou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Lison</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanel</namePart>
<namePart type="family">Alumäe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The purpose of text sanitization is to edit text documents to mask text spans that may directly or indirectly reveal personal information. An important problem in text sanitization is to find less specific, yet still informative replacements for each text span to mask. We present an approach to generate possible replacements using a combination of heuristic rules and an ontology derived from Wikidata. Those replacement options are hierarchically structured and cover various types of personal identifiers. Using this approach, we extend a recently released text sanitization dataset with manually selected replacements. The outcome of this data collection shows that the approach is able to suggest appropriate replacement options for most text spans.</abstract>
<identifier type="citekey">olstad-etal-2023-generation</identifier>
<location>
<url>https://aclanthology.org/2023.nodalida-1.30</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>292</start>
<end>300</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Generation of Replacement Options in Text Sanitization
%A Olstad, Annika Willoch
%A Papadopoulou, Anthi
%A Lison, Pierre
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F olstad-etal-2023-generation
%X The purpose of text sanitization is to edit text documents to mask text spans that may directly or indirectly reveal personal information. An important problem in text sanitization is to find less specific, yet still informative replacements for each text span to mask. We present an approach to generate possible replacements using a combination of heuristic rules and an ontology derived from Wikidata. Those replacement options are hierarchically structured and cover various types of personal identifiers. Using this approach, we extend a recently released text sanitization dataset with manually selected replacements. The outcome of this data collection shows that the approach is able to suggest appropriate replacement options for most text spans.
%U https://aclanthology.org/2023.nodalida-1.30
%P 292-300
Markdown (Informal)
[Generation of Replacement Options in Text Sanitization](https://aclanthology.org/2023.nodalida-1.30) (Olstad et al., NoDaLiDa 2023)
ACL
- Annika Willoch Olstad, Anthi Papadopoulou, and Pierre Lison. 2023. Generation of Replacement Options in Text Sanitization. In Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa), pages 292–300, Tórshavn, Faroe Islands. University of Tartu Library.