@inproceedings{kajiyama-etal-2018-de,
title = "De-identifying Free Text of {J}apanese Dummy Electronic Health Records",
author = "Kajiyama, Kohei and
Horiguchi, Hiromasa and
Okumura, Takashi and
Morita, Mizuki and
Kano, Yoshinobu",
editor = "Lavelli, Alberto and
Minard, Anne-Lyse and
Rinaldi, Fabio",
booktitle = "Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-5608/",
doi = "10.18653/v1/W18-5608",
pages = "65--70",
abstract = "A new law was established in Japan to promote utilization of EHRs for research and developments, while de-identification is required to use EHRs. However, studies of automatic de-identification in the healthcare domain is not active for Japanese language, no de-identification tool available in practical performance for Japanese medical domains, as far as we know. Previous work shows that rule-based methods are still effective, while deep learning methods are reported to be better recently. In order to implement and evaluate a de-identification tool in a practical level, we implemented three methods, rule-based, CRF, and LSTM. We prepared three datasets of pseudo EHRs with de-identification tags manually annotated. These datasets are derived from shared task data to compare with previous work, and our new data to increase training data. Our result shows that our LSTM-based method is better and robust, which leads to our future work that plans to apply our system to actual de-identification tasks in hospitals."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kajiyama-etal-2018-de">
<titleInfo>
<title>De-identifying Free Text of Japanese Dummy Electronic Health Records</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kohei</namePart>
<namePart type="family">Kajiyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiromasa</namePart>
<namePart type="family">Horiguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takashi</namePart>
<namePart type="family">Okumura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mizuki</namePart>
<namePart type="family">Morita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoshinobu</namePart>
<namePart type="family">Kano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A new law was established in Japan to promote utilization of EHRs for research and developments, while de-identification is required to use EHRs. However, studies of automatic de-identification in the healthcare domain is not active for Japanese language, no de-identification tool available in practical performance for Japanese medical domains, as far as we know. Previous work shows that rule-based methods are still effective, while deep learning methods are reported to be better recently. In order to implement and evaluate a de-identification tool in a practical level, we implemented three methods, rule-based, CRF, and LSTM. We prepared three datasets of pseudo EHRs with de-identification tags manually annotated. These datasets are derived from shared task data to compare with previous work, and our new data to increase training data. Our result shows that our LSTM-based method is better and robust, which leads to our future work that plans to apply our system to actual de-identification tasks in hospitals.</abstract>
<identifier type="citekey">kajiyama-etal-2018-de</identifier>
<identifier type="doi">10.18653/v1/W18-5608</identifier>
<location>
<url>https://aclanthology.org/W18-5608/</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>65</start>
<end>70</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T De-identifying Free Text of Japanese Dummy Electronic Health Records
%A Kajiyama, Kohei
%A Horiguchi, Hiromasa
%A Okumura, Takashi
%A Morita, Mizuki
%A Kano, Yoshinobu
%Y Lavelli, Alberto
%Y Minard, Anne-Lyse
%Y Rinaldi, Fabio
%S Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F kajiyama-etal-2018-de
%X A new law was established in Japan to promote utilization of EHRs for research and developments, while de-identification is required to use EHRs. However, studies of automatic de-identification in the healthcare domain is not active for Japanese language, no de-identification tool available in practical performance for Japanese medical domains, as far as we know. Previous work shows that rule-based methods are still effective, while deep learning methods are reported to be better recently. In order to implement and evaluate a de-identification tool in a practical level, we implemented three methods, rule-based, CRF, and LSTM. We prepared three datasets of pseudo EHRs with de-identification tags manually annotated. These datasets are derived from shared task data to compare with previous work, and our new data to increase training data. Our result shows that our LSTM-based method is better and robust, which leads to our future work that plans to apply our system to actual de-identification tasks in hospitals.
%R 10.18653/v1/W18-5608
%U https://aclanthology.org/W18-5608/
%U https://doi.org/10.18653/v1/W18-5608
%P 65-70
Markdown (Informal)
[De-identifying Free Text of Japanese Dummy Electronic Health Records](https://aclanthology.org/W18-5608/) (Kajiyama et al., Louhi 2018)
ACL