% Workshop paper (BioTxtM 2016, Osaka) describing a dataset of French death
% certificates annotated with ICD-10 codes. Accents use BibTeX special-character
% form ({\'e}) so sorting and labels work under classic BibTeX.
@InProceedings{lavergne-EtAl:2016:BioTxtM2016,
  author    = {Lavergne, Thomas and N{\'e}v{\'e}ol, Aur{\'e}lie and Robert, Aude and Grouin, Cyril and Rey, Gr{\'e}goire and Zweigenbaum, Pierre},
  title     = {A Dataset for {ICD-10} Coding of Death Certificates: Creation and Usage},
  booktitle = {Proceedings of the Fifth Workshop on Building and Evaluating Resources for Biomedical Text Mining (BioTxtM2016)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {60--69},
  abstract  = {Very few datasets have been released for the evaluation of diagnosis coding
	with the International Classification of Diseases, and only one so far in a
	language other than English. This paper describes a large-scale dataset
	prepared from French death certificates, and the problems which needed to be
	solved to turn it into a dataset suitable for the application of machine
	learning and natural language processing methods of ICD-10 coding. The dataset
	includes the free-text statements written by medical doctors, the associated
	meta-data, the human coder-assigned codes for each statement, as well as the
	statement segments which supported the coder's decision for each code. The
	dataset comprises 93,694 death certificates totalling 276,103 statements and
	377,677 ICD-10 code assignments (3,457 unique codes). It was made available for
	an international automated coding shared task, which attracted five
	participating teams. An extended version of the dataset will be used in a new
	edition of the shared task.},
  url       = {http://aclweb.org/anthology/W16-5107}
}

