@InProceedings{eckartdecastilho:2016:WLSI-OIAF4HLT2016,
  author    = {Eckart de Castilho, Richard},
  title     = {Automatic Analysis of Flaws in Pre-Trained NLP Models},
  booktitle = {Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI/OIAF4HLT2016)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {19--27},
  abstract  = {Most tools for natural language processing today are based on machine learning
	and come with pre-trained models. In addition, third parties provide
	pre-trained models for popular NLP tools. The predictive power and accuracy of
	these tools depend on the quality of these models. Downstream researchers
	often base their results on pre-trained models instead of training their own.
	Consequently, pre-trained models are an essential resource to our community.
	However, to the best of our knowledge, no systematic study of pre-trained
	models has been conducted so far.
	This paper reports on the analysis of 274 pre-trained models for six NLP tools
	and four potential causes of problems: encoding, tokenization, normalization,
	and change over time. The analysis is implemented in the open-source tool Model
	Investigator. Our work 1) allows model consumers to better assess whether a
	model is suitable for their task, 2) enables tool and model creators to
	sanity-check their models before distributing them, and 3) enables improvements
	in tool interoperability by performing automatic adjustments of normalization
	or other pre-processing based on the models used.},
  url       = {http://aclweb.org/anthology/W16-5203}
}

