@inproceedings{alvarezmelis-jaakkola:2017:EMNLP2017,
  author    = {Alvarez-Melis, David and Jaakkola, Tommi},
  title     = {A causal framework for explaining the predictions of black-box sequence-to-sequence models},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {412--421},
  doi       = {10.18653/v1/D17-1042},
  url       = {https://www.aclweb.org/anthology/D17-1042},
  abstract  = {We interpret the predictions of any black-box structured input-structured
    output model around a specific input-output pair. Our method returns an
    "explanation" consisting of groups of input-output tokens that are causally
    related. These dependencies are inferred by querying the model with perturbed
    inputs, generating a graph over tokens from the responses, and solving a
    partitioning problem to select the most relevant components. We focus the
    general approach on sequence-to-sequence problems, adopting a variational
    autoencoder to yield meaningful input perturbations. We test our method across
    several NLP sequence generation tasks.},
}

