@inproceedings{corlett-penn-2021-reanalyzing,
title = "Reanalyzing the Most Probable Sentence Problem: A Case Study in Explicating the Role of Entropy in Algorithmic Complexity",
author = "Corlett, Eric and
Penn, Gerald",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.294",
doi = "10.18653/v1/2021.eacl-main.294",
pages = "3354--3362",
abstract = "When working with problems in natural language processing, we can find ourselves in situations where the traditional measurements of descriptive complexity are ineffective at describing the behaviour of our algorithms. It is easy to see why {---} the models we use are often general frameworks into which difficult-to-define tasks can be embedded. These frameworks can have more power than we typically use, and so complexity measures such as worst-case running time can drastically overestimate the cost of running our algorithms. In particular, they can make an apparently tractable problem seem NP-complete. Using empirical studies to evaluate performance is a necessary but incomplete method of dealing with this mismatch, since these studies no longer act as a guarantee of good performance. In this paper we use statistical measures such as entropy to give an updated analysis of the complexity of the NP-complete Most Probable Sentence problem for pCFGs, which can then be applied to word sense disambiguation and inference tasks. We can bound both the running time and the error in a simple search algorithm, allowing for a much faster search than the NP-completeness of this problem would suggest.",
}