@inproceedings{prakash-etal-2026-thesis,
title = "Thesis Proposal: A Normalization-First Framework for Sound, Complete, and Utility-Ready Open Information Extraction",
author = "Prakash, Chandan and
Chittimalli, Pavan Kumar and
Bhattacharya, Arnab",
editor = "T.Y.S.S., Santosh and
Rodriguez, Juan Diego and
de Gibert, Ona",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-srw.116/",
pages = "1291--1304",
ISBN = "979-8-89176-393-7",
abstract = "Open Information Extraction (OIE) has largely focused on extracting relational tuples from text, yet in its current form remains unsuitable for downstream systems due to the absence of standardized, semantically sound representations. This thesis argues that the field has been addressing extraction as a surface-level prediction problem, leading to outputs that are semantically incomplete and logically ambiguous, particularly in the presence of modality, negation, conditionality, quantification, and attribution. We propose a normalization-first framework that reframes OIE as a structured semantic transformation pipeline, where raw text is first converted into a lossless, canonical form of declarative, active-voice, and irreducible sentence units, and extraction is constrained to atomic unary and binary relations augmented with explicit semantic annotations. Within a Probably Approximately Correct (PAC) learning perspective, we formalize soundness, completeness, and usefulness as approximate yet verifiable guarantees over extraction quality, acknowledging the inherent undecidability of full semantic interpretation. This thesis outlines a feasible research program to develop the theoretical foundations, models, and evaluation protocols required to produce system-ready OIE representations, thereby establishing a principled and executable path toward making OIE directly usable for downstream reasoning and machine interpretability."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="prakash-etal-2026-thesis">
<titleInfo>
<title>Thesis Proposal: A Normalization-First Framework for Sound, Complete, and Utility-Ready Open Information Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chandan</namePart>
<namePart type="family">Prakash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavan</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Chittimalli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arnab</namePart>
<namePart type="family">Bhattacharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.Y.S.S.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Diego</namePart>
<namePart type="family">Rodriguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ona</namePart>
<namePart type="family">de Gibert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-393-7</identifier>
</relatedItem>
<abstract>Open Information Extraction (OIE) has largely focused on extracting relational tuples from text, yet in its current form remains unsuitable for downstream systems due to the absence of standardized, semantically sound representations. This thesis argues that the field has been addressing extraction as a surface-level prediction problem, leading to outputs that are semantically incomplete and logically ambiguous, particularly in the presence of modality, negation, conditionality, quantification, and attribution. We propose a normalization-first framework that reframes OIE as a structured semantic transformation pipeline, where raw text is first converted into a lossless, canonical form of declarative, active-voice, and irreducible sentence units, and extraction is constrained to atomic unary and binary relations augmented with explicit semantic annotations. Within a Probably Approximately Correct (PAC) learning perspective, we formalize soundness, completeness, and usefulness as approximate yet verifiable guarantees over extraction quality, acknowledging the inherent undecidability of full semantic interpretation. This thesis outlines a feasible research program to develop the theoretical foundations, models, and evaluation protocols required to produce system-ready OIE representations, thereby establishing a principled and executable path toward making OIE directly usable for downstream reasoning and machine interpretability.</abstract>
<identifier type="citekey">prakash-etal-2026-thesis</identifier>
<location>
<url>https://aclanthology.org/2026.acl-srw.116/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>1291</start>
<end>1304</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Thesis Proposal: A Normalization-First Framework for Sound, Complete, and Utility-Ready Open Information Extraction
%A Prakash, Chandan
%A Chittimalli, Pavan Kumar
%A Bhattacharya, Arnab
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F prakash-etal-2026-thesis
%X Open Information Extraction (OIE) has largely focused on extracting relational tuples from text, yet in its current form remains unsuitable for downstream systems due to the absence of standardized, semantically sound representations. This thesis argues that the field has been addressing extraction as a surface-level prediction problem, leading to outputs that are semantically incomplete and logically ambiguous, particularly in the presence of modality, negation, conditionality, quantification, and attribution. We propose a normalization-first framework that reframes OIE as a structured semantic transformation pipeline, where raw text is first converted into a lossless, canonical form of declarative, active-voice, and irreducible sentence units, and extraction is constrained to atomic unary and binary relations augmented with explicit semantic annotations. Within a Probably Approximately Correct (PAC) learning perspective, we formalize soundness, completeness, and usefulness as approximate yet verifiable guarantees over extraction quality, acknowledging the inherent undecidability of full semantic interpretation. This thesis outlines a feasible research program to develop the theoretical foundations, models, and evaluation protocols required to produce system-ready OIE representations, thereby establishing a principled and executable path toward making OIE directly usable for downstream reasoning and machine interpretability.
%U https://aclanthology.org/2026.acl-srw.116/
%P 1291-1304
Markdown (Informal)
[Thesis Proposal: A Normalization-First Framework for Sound, Complete, and Utility-Ready Open Information Extraction](https://aclanthology.org/2026.acl-srw.116/) (Prakash et al., ACL 2026)
ACL