% ACL 2025 Industry Track paper; ACL Anthology ID 2025.acl-industry.18.
% Whole words needing case protection in the title are braced (OpenAPI, API).
@inproceedings{lazar-etal-2025-generating,
  title     = {Generating {OpenAPI} Specifications from Online {API} Documentation with Large Language Models},
  author    = {Lazar, Koren and
               Vetzler, Matan and
               Kate, Kiran and
               Tsay, Jason and
               Boaz, David and
               Gupta, Himanshu and
               Shinnar, Avraham and
               Vallam, Rohith D and
               Amid, David and
               Goldbraich, Esther and
               Laredo, Jim and
               Anaby Tavor, Ateret},
  editor    = {Rehm, Georg and
               Li, Yunyao},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-industry.18/},
  doi       = {10.18653/v1/2025.acl-industry.18},
  pages     = {237--253},
  isbn      = {979-8-89176-288-6},
  abstract  = {AI agents and business automation tools interacting with external web services require standardized, machine-readable information about their APIs in the form of API specifications. However, the information about APIs available online is often presented as unstructured, free-form HTML documentation, requiring external users to spend significant time manually converting it into a structured format. To address this, we introduce OASBuilder, a novel framework that transforms long and diverse API documentation pages into consistent, machine-readable API specifications. This is achieved through a carefully crafted pipeline that integrates large language models and rule-based algorithms which are guided by domain knowledge of the structure of documentation webpages. Our experiments demonstrate that OASBuilder generalizes well across hundreds of APIs, and produces valid OpenAPI specifications that encapsulate most of the information from the original documentation. OASBuilder has been successfully implemented in an enterprise environment, saving thousands of hours of manual effort and making hundreds of complex enterprise APIs accessible as tools for LLMs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey lazar-etal-2025-generating: twelve authors, host proceedings with two editors, pages 237-253. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lazar-etal-2025-generating">
    <titleInfo>
      <title>Generating OpenAPI Specifications from Online API Documentation with Large Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Koren</namePart>
      <namePart type="family">Lazar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matan</namePart>
      <namePart type="family">Vetzler</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kiran</namePart>
      <namePart type="family">Kate</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jason</namePart>
      <namePart type="family">Tsay</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Boaz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Himanshu</namePart>
      <namePart type="family">Gupta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Avraham</namePart>
      <namePart type="family">Shinnar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rohith</namePart>
      <namePart type="given">D</namePart>
      <namePart type="family">Vallam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Amid</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Esther</namePart>
      <namePart type="family">Goldbraich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jim</namePart>
      <namePart type="family">Laredo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ateret</namePart>
      <namePart type="family">Anaby Tavor</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Georg</namePart>
        <namePart type="family">Rehm</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yunyao</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-288-6</identifier>
    </relatedItem>
    <abstract>AI agents and business automation tools interacting with external web services require standardized, machine-readable information about their APIs in the form of API specifications. However, the information about APIs available online is often presented as unstructured, free-form HTML documentation, requiring external users to spend significant time manually converting it into a structured format. To address this, we introduce OASBuilder, a novel framework that transforms long and diverse API documentation pages into consistent, machine-readable API specifications. This is achieved through a carefully crafted pipeline that integrates large language models and rule-based algorithms which are guided by domain knowledge of the structure of documentation webpages. Our experiments demonstrate that OASBuilder generalizes well across hundreds of APIs, and produces valid OpenAPI specifications that encapsulate most of the information from the original documentation. OASBuilder has been successfully implemented in an enterprise environment, saving thousands of hours of manual effort and making hundreds of complex enterprise APIs accessible as tools for LLMs.</abstract>
    <identifier type="citekey">lazar-etal-2025-generating</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-industry.18</identifier>
    <location>
      <url>https://aclanthology.org/2025.acl-industry.18/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>237</start>
        <end>253</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Generating OpenAPI Specifications from Online API Documentation with Large Language Models
%A Lazar, Koren
%A Vetzler, Matan
%A Kate, Kiran
%A Tsay, Jason
%A Boaz, David
%A Gupta, Himanshu
%A Shinnar, Avraham
%A Vallam, Rohith D.
%A Amid, David
%A Goldbraich, Esther
%A Laredo, Jim
%A Anaby Tavor, Ateret
%Y Rehm, Georg
%Y Li, Yunyao
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-288-6
%F lazar-etal-2025-generating
%X AI agents and business automation tools interacting with external web services require standardized, machine-readable information about their APIs in the form of API specifications. However, the information about APIs available online is often presented as unstructured, free-form HTML documentation, requiring external users to spend significant time manually converting it into a structured format. To address this, we introduce OASBuilder, a novel framework that transforms long and diverse API documentation pages into consistent, machine-readable API specifications. This is achieved through a carefully crafted pipeline that integrates large language models and rule-based algorithms which are guided by domain knowledge of the structure of documentation webpages. Our experiments demonstrate that OASBuilder generalizes well across hundreds of APIs, and produces valid OpenAPI specifications that encapsulate most of the information from the original documentation. OASBuilder has been successfully implemented in an enterprise environment, saving thousands of hours of manual effort and making hundreds of complex enterprise APIs accessible as tools for LLMs.
%R 10.18653/v1/2025.acl-industry.18
%U https://aclanthology.org/2025.acl-industry.18/
%U https://doi.org/10.18653/v1/2025.acl-industry.18
%P 237-253
Markdown (Informal)
[Generating OpenAPI Specifications from Online API Documentation with Large Language Models](https://aclanthology.org/2025.acl-industry.18/) (Lazar et al., ACL 2025)
ACL
- Koren Lazar, Matan Vetzler, Kiran Kate, Jason Tsay, David Boaz, Himanshu Gupta, Avraham Shinnar, Rohith D Vallam, David Amid, Esther Goldbraich, Jim Laredo, and Ateret Anaby Tavor. 2025. Generating OpenAPI Specifications from Online API Documentation with Large Language Models. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track), pages 237–253, Vienna, Austria. Association for Computational Linguistics.