@inproceedings{liu-etal-2025-variable,
title = "Variable Extraction for Model Recovery in Scientific Literature",
author = "Liu, Chunwei and
Noriega-Atala, Enrique and
Pyarelal, Adarsh and
Morrison, Clayton T and
Cafarella, Mike",
editor = "Jansen, Peter and
Dalvi Mishra, Bhavana and
Trivedi, Harsh and
Prasad Majumder, Bodhisattwa and
Hope, Tom and
Khot, Tushar and
Downey, Doug and
Horvitz, Eric",
booktitle = "Proceedings of the 1st Workshop on AI and Scientific Discovery: Directions and Opportunities",
month = may,
year = "2025",
address = "Albuquerque, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.aisd-main.1/",
doi = "10.18653/v1/2025.aisd-main.1",
pages = "1--12",
ISBN = "979-8-89176-224-4",
abstract = "Due to the increasing productivity in the scientific community, it is difficult to keep up with the literature without the assistance of AI methods. This paper evaluates various methods for extracting mathematical model variables from epidemiological studies, such as ``infection rate ($\alpha$),'' ``recovery rate ($\gamma$),'' and ``mortality rate ($\mu$).'' Variable extraction appears to be a basic task, but plays a pivotal role in recovering models from scientific literature. Once extracted, we can use these variables for automatic mathematical modeling, simulation, and replication of published results. We also introduce a benchmark dataset comprising manually-annotated variable descriptions and variable values extracted from scientific papers. Our analysis shows that LLM-based solutions perform the best. Despite the incremental benefits of combining rule-based extraction outputs with LLMs, the leap in performance attributed to the transfer-learning and instruction-tuning capabilities of LLMs themselves is far more significant. This investigation demonstrates the potential of LLMs to enhance automatic comprehension of scientific artifacts and for automatic model recovery and simulation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2025-variable">
<titleInfo>
<title>Variable Extraction for Model Recovery in Scientific Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chunwei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrique</namePart>
<namePart type="family">Noriega-Atala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adarsh</namePart>
<namePart type="family">Pyarelal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clayton</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Morrison</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Cafarella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on AI and Scientific Discovery: Directions and Opportunities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Jansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhavana</namePart>
<namePart type="family">Dalvi Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harsh</namePart>
<namePart type="family">Trivedi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bodhisattwa</namePart>
<namePart type="family">Prasad Majumder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Hope</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tushar</namePart>
<namePart type="family">Khot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Doug</namePart>
<namePart type="family">Downey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Horvitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-224-4</identifier>
</relatedItem>
<abstract>Due to the increasing productivity in the scientific community, it is difficult to keep up with the literature without the assistance of AI methods. This paper evaluates various methods for extracting mathematical model variables from epidemiological studies, such as “infection rate (α),” “recovery rate (γ),” and “mortality rate (μ).” Variable extraction appears to be a basic task, but plays a pivotal role in recovering models from scientific literature. Once extracted, we can use these variables for automatic mathematical modeling, simulation, and replication of published results. We also introduce a benchmark dataset comprising manually-annotated variable descriptions and variable values extracted from scientific papers. Our analysis shows that LLM-based solutions perform the best. Despite the incremental benefits of combining rule-based extraction outputs with LLMs, the leap in performance attributed to the transfer-learning and instruction-tuning capabilities of LLMs themselves is far more significant. This investigation demonstrates the potential of LLMs to enhance automatic comprehension of scientific artifacts and for automatic model recovery and simulation.</abstract>
<identifier type="citekey">liu-etal-2025-variable</identifier>
<identifier type="doi">10.18653/v1/2025.aisd-main.1</identifier>
<location>
<url>https://aclanthology.org/2025.aisd-main.1/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>1</start>
<end>12</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Variable Extraction for Model Recovery in Scientific Literature
%A Liu, Chunwei
%A Noriega-Atala, Enrique
%A Pyarelal, Adarsh
%A Morrison, Clayton T.
%A Cafarella, Mike
%Y Jansen, Peter
%Y Dalvi Mishra, Bhavana
%Y Trivedi, Harsh
%Y Prasad Majumder, Bodhisattwa
%Y Hope, Tom
%Y Khot, Tushar
%Y Downey, Doug
%Y Horvitz, Eric
%S Proceedings of the 1st Workshop on AI and Scientific Discovery: Directions and Opportunities
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico, USA
%@ 979-8-89176-224-4
%F liu-etal-2025-variable
%X Due to the increasing productivity in the scientific community, it is difficult to keep up with the literature without the assistance of AI methods. This paper evaluates various methods for extracting mathematical model variables from epidemiological studies, such as “infection rate (α),” “recovery rate (γ),” and “mortality rate (μ).” Variable extraction appears to be a basic task, but plays a pivotal role in recovering models from scientific literature. Once extracted, we can use these variables for automatic mathematical modeling, simulation, and replication of published results. We also introduce a benchmark dataset comprising manually-annotated variable descriptions and variable values extracted from scientific papers. Our analysis shows that LLM-based solutions perform the best. Despite the incremental benefits of combining rule-based extraction outputs with LLMs, the leap in performance attributed to the transfer-learning and instruction-tuning capabilities of LLMs themselves is far more significant. This investigation demonstrates the potential of LLMs to enhance automatic comprehension of scientific artifacts and for automatic model recovery and simulation.
%R 10.18653/v1/2025.aisd-main.1
%U https://aclanthology.org/2025.aisd-main.1/
%U https://doi.org/10.18653/v1/2025.aisd-main.1
%P 1-12
Markdown (Informal)
[Variable Extraction for Model Recovery in Scientific Literature](https://aclanthology.org/2025.aisd-main.1/) (Liu et al., AISD 2025)
ACL
- Chunwei Liu, Enrique Noriega-Atala, Adarsh Pyarelal, Clayton T Morrison, and Mike Cafarella. 2025. Variable Extraction for Model Recovery in Scientific Literature. In Proceedings of the 1st Workshop on AI and Scientific Discovery: Directions and Opportunities, pages 1–12, Albuquerque, New Mexico, USA. Association for Computational Linguistics.