@inproceedings{qian-etal-2024-zxq,
title = "{ZXQ} at {S}em{E}val-2024 Task 7: Fine-tuning {GPT}-3.5-Turbo for Numerical Reasoning",
author = "Qian, Zhen and
Xu, Xiaofei and
Zhang, Xiuzhen",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.34",
doi = "10.18653/v1/2024.semeval-1.34",
pages = "218--223",
abstract = "In this paper, we present our system for the SemEval-2024 Task 7, i.e., NumEval subtask 3: Numericial Reasoning. Given a news article and its headline, the numerical reasoning task involves creating a system to compute the intentionally excluded number within the news headline. We propose a fine-tuned GPT-3.5-turbo model, specifically engineered to deduce missing numerals directly from the content of news article. The model is trained with a human-engineered prompt that itegrates the news content and the masked headline, tailoring its accuracy for the designated task. It achieves an accuracy of 0.94 on the test data and secures the second position in the official leaderboard. An examination on the system{'}s inference results reveals its commendable accuracy in identifying correct numerals when they can be directly {``}copied{''} from the articles. However, the error rates increase when it comes to some ambiguous operations such as rounding.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qian-etal-2024-zxq">
<titleInfo>
<title>ZXQ at SemEval-2024 Task 7: Fine-tuning GPT-3.5-Turbo for Numerical Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhen</namePart>
<namePart type="family">Qian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaofei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiuzhen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present our system for the SemEval-2024 Task 7, i.e., NumEval subtask 3: Numericial Reasoning. Given a news article and its headline, the numerical reasoning task involves creating a system to compute the intentionally excluded number within the news headline. We propose a fine-tuned GPT-3.5-turbo model, specifically engineered to deduce missing numerals directly from the content of news article. The model is trained with a human-engineered prompt that itegrates the news content and the masked headline, tailoring its accuracy for the designated task. It achieves an accuracy of 0.94 on the test data and secures the second position in the official leaderboard. An examination on the system’s inference results reveals its commendable accuracy in identifying correct numerals when they can be directly “copied” from the articles. However, the error rates increase when it comes to some ambiguous operations such as rounding.</abstract>
<identifier type="citekey">qian-etal-2024-zxq</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.34</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.34</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>218</start>
<end>223</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ZXQ at SemEval-2024 Task 7: Fine-tuning GPT-3.5-Turbo for Numerical Reasoning
%A Qian, Zhen
%A Xu, Xiaofei
%A Zhang, Xiuzhen
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F qian-etal-2024-zxq
%X In this paper, we present our system for the SemEval-2024 Task 7, i.e., NumEval subtask 3: Numericial Reasoning. Given a news article and its headline, the numerical reasoning task involves creating a system to compute the intentionally excluded number within the news headline. We propose a fine-tuned GPT-3.5-turbo model, specifically engineered to deduce missing numerals directly from the content of news article. The model is trained with a human-engineered prompt that itegrates the news content and the masked headline, tailoring its accuracy for the designated task. It achieves an accuracy of 0.94 on the test data and secures the second position in the official leaderboard. An examination on the system’s inference results reveals its commendable accuracy in identifying correct numerals when they can be directly “copied” from the articles. However, the error rates increase when it comes to some ambiguous operations such as rounding.
%R 10.18653/v1/2024.semeval-1.34
%U https://aclanthology.org/2024.semeval-1.34
%U https://doi.org/10.18653/v1/2024.semeval-1.34
%P 218-223
Markdown (Informal)
[ZXQ at SemEval-2024 Task 7: Fine-tuning GPT-3.5-Turbo for Numerical Reasoning](https://aclanthology.org/2024.semeval-1.34) (Qian et al., SemEval 2024)
ACL