% ACL Anthology record for a paper in the 2024 ALTA workshop proceedings.
% Braces inside the title protect acronyms from style-driven recasing.
@inproceedings{mahdi-etal-2024-advancing,
    title     = {Advancing Community Directories: Leveraging {LLM}s for Automated Extraction in {MARC} Standard Venue Availability Notes},
    author    = {Mahdi, Mostafa Didar and
                 Atapattu, Thushari and
                 Thilakaratne, Menasha},
    editor    = {Baldwin, Tim and
                 Rodr{\'i}guez M{\'e}ndez, Sergio Jos{\'e} and
                 Kuo, Nicholas},
    booktitle = {Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association},
    month     = dec,
    year      = {2024},
    address   = {Canberra, Australia},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.alta-1.9/},
    pages     = {118--129},
    abstract  = {This paper addresses the challenge of efficiently managing and accessing community service information, specifically focusing on venue hire details within the SAcommunity directory. By leveraging Large Language Models (LLMs), particularly the RoBERTa transformer model, we developed an automated system to extract and structure venue availability information according to MARC (Machine-Readable Cataloging) standards. Our approach involved fine-tuning the RoBERTa model on a dataset of community service descriptions, enabling it to identify and categorize key elements such as facility names, capacities, equipment availability, and accessibility features. The model was then applied to process unstructured text data from the SAcommunity database, automatically extracting relevant information and organizing it into standardized fields. The results demonstrate the effectiveness of this method in transforming free-text summaries into structured, MARC-compliant data. This automation not only significantly reduces the time and effort required for data entry and categorization but also enhances the accessibility and usability of community information.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for a single conference paper; the enclosing proceedings
     volume (title, editors, publisher, place) is carried by the relatedItem
     of type "host", while page extent lives in the part element. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="mahdi-etal-2024-advancing">
    <titleInfo>
      <title>Advancing Community Directories: Leveraging LLMs for Automated Extraction in MARC Standard Venue Availability Notes</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mostafa</namePart>
      <namePart type="given">Didar</namePart>
      <namePart type="family">Mahdi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thushari</namePart>
      <namePart type="family">Atapattu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Menasha</namePart>
      <namePart type="family">Thilakaratne</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tim</namePart>
        <namePart type="family">Baldwin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sergio</namePart>
        <namePart type="given">José</namePart>
        <namePart type="family">Rodríguez Méndez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nicholas</namePart>
        <namePart type="family">Kuo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Canberra, Australia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper addresses the challenge of efficiently managing and accessing community service information, specifically focusing on venue hire details within the SAcommunity directory. By leveraging Large Language Models (LLMs), particularly the RoBERTa transformer model, we developed an automated system to extract and structure venue availability information according to MARC (Machine-Readable Cataloging) standards. Our approach involved fine-tuning the RoBERTa model on a dataset of community service descriptions, enabling it to identify and categorize key elements such as facility names, capacities, equipment availability, and accessibility features. The model was then applied to process unstructured text data from the SAcommunity database, automatically extracting relevant information and organizing it into standardized fields. The results demonstrate the effectiveness of this method in transforming free-text summaries into structured, MARC-compliant data. This automation not only significantly reduces the time and effort required for data entry and categorization but also enhances the accessibility and usability of community information.</abstract>
    <identifier type="citekey">mahdi-etal-2024-advancing</identifier>
    <location>
      <url>https://aclanthology.org/2024.alta-1.9/</url>
    </location>
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>118</start>
        <end>129</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Advancing Community Directories: Leveraging LLMs for Automated Extraction in MARC Standard Venue Availability Notes
%A Mahdi, Mostafa Didar
%A Atapattu, Thushari
%A Thilakaratne, Menasha
%Y Baldwin, Tim
%Y Rodríguez Méndez, Sergio José
%Y Kuo, Nicholas
%S Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association
%D 2024
%8 December
%I Association for Computational Linguistics
%C Canberra, Australia
%F mahdi-etal-2024-advancing
%X This paper addresses the challenge of efficiently managing and accessing community service information, specifically focusing on venue hire details within the SAcommunity directory. By leveraging Large Language Models (LLMs), particularly the RoBERTa transformer model, we developed an automated system to extract and structure venue availability information according to MARC (Machine-Readable Cataloging) standards. Our approach involved fine-tuning the RoBERTa model on a dataset of community service descriptions, enabling it to identify and categorize key elements such as facility names, capacities, equipment availability, and accessibility features. The model was then applied to process unstructured text data from the SAcommunity database, automatically extracting relevant information and organizing it into standardized fields. The results demonstrate the effectiveness of this method in transforming free-text summaries into structured, MARC-compliant data. This automation not only significantly reduces the time and effort required for data entry and categorization but also enhances the accessibility and usability of community information.
%U https://aclanthology.org/2024.alta-1.9/
%P 118-129
Markdown (Informal)
[Advancing Community Directories: Leveraging LLMs for Automated Extraction in MARC Standard Venue Availability Notes](https://aclanthology.org/2024.alta-1.9/) (Mahdi et al., ALTA 2024)
ACL