% Conference paper (long-paper volume of CORIA-TALN 2023).
% The month is given as the predefined BibTeX macro so styles can format and localize it.
@inproceedings{zuo-etal-2023-exploring,
    title = "Exploring Data-Centric Strategies for {French} Patent Classification: A Baseline and Comparisons",
    author = "Zuo, You and
      Sagot, Beno{\^\i}t and
      Gerdes, Kim and
      Mouzoun, Houda and
      Ghamri Doudane, Samir",
    editor = "Servan, Christophe and
      Vilnat, Anne",
    booktitle = "Actes de CORIA-TALN 2023. Actes de la 30e Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN), volume 1 : travaux de recherche originaux -- articles longs",
    month = jun,
    year = "2023",
    address = "Paris, France",
    publisher = "ATALA",
    url = "https://aclanthology.org/2023.jeptalnrecital-long.27",
    pages = "349--365",
    abstract = "This paper proposes a novel approach to French patent classification leveraging data-centric strategies. We compare different approaches for the two deepest levels of the IPC hierarchy: the IPC group and subgroups. Our experiments show that while simple ensemble strategies work for shallower levels, deeper levels require more sophisticated techniques such as data augmentation, clustering, and negative sampling. Our research highlights the importance of language-specific features and data-centric strategies for accurate and reliable French patent classification. It provides valuable insights and solutions for researchers and practitioners in the field of patent classification, advancing research in French patent classification.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the conference paper with citekey zuo-etal-2023-exploring. -->
  <mods ID="zuo-etal-2023-exploring">
    <titleInfo>
      <title>Exploring Data-Centric Strategies for French Patent Classification: A Baseline and Comparisons</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">You</namePart>
      <namePart type="family">Zuo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Benoît</namePart>
      <namePart type="family">Sagot</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kim</namePart>
      <namePart type="family">Gerdes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Houda</namePart>
      <namePart type="family">Mouzoun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Samir</namePart>
      <namePart type="family">Ghamri Doudane</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 1 : travaux de recherche originaux – articles longs</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christophe</namePart>
        <namePart type="family">Servan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anne</namePart>
        <namePart type="family">Vilnat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ATALA</publisher>
        <place>
          <placeTerm type="text">Paris, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper proposes a novel approach to French patent classification leveraging data-centric strategies. We compare different approaches for the two deepest levels of the IPC hierarchy: the IPC group and subgroups. Our experiments show that while simple ensemble strategies work for shallower levels, deeper levels require more sophisticated techniques such as data augmentation, clustering, and negative sampling. Our research highlights the importance of language-specific features and data-centric strategies for accurate and reliable French patent classification. It provides valuable insights and solutions for researchers and practitioners in the field of patent classification, advancing research in French patent classification.</abstract>
    <identifier type="citekey">zuo-etal-2023-exploring</identifier>
    <location>
      <url>https://aclanthology.org/2023.jeptalnrecital-long.27</url>
    </location>
    <part>
      <!-- Zero-padded year-month (YYYY-MM), matching the dateIssued value in this record. -->
      <date>2023-06</date>
      <extent unit="page">
        <start>349</start>
        <end>365</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Data-Centric Strategies for French Patent Classification: A Baseline and Comparisons
%A Zuo, You
%A Sagot, Benoît
%A Gerdes, Kim
%A Mouzoun, Houda
%A Ghamri Doudane, Samir
%Y Servan, Christophe
%Y Vilnat, Anne
%S Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 1 : travaux de recherche originaux – articles longs
%D 2023
%8 June
%I ATALA
%C Paris, France
%F zuo-etal-2023-exploring
%X This paper proposes a novel approach to French patent classification leveraging data-centric strategies. We compare different approaches for the two deepest levels of the IPC hierarchy: the IPC group and subgroups. Our experiments show that while simple ensemble strategies work for shallower levels, deeper levels require more sophisticated techniques such as data augmentation, clustering, and negative sampling. Our research highlights the importance of language-specific features and data-centric strategies for accurate and reliable French patent classification. It provides valuable insights and solutions for researchers and practitioners in the field of patent classification, advancing research in French patent classification.
%U https://aclanthology.org/2023.jeptalnrecital-long.27
%P 349-365
Markdown (Informal)
[Exploring Data-Centric Strategies for French Patent Classification: A Baseline and Comparisons](https://aclanthology.org/2023.jeptalnrecital-long.27) (Zuo et al., JEP/TALN/RECITAL 2023)
ACL