% BibTeX record for the FinTOC-2020 title-detection system paper (FNP 2020 workshop).
% Editor names are given without honorifics: BibTeX reads every token after the
% comma as a given name, so a leading "Dr" would be printed and abbreviated as
% if it were a first name (e.g. "D. M. El-Haj").
@inproceedings{hercig-kral-2020-uwb,
    title = "{UWB}@{F}in{TOC}-2020 Shared Task: Financial Document Title Detection",
    author = "Hercig, Tom{\'a}{\v{s}} and
      Kral, Pavel",
    editor = "El-Haj, Mahmoud and
      Athanasakou, Vasiliki and
      Ferradans, Sira and
      Salzedo, Catherine and
      Elhag, Ans and
      Bouamor, Houda and
      Litvak, Marina and
      Rayson, Paul and
      Giannakopoulos, George and
      Pittaras, Nikiforos",
    booktitle = "Proceedings of the 1st Joint Workshop on Financial Narrative Processing and MultiLing Financial Summarisation",
    month = dec,
    year = "2020",
    address = "Barcelona, Spain (Online)",
    publisher = "COLING",
    url = "https://aclanthology.org/2020.fnp-1.27",
    pages = "158--162",
    abstract = "This paper describes our system created for the Financial Document Structure Extraction Shared Task (FinTOC-2020): Title Detection. We rely on the Apache PDFBox library to extract text and all additional information e.g. font type and font size from the financial prospectuses. Our constrained system uses only the provided training data without any additional external resources. Our system is based on the Maximum Entropy classifier and various features including font type and font size. Our system achieves F1 score 81{\%} and {\#}1 place in the French track and F1 score 77{\%} and {\#}2 place among 5 participating teams in the English track.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey hercig-kral-2020-uwb.
     The top-level mods element describes the paper itself; the nested
     relatedItem type="host" describes the proceedings volume it appears in. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hercig-kral-2020-uwb">
    <titleInfo>
      <title>UWB@FinTOC-2020 Shared Task: Financial Document Title Detection</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Tomáš</namePart>
      <namePart type="family">Hercig</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pavel</namePart>
      <namePart type="family">Kral</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Joint Workshop on Financial Narrative Processing and MultiLing Financial Summarisation</title>
      </titleInfo>
      <!-- Volume editors. The honorific "Dr" is recorded as termsOfAddress so
           that it is not read as a second given name. -->
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Mahmoud</namePart>
        <namePart type="family">El-Haj</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Vasiliki</namePart>
        <namePart type="family">Athanasakou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Sira</namePart>
        <namePart type="family">Ferradans</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Catherine</namePart>
        <namePart type="family">Salzedo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Ans</namePart>
        <namePart type="family">Elhag</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Marina</namePart>
        <namePart type="family">Litvak</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">Paul</namePart>
        <namePart type="family">Rayson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="termsOfAddress">Dr</namePart>
        <namePart type="given">George</namePart>
        <namePart type="family">Giannakopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nikiforos</namePart>
        <namePart type="family">Pittaras</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>COLING</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes our system created for the Financial Document Structure Extraction Shared Task (FinTOC-2020): Title Detection. We rely on the Apache PDFBox library to extract text and all additional information e.g. font type and font size from the financial prospectuses. Our constrained system uses only the provided training data without any additional external resources. Our system is based on the Maximum Entropy classifier and various features including font type and font size. Our system achieves F1 score 81% and #1 place in the French track and F1 score 77% and #2 place among 5 participating teams in the English track.</abstract>
    <identifier type="citekey">hercig-kral-2020-uwb</identifier>
    <location>
      <url>https://aclanthology.org/2020.fnp-1.27</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>158</start>
        <end>162</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T UWB@FinTOC-2020 Shared Task: Financial Document Title Detection
%A Hercig, Tomáš
%A Kral, Pavel
%Y El-Haj, Mahmoud
%Y Athanasakou, Vasiliki
%Y Ferradans, Sira
%Y Salzedo, Catherine
%Y Elhag, Ans
%Y Bouamor, Houda
%Y Litvak, Marina
%Y Rayson, Paul
%Y Giannakopoulos, George
%Y Pittaras, Nikiforos
%S Proceedings of the 1st Joint Workshop on Financial Narrative Processing and MultiLing Financial Summarisation
%D 2020
%8 December
%I COLING
%C Barcelona, Spain (Online)
%F hercig-kral-2020-uwb
%X This paper describes our system created for the Financial Document Structure Extraction Shared Task (FinTOC-2020): Title Detection. We rely on the Apache PDFBox library to extract text and all additional information e.g. font type and font size from the financial prospectuses. Our constrained system uses only the provided training data without any additional external resources. Our system is based on the Maximum Entropy classifier and various features including font type and font size. Our system achieves F1 score 81% and #1 place in the French track and F1 score 77% and #2 place among 5 participating teams in the English track.
%U https://aclanthology.org/2020.fnp-1.27
%P 158-162
Markdown (Informal)
[UWB@FinTOC-2020 Shared Task: Financial Document Title Detection](https://aclanthology.org/2020.fnp-1.27) (Hercig & Kral, FNP 2020)
ACL