@inproceedings{tutin-kraif-2017-comparing,
title = "Comparing Recurring Lexico-Syntactic Trees ({RLT}s) and Ngram Techniques for Extended Phraseology Extraction",
author = "Tutin, Agn{\`e}s and
Kraif, Olivier",
editor = "Markantonatou, Stella and
Ramisch, Carlos and
Savary, Agata and
Vincze, Veronika",
booktitle = "Proceedings of the 13th Workshop on Multiword Expressions ({MWE} 2017)",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1724",
doi = "10.18653/v1/W17-1724",
pages = "176--180",
abstract = "This paper aims at assessing to what extent a syntax-based method (Recurring Lexico-syntactic Trees (RLT) extraction) allows us to extract large phraseological units such as prefabricated routines, e.g. {``}as previously said{''} or {``}as far as we/I know{''} in scientific writing. In order to evaluate this method, we compare it to the classical ngram extraction technique, on a subset of recurring segments including speech verbs in a French corpus of scientific writing. Results show that the LRT extraction technique is far more efficient for extended MWEs such as routines or collocations but performs more poorly for surface phenomena such as syntactic constructions or fully frozen expressions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tutin-kraif-2017-comparing">
    <titleInfo>
        <title>Comparing Recurring Lexico-Syntactic Trees (RLTs) and Ngram Techniques for Extended Phraseology Extraction</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Agnès</namePart>
        <namePart type="family">Tutin</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Olivier</namePart>
        <namePart type="family">Kraif</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2017-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 13th Workshop on Multiword Expressions (MWE 2017)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Stella</namePart>
            <namePart type="family">Markantonatou</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Carlos</namePart>
            <namePart type="family">Ramisch</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Agata</namePart>
            <namePart type="family">Savary</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Veronika</namePart>
            <namePart type="family">Vincze</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Valencia, Spain</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper aims at assessing to what extent a syntax-based method (Recurring Lexico-syntactic Trees (RLT) extraction) allows us to extract large phraseological units such as prefabricated routines, e.g. “as previously said” or “as far as we/I know” in scientific writing. In order to evaluate this method, we compare it to the classical ngram extraction technique, on a subset of recurring segments including speech verbs in a French corpus of scientific writing. Results show that the RLT extraction technique is far more efficient for extended MWEs such as routines or collocations but performs more poorly for surface phenomena such as syntactic constructions or fully frozen expressions.</abstract>
<identifier type="citekey">tutin-kraif-2017-comparing</identifier>
<identifier type="doi">10.18653/v1/W17-1724</identifier>
<location>
<url>https://aclanthology.org/W17-1724</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>176</start>
<end>180</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparing Recurring Lexico-Syntactic Trees (RLTs) and Ngram Techniques for Extended Phraseology Extraction
%A Tutin, Agnès
%A Kraif, Olivier
%Y Markantonatou, Stella
%Y Ramisch, Carlos
%Y Savary, Agata
%Y Vincze, Veronika
%S Proceedings of the 13th Workshop on Multiword Expressions (MWE 2017)
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F tutin-kraif-2017-comparing
%X This paper aims at assessing to what extent a syntax-based method (Recurring Lexico-syntactic Trees (RLT) extraction) allows us to extract large phraseological units such as prefabricated routines, e.g. “as previously said” or “as far as we/I know” in scientific writing. In order to evaluate this method, we compare it to the classical ngram extraction technique, on a subset of recurring segments including speech verbs in a French corpus of scientific writing. Results show that the RLT extraction technique is far more efficient for extended MWEs such as routines or collocations but performs more poorly for surface phenomena such as syntactic constructions or fully frozen expressions.
%R 10.18653/v1/W17-1724
%U https://aclanthology.org/W17-1724
%U https://doi.org/10.18653/v1/W17-1724
%P 176-180
Markdown (Informal)
[Comparing Recurring Lexico-Syntactic Trees (RLTs) and Ngram Techniques for Extended Phraseology Extraction](https://aclanthology.org/W17-1724) (Tutin & Kraif, MWE 2017)