@inproceedings{wandmacher-antoine-2006-training,
    title = "Training Language Models without Appropriate Language Resources: Experiments with an {AAC} System for Disabled People",
    author = "Wandmacher, Tonio  and
      Antoine, Jean-Yves",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Gangemi, Aldo  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Odijk, Jan  and
      Tapias, Daniel",
    booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
    month = may,
    year = "2006",
    address = "Genoa, Italy",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L06-1059/",
    abstract = "Statistical Language Models (LM) are highly dependent on their training resources. This makes it not only difficult to interpret evaluation results, it also has a deteriorating effect on the use of an LM-based application. This question has already been studied by others. Considering a specific domain (text prediction in a communication aid for handicapped people) we want to address the problem from a different point of view: the influence of the language register. Considering corpora from five different registers, we want to discuss three methods to adapt a language model to its actual language resource ultimately reducing the effect of training dependency: (a) A simple cache model augmenting the probability of the n last inserted words; (b) a user dictionary, keeping every unseen word; and (c) a combined LM interpolating a base model with a dynamically updated user model. Our evaluation is based on the results obtained from a text prediction system working on a trigram LM."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wandmacher-antoine-2006-training">
    <titleInfo>
        <title>Training Language Models without Appropriate Language Resources: Experiments with an AAC System for Disabled People</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Tonio</namePart>
        <namePart type="family">Wandmacher</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jean-Yves</namePart>
        <namePart type="family">Antoine</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2006-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Nicoletta</namePart>
            <namePart type="family">Calzolari</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Khalid</namePart>
            <namePart type="family">Choukri</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Aldo</namePart>
            <namePart type="family">Gangemi</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Bente</namePart>
            <namePart type="family">Maegaard</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Joseph</namePart>
            <namePart type="family">Mariani</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jan</namePart>
            <namePart type="family">Odijk</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Daniel</namePart>
            <namePart type="family">Tapias</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>European Language Resources Association (ELRA)</publisher>
            <place>
                <placeTerm type="text">Genoa, Italy</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Statistical Language Models (LM) are highly dependent on their training resources. This makes it not only difficult to interpret evaluation results, it also has a deteriorating effect on the use of an LM-based application. This question has already been studied by others. Considering a specific domain (text prediction in a communication aid for handicapped people) we want to address the problem from a different point of view: the influence of the language register. Considering corpora from five different registers, we want to discuss three methods to adapt a language model to its actual language resource ultimately reducing the effect of training dependency: (a) A simple cache model augmenting the probability of the n last inserted words; (b) a user dictionary, keeping every unseen word; and (c) a combined LM interpolating a base model with a dynamically updated user model. Our evaluation is based on the results obtained from a text prediction system working on a trigram LM.</abstract>
    <identifier type="citekey">wandmacher-antoine-2006-training</identifier>
    <location>
        <url>https://aclanthology.org/L06-1059/</url>
    </location>
    <part>
        <date>2006-05</date>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Training Language Models without Appropriate Language Resources: Experiments with an AAC System for Disabled People
%A Wandmacher, Tonio
%A Antoine, Jean-Yves
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F wandmacher-antoine-2006-training
%X Statistical Language Models (LM) are highly dependent on their training resources. This makes it not only difficult to interpret evaluation results, it also has a deteriorating effect on the use of an LM-based application. This question has already been studied by others. Considering a specific domain (text prediction in a communication aid for handicapped people) we want to address the problem from a different point of view: the influence of the language register. Considering corpora from five different registers, we want to discuss three methods to adapt a language model to its actual language resource ultimately reducing the effect of training dependency: (a) A simple cache model augmenting the probability of the n last inserted words; (b) a user dictionary, keeping every unseen word; and (c) a combined LM interpolating a base model with a dynamically updated user model. Our evaluation is based on the results obtained from a text prediction system working on a trigram LM.
%U https://aclanthology.org/L06-1059/
Markdown (Informal)
[Training Language Models without Appropriate Language Resources: Experiments with an AAC System for Disabled People](https://aclanthology.org/L06-1059/) (Wandmacher & Antoine, LREC 2006)
ACL