@article{krishnamurthy-kollar-2013-jointly,
  author    = {Krishnamurthy, Jayant and
               Kollar, Thomas},
  editor    = {Lin, Dekang and
               Collins, Michael},
  title     = {Jointly Learning to Parse and Perceive: Connecting Natural Language to the Physical World},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {1},
  pages     = {193--206},
  year      = {2013},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00220},
  url       = {https://aclanthology.org/Q13-1016},
  abstract  = {This paper introduces Logical Semantics with Perception (LSP), a model for grounded language acquisition that learns to map natural language statements to their referents in a physical environment. For example, given an image, LSP can map the statement {``}blue mug on the table{''} to the set of image segments showing blue mugs on tables. LSP learns physical representations for both categorical ({``}blue,{''} {``}mug{''}) and relational ({``}on{''}) language, and also learns to compose these representations to produce the referents of entire statements. We further introduce a weakly supervised training procedure that estimates LSP{'}s parameters using annotated referents for entire statements, without annotated referents for individual words or the parse structure of the statement. We perform experiments on two applications: scene understanding and geographical question answering. We find that LSP outperforms existing, less expressive models that cannot represent relational language. We further find that weakly supervised training is competitive with fully supervised training while requiring significantly less annotation effort.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krishnamurthy-kollar-2013-jointly">
<titleInfo>
<title>Jointly Learning to Parse and Perceive: Connecting Natural Language to the Physical World</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jayant</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Kollar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2013</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>This paper introduces Logical Semantics with Perception (LSP), a model for grounded language acquisition that learns to map natural language statements to their referents in a physical environment. For example, given an image, LSP can map the statement “blue mug on the table” to the set of image segments showing blue mugs on tables. LSP learns physical representations for both categorical (“blue,” “mug”) and relational (“on”) language, and also learns to compose these representations to produce the referents of entire statements. We further introduce a weakly supervised training procedure that estimates LSP’s parameters using annotated referents for entire statements, without annotated referents for individual words or the parse structure of the statement. We perform experiments on two applications: scene understanding and geographical question answering. We find that LSP outperforms existing, less expressive models that cannot represent relational language. We further find that weakly supervised training is competitive with fully supervised training while requiring significantly less annotation effort.</abstract>
<identifier type="citekey">krishnamurthy-kollar-2013-jointly</identifier>
<identifier type="doi">10.1162/tacl_a_00220</identifier>
<location>
<url>https://aclanthology.org/Q13-1016</url>
</location>
<part>
<date>2013</date>
<detail type="volume"><number>1</number></detail>
<extent unit="page">
<start>193</start>
<end>206</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Jointly Learning to Parse and Perceive: Connecting Natural Language to the Physical World
%A Krishnamurthy, Jayant
%A Kollar, Thomas
%J Transactions of the Association for Computational Linguistics
%D 2013
%V 1
%I MIT Press
%C Cambridge, MA
%F krishnamurthy-kollar-2013-jointly
%X This paper introduces Logical Semantics with Perception (LSP), a model for grounded language acquisition that learns to map natural language statements to their referents in a physical environment. For example, given an image, LSP can map the statement “blue mug on the table” to the set of image segments showing blue mugs on tables. LSP learns physical representations for both categorical (“blue,” “mug”) and relational (“on”) language, and also learns to compose these representations to produce the referents of entire statements. We further introduce a weakly supervised training procedure that estimates LSP’s parameters using annotated referents for entire statements, without annotated referents for individual words or the parse structure of the statement. We perform experiments on two applications: scene understanding and geographical question answering. We find that LSP outperforms existing, less expressive models that cannot represent relational language. We further find that weakly supervised training is competitive with fully supervised training while requiring significantly less annotation effort.
%R 10.1162/tacl_a_00220
%U https://aclanthology.org/Q13-1016
%U https://doi.org/10.1162/tacl_a_00220
%P 193-206
Markdown (Informal)
[Jointly Learning to Parse and Perceive: Connecting Natural Language to the Physical World](https://aclanthology.org/Q13-1016) (Krishnamurthy & Kollar, TACL 2013)
ACL