% Conference paper; ACL Anthology ID S18-2027 (see the url and doi fields).
@inproceedings{mousselly-sergieh-etal-2018-multimodal,
  title     = {A Multimodal Translation-Based Approach for Knowledge Graph Representation Learning},
  author    = {Mousselly-Sergieh, Hatem and Botschen, Teresa and Gurevych, Iryna and Roth, Stefan},
  editor    = {Nissim, Malvina and Berant, Jonathan and Lenci, Alessandro},
  booktitle = {Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics},
  publisher = {Association for Computational Linguistics},
  address   = {New Orleans, Louisiana},
  month     = jun,
  year      = {2018},
  pages     = {225--234},
  doi       = {10.18653/v1/S18-2027},
  url       = {https://aclanthology.org/S18-2027/},
  abstract  = {Current methods for knowledge graph (KG) representation learning focus solely on the structure of the KG and do not exploit any kind of external information, such as visual and linguistic information corresponding to the KG entities. In this paper, we propose a multimodal translation-based approach that defines the energy of a KG triple as the sum of sub-energy functions that leverage both multimodal (visual and linguistic) and structural KG representations. Next, a ranking-based loss is minimized using a simple neural network architecture. Moreover, we introduce a new large-scale dataset for multimodal KG representation learning. We compared the performance of our approach to other baselines on two standard tasks, namely knowledge graph completion and triple classification, using our as well as the WN9-IMG dataset. The results demonstrate that our approach outperforms all baselines on both tasks and datasets.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection containing a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- The record ID is the same string as the "citekey" identifier further down. -->
<mods ID="mousselly-sergieh-etal-2018-multimodal">
<titleInfo>
<title>A Multimodal Translation-Based Approach for Knowledge Graph Representation Learning</title>
</titleInfo>
<!-- Authors: one personal <name> element each, split into given and family parts. -->
<name type="personal">
<namePart type="given">Hatem</namePart>
<namePart type="family">Mousselly-Sergieh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Teresa</namePart>
<namePart type="family">Botschen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher, and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Berant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Current methods for knowledge graph (KG) representation learning focus solely on the structure of the KG and do not exploit any kind of external information, such as visual and linguistic information corresponding to the KG entities. In this paper, we propose a multimodal translation-based approach that defines the energy of a KG triple as the sum of sub-energy functions that leverage both multimodal (visual and linguistic) and structural KG representations. Next, a ranking-based loss is minimized using a simple neural network architecture. Moreover, we introduce a new large-scale dataset for multimodal KG representation learning. We compared the performance of our approach to other baselines on two standard tasks, namely knowledge graph completion and triple classification, using our as well as the WN9-IMG dataset. The results demonstrate that our approach outperforms all baselines on both tasks and datasets.</abstract>
<!-- Identifiers: the citation key and the bare DOI (no resolver prefix). -->
<identifier type="citekey">mousselly-sergieh-etal-2018-multimodal</identifier>
<identifier type="doi">10.18653/v1/S18-2027</identifier>
<location>
<url>https://aclanthology.org/S18-2027/</url>
</location>
<!-- Position within the host item: date and page extent. -->
<part>
<date>2018-06</date>
<extent unit="page">
<start>225</start>
<end>234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Multimodal Translation-Based Approach for Knowledge Graph Representation Learning
%A Mousselly-Sergieh, Hatem
%A Botschen, Teresa
%A Gurevych, Iryna
%A Roth, Stefan
%Y Nissim, Malvina
%Y Berant, Jonathan
%Y Lenci, Alessandro
%S Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F mousselly-sergieh-etal-2018-multimodal
%X Current methods for knowledge graph (KG) representation learning focus solely on the structure of the KG and do not exploit any kind of external information, such as visual and linguistic information corresponding to the KG entities. In this paper, we propose a multimodal translation-based approach that defines the energy of a KG triple as the sum of sub-energy functions that leverage both multimodal (visual and linguistic) and structural KG representations. Next, a ranking-based loss is minimized using a simple neural network architecture. Moreover, we introduce a new large-scale dataset for multimodal KG representation learning. We compared the performance of our approach to other baselines on two standard tasks, namely knowledge graph completion and triple classification, using our as well as the WN9-IMG dataset. The results demonstrate that our approach outperforms all baselines on both tasks and datasets.
%R 10.18653/v1/S18-2027
%U https://aclanthology.org/S18-2027/
%U https://doi.org/10.18653/v1/S18-2027
%P 225-234
Markdown (Informal)
[A Multimodal Translation-Based Approach for Knowledge Graph Representation Learning](https://aclanthology.org/S18-2027/) (Mousselly-Sergieh et al., \*SEM 2018)
ACL