@inproceedings{nitin-etal-2021-direct,
title = "{DIRECT} : A Transformer-based Model for Decompiled Identifier Renaming",
author = "Nitin, Vikram and
Saieva, Anthony and
Ray, Baishakhi and
Kaiser, Gail",
editor = "Lachmy, Royi and
Yao, Ziyu and
Durrett, Greg and
Gligoric, Milos and
Li, Junyi Jessy and
Mooney, Ray and
Neubig, Graham and
Su, Yu and
Sun, Huan and
Tsarfaty, Reut",
booktitle = "Proceedings of the 1st Workshop on Natural Language Processing for Programming (NLP4Prog 2021)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.nlp4prog-1.6",
doi = "10.18653/v1/2021.nlp4prog-1.6",
pages = "48--57",
abstract = "Decompiling binary executables to high-level code is an important step in reverse engineering scenarios, such as malware analysis and legacy code maintenance. However, the generated high-level code is difficult to understand since the original variable names are lost. In this paper, we leverage transformer models to reconstruct the original variable names from decompiled code. Inherent differences between code and natural language present certain challenges in applying conventional transformer-based architectures to variable name recovery. We propose DIRECT, a novel transformer-based architecture customized specifically for the task at hand. We evaluate our model on a dataset of decompiled functions and find that DIRECT outperforms the previous state-of-the-art model by up to 20{\%}. We also present ablation studies evaluating the impact of each of our modifications. We make the source code of DIRECT available to encourage reproducible research.",
}