@inproceedings{zaidi-etal-2021-document,
title = "Document Level Hierarchical Transformer",
author = "Zaidi, Najam and
Cohn, Trevor and
Haffari, Gholamreza",
editor = "Rahimi, Afshin and
Lane, William and
Zuccon, Guido",
booktitle = "Proceedings of the 19th Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2021",
address = "Online",
publisher = "Australasian Language Technology Association",
url = "https://aclanthology.org/2021.alta-1.13",
pages = "128--137",
abstract = "Generating long and coherent text is an important and challenging task encompassing many application areas such as summarization, document level machine translation and story generation. Despite the success in modeling intra-sentence coherence, existing long text generation models (e.g., BART and GPT-3) still struggle to maintain a coherent event sequence throughout the generated text. We conjecture that this is because of the difficulty for the model to revise, replace, revoke or delete any part that has been generated by the model. In this paper, we present a novel semi-autoregressive document generation model capable of revising and editing the generated text. Building on recent models by (Gu et al., 2019; Xu and Carpuat, 2020) we propose document generation as a hierarchical Markov decision process with a two level hierarchy, where the high and low level editing programs. We train our model using imitation learning (Hussein et al., 2017) and introduce roll-in policy such that each policy learns on the output of applying the previous action. Experiments applying the proposed approach sheds various insights on the problems of long text generation using our model. We suggest various remedies such as using distilled dataset, designing better attention mechanisms and using autoregressive models as a low level program.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zaidi-etal-2021-document">
<titleInfo>
<title>Document Level Hierarchical Transformer</title>
</titleInfo>
<name type="personal">
<namePart type="given">Najam</namePart>
<namePart type="family">Zaidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gholamreza</namePart>
<namePart type="family">Haffari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Lane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guido</namePart>
<namePart type="family">Zuccon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Australasian Language Technology Association</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generating long and coherent text is an important and challenging task encompassing many application areas such as summarization, document level machine translation and story generation. Despite the success in modeling intra-sentence coherence, existing long text generation models (e.g., BART and GPT-3) still struggle to maintain a coherent event sequence throughout the generated text. We conjecture that this is because of the difficulty for the model to revise, replace, revoke or delete any part that has been generated by the model. In this paper, we present a novel semi-autoregressive document generation model capable of revising and editing the generated text. Building on recent models by (Gu et al., 2019; Xu and Carpuat, 2020) we propose document generation as a hierarchical Markov decision process with a two level hierarchy, where the high and low level editing programs. We train our model using imitation learning (Hussein et al., 2017) and introduce roll-in policy such that each policy learns on the output of applying the previous action. Experiments applying the proposed approach sheds various insights on the problems of long text generation using our model. We suggest various remedies such as using distilled dataset, designing better attention mechanisms and using autoregressive models as a low level program.</abstract>
<identifier type="citekey">zaidi-etal-2021-document</identifier>
<location>
<url>https://aclanthology.org/2021.alta-1.13</url>
</location>
<part>
<date>2021-12</date>
<extent unit="page">
<start>128</start>
<end>137</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Document Level Hierarchical Transformer
%A Zaidi, Najam
%A Cohn, Trevor
%A Haffari, Gholamreza
%Y Rahimi, Afshin
%Y Lane, William
%Y Zuccon, Guido
%S Proceedings of the 19th Annual Workshop of the Australasian Language Technology Association
%D 2021
%8 December
%I Australasian Language Technology Association
%C Online
%F zaidi-etal-2021-document
%X Generating long and coherent text is an important and challenging task encompassing many application areas such as summarization, document level machine translation and story generation. Despite the success in modeling intra-sentence coherence, existing long text generation models (e.g., BART and GPT-3) still struggle to maintain a coherent event sequence throughout the generated text. We conjecture that this is because of the difficulty for the model to revise, replace, revoke or delete any part that has been generated by the model. In this paper, we present a novel semi-autoregressive document generation model capable of revising and editing the generated text. Building on recent models by (Gu et al., 2019; Xu and Carpuat, 2020) we propose document generation as a hierarchical Markov decision process with a two level hierarchy, where the high and low level editing programs. We train our model using imitation learning (Hussein et al., 2017) and introduce roll-in policy such that each policy learns on the output of applying the previous action. Experiments applying the proposed approach sheds various insights on the problems of long text generation using our model. We suggest various remedies such as using distilled dataset, designing better attention mechanisms and using autoregressive models as a low level program.
%U https://aclanthology.org/2021.alta-1.13
%P 128-137
Markdown (Informal)
[Document Level Hierarchical Transformer](https://aclanthology.org/2021.alta-1.13) (Zaidi et al., ALTA 2021)
ACL
- Najam Zaidi, Trevor Cohn, and Gholamreza Haffari. 2021. Document Level Hierarchical Transformer. In Proceedings of the 19th Annual Workshop of the Australasian Language Technology Association, pages 128–137, Online. Australasian Language Technology Association.