\begin{thebibliography}{64}
\expandafter\ifx\csname natexlab\endcsname\relax\def\natexlab#1{#1}\fi

\bibitem[{Anderson et~al.(2018)Anderson, He, Buehler, Teney, Johnson, Gould,
  and Zhang}]{anderson2018bottom}
Peter Anderson, Xiaodong He, Chris Buehler, Damien Teney, Mark Johnson, Stephen
  Gould, and Lei Zhang. 2018.
\newblock Bottom-up and top-down attention for image captioning and visual
  question answering.
\newblock In \emph{CVPR}.

\bibitem[{Artetxe et~al.(2018)Artetxe, Labaka, Agirre, and
  Cho}]{artetxe2017unsupervised}
Mikel Artetxe, Gorka Labaka, Eneko Agirre, and Kyunghyun Cho. 2018.
\newblock Unsupervised neural machine translation.
\newblock In \emph{ICLR}.

\bibitem[{Bahdanau et~al.(2015)Bahdanau, Cho, and Bengio}]{bahdanau2014neural}
Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2015.
\newblock Neural machine translation by jointly learning to align and
  translate.
\newblock In \emph{ICLR}.

\bibitem[{Banerjee and Lavie(2005)}]{banerjee2005meteor}
Satanjeev Banerjee and Alon Lavie. 2005.
\newblock {METEOR}: An automatic metric for {MT} evaluation with improved
  correlation with human judgments.
\newblock In \emph{ACL Workshop}.

\bibitem[{Caffarelli and McCann(2010)}]{caffarelli2010free}
Luis Caffarelli and Robert~J McCann. 2010.
\newblock Free boundaries in optimal transport and {Monge-Amp{\`e}re} obstacle
  problems.
\newblock \emph{Annals of Mathematics}.

\bibitem[{Chen et~al.(2018)Chen, Dai, Tao, Zhang, Gan, Shen, Zhang, Wang,
  Zhang, and Carin}]{chen2018adversarial}
Liqun Chen, Shuyang Dai, Chenyang Tao, Haichao Zhang, Zhe Gan, Dinghan Shen,
  Yizhe Zhang, Guoyin Wang, Ruiyi Zhang, and Lawrence Carin. 2018.
\newblock Adversarial text generation via feature-mover's distance.
\newblock In \emph{NeurIPS}.

\bibitem[{Chen et~al.(2020)Chen, Gan, Cheng, Li, Carin, and
  Liu}]{chen2020graph}
Liqun Chen, Zhe Gan, Yu~Cheng, Linjie Li, Lawrence Carin, and Jingjing Liu.
  2020.
\newblock Graph optimal transport for cross-domain alignment.
\newblock In \emph{ICML}.

\bibitem[{Chen et~al.(2019)Chen, Zhang, Zhang, Tao, Gan, Zhang, Li, Shen, Chen,
  and Carin}]{chen2019improving}
Liqun Chen, Yizhe Zhang, Ruiyi Zhang, Chenyang Tao, Zhe Gan, Haichao Zhang, Bai
  Li, Dinghan Shen, Changyou Chen, and Lawrence Carin. 2019.
\newblock Improving sequence-to-sequence learning via optimal transport.
\newblock In \emph{ICLR}.

\bibitem[{Chen and Bansal(2018)}]{chen2018fast}
Yen-Chun Chen and Mohit Bansal. 2018.
\newblock Fast abstractive summarization with reinforce-selected sentence
  rewriting.
\newblock In \emph{ACL}.

\bibitem[{Cho et~al.(2014)Cho, Van~Merri{\"e}nboer, Gulcehre, Bahdanau,
  Bougares, Schwenk, and Bengio}]{cho2014learning}
Kyunghyun Cho, Bart Van~Merri{\"e}nboer, Caglar Gulcehre, Dzmitry Bahdanau,
  Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014.
\newblock Learning phrase representations using {RNN} encoder-decoder for
  statistical machine translation.
\newblock In \emph{EMNLP}.

\bibitem[{Chopra et~al.(2016)Chopra, Auli, and Rush}]{chopra2016abstractive}
Sumit Chopra, Michael Auli, and Alexander~M Rush. 2016.
\newblock Abstractive sentence summarization with attentive recurrent neural
  networks.
\newblock In \emph{NAACL}.

\bibitem[{Craig(2014)}]{Craig:thesis14}
Katy Craig. 2014.
\newblock \emph{The exponential formula for the Wasserstein metric}.
\newblock PhD thesis, Rutgers, The State University of New Jersey.

\bibitem[{Dai et~al.(2019)Dai, Liang, Qiu, and Huang}]{dai2019style}
Ning Dai, Jianze Liang, Xipeng Qiu, and Xuanjing Huang. 2019.
\newblock Style transformer: Unpaired text style transfer without disentangled
  latent representation.
\newblock \emph{arXiv preprint arXiv:1905.05621}.

\bibitem[{Dhingra et~al.(2019)Dhingra, Faruqui, Parikh, Chang, Das, and
  Cohen}]{Dhingra2019}
Bhuwan Dhingra, Manaal Faruqui, Ankur Parikh, Ming-Wei Chang, Dipanjan Das, and
  William~W. Cohen. 2019.
\newblock Handling divergent reference texts when evaluating table-to-text
  generation.
\newblock In \emph{Proceedings of the 57th Annual Meeting of the Association for
  Computational Linguistics}.

\bibitem[{Fedus et~al.(2018)Fedus, Goodfellow, and Dai}]{fedus2018maskgan}
William Fedus, Ian Goodfellow, and Andrew~M Dai. 2018.
\newblock {MaskGAN}: Better text generation via filling in the \_.
\newblock In \emph{ICLR}.

\bibitem[{Feng et~al.(2019)Feng, Ma, Liu, and Luo}]{feng2019unsupervised}
Yang Feng, Lin Ma, Wei Liu, and Jiebo Luo. 2019.
\newblock Unsupervised image captioning.
\newblock In \emph{CVPR}.

\bibitem[{Figalli(2010)}]{figalli2010optimal}
Alessio Figalli. 2010.
\newblock The optimal partial transport problem.
\newblock \emph{Archive for Rational Mechanics and Analysis}.

\bibitem[{Fu et~al.(2018)Fu, Tan, Peng, Zhao, and Yan}]{fu2018style}
Zhenxin Fu, Xiaoye Tan, Nanyun Peng, Dongyan Zhao, and Rui Yan. 2018.
\newblock Style transfer in text: Exploration and evaluation.
\newblock In \emph{AAAI}.

\bibitem[{Gan et~al.(2017)Gan, Gan, He, Pu, Tran, Gao, Carin, and
  Deng}]{gan2017semantic}
Zhe Gan, Chuang Gan, Xiaodong He, Yunchen Pu, Kenneth Tran, Jianfeng Gao,
  Lawrence Carin, and Li~Deng. 2017.
\newblock Semantic compositional networks for visual captioning.
\newblock In \emph{CVPR}.

\bibitem[{Gong et~al.(2019)Gong, Bhat, Wu, Xiong, and
  Hwu}]{gong2019reinforcement}
Hongyu Gong, Suma Bhat, Lingfei Wu, JinJun Xiong, and Wen-mei Hwu. 2019.
\newblock Reinforcement learning based text style transfer without parallel
  training corpus.
\newblock In \emph{ACL}.

\bibitem[{Goodfellow et~al.(2014)Goodfellow, Pouget-Abadie, Mirza, Xu,
  Warde-Farley, Ozair, Courville, and Bengio}]{goodfellow2014generative}
Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley,
  Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014.
\newblock Generative adversarial nets.
\newblock In \emph{NIPS}.

\bibitem[{Gu et~al.(2016)Gu, Lu, Li, and Li}]{gu2016incorporating}
Jiatao Gu, Zhengdong Lu, Hang Li, and Victor~OK Li. 2016.
\newblock Incorporating copying mechanism in sequence-to-sequence learning.
\newblock In \emph{ACL}.

\bibitem[{Hochreiter and Schmidhuber(1997)}]{hochreiter1997long}
Sepp Hochreiter and J{\"u}rgen Schmidhuber. 1997.
\newblock Long short-term memory.
\newblock \emph{Neural Computation}.

\bibitem[{Hu et~al.(2018)Hu, Shi, Yang et~al.}]{hu2018texar}
Zhiting Hu, Haoran Shi, Zichao Yang, et~al. 2018.
\newblock Texar: A modularized, versatile, and extensible toolkit for text
  generation.
\newblock \emph{arXiv preprint arXiv:1809.00794}.

\bibitem[{Hu et~al.(2017)Hu, Yang, Liang, Salakhutdinov, and
  Xing}]{hu2017controllable}
Zhiting Hu, Zichao Yang, Xiaodan Liang, Ruslan Salakhutdinov, and Eric~P Xing.
  2017.
\newblock Controllable text generation.
\newblock In \emph{ICML}.

\bibitem[{Karpathy and Fei-Fei(2015)}]{karpathy2015deep}
Andrej Karpathy and Li~Fei-Fei. 2015.
\newblock Deep visual-semantic alignments for generating image descriptions.
\newblock In \emph{CVPR}.

\bibitem[{Kingma and Ba(2014)}]{kingma2014adam}
Diederik~P Kingma and Jimmy Ba. 2014.
\newblock Adam: A method for stochastic optimization.
\newblock In \emph{ICLR}.

\bibitem[{Kusner et~al.(2015)Kusner, Sun, Kolkin, and
  Weinberger}]{kusner2015word}
Matt Kusner, Yu~Sun, Nicholas Kolkin, and Kilian Weinberger. 2015.
\newblock From word embeddings to document distances.
\newblock In \emph{ICML}.

\bibitem[{Lample et~al.(2017)Lample, Conneau, Denoyer, and
  Ranzato}]{lample2017unsupervised}
Guillaume Lample, Alexis Conneau, Ludovic Denoyer, and Marc'Aurelio Ranzato.
  2017.
\newblock Unsupervised machine translation using monolingual corpora only.
\newblock In \emph{ICLR}.

\bibitem[{Lebret et~al.(2016)Lebret, Grangier, and Auli}]{Wikiemnlp2016}
R{\'e}mi Lebret, David Grangier, and Michael Auli. 2016.
\newblock Neural text generation from structured data with application to the
  biography domain.
\newblock In \emph{Proceedings of the Conference on Empirical Methods in
  Natural Language Processing}.

\bibitem[{Li et~al.(2018)Li, Jia, He, and Liang}]{li2018delete}
Juncen Li, Robin Jia, He~He, and Percy Liang. 2018.
\newblock Delete, retrieve, generate: A simple approach to sentiment and style
  transfer.
\newblock In \emph{NAACL}.

\bibitem[{Lin(2015)}]{linrouge}
Chin-Yew Lin. 2015.
\newblock {ROUGE}: A package for automatic evaluation of summaries.

\bibitem[{Lin et~al.(2017)Lin, Li, He, Zhang, and Sun}]{lin2017adversarial}
Kevin Lin, Dianqi Li, Xiaodong He, Zhengyou Zhang, and Ming-Ting Sun. 2017.
\newblock Adversarial ranking for language generation.
\newblock In \emph{NIPS}.

\bibitem[{Lin et~al.(2014)Lin, Maire, Belongie, Hays, Perona, Ramanan,
  Doll{\'a}r, and Zitnick}]{lin2014microsoft}
Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva
  Ramanan, Piotr Doll{\'a}r, and C~Lawrence Zitnick. 2014.
\newblock Microsoft {COCO}: Common objects in context.
\newblock In \emph{ECCV}.

\bibitem[{Liu et~al.(2018)Liu, Wang, Sha, Chang, and Sui}]{liu2018table}
Tianyu Liu, Kexiang Wang, Lei Sha, Baobao Chang, and Zhifang Sui. 2018.
\newblock Table-to-text generation by structure-aware seq2seq learning.
\newblock In \emph{AAAI}.

\bibitem[{Lu et~al.(2017)Lu, Xiong, Parikh, and Socher}]{lu2017knowing}
Jiasen Lu, Caiming Xiong, Devi Parikh, and Richard Socher. 2017.
\newblock Knowing when to look: Adaptive attention via a visual sentinel for
  image captioning.
\newblock In \emph{CVPR}.

\bibitem[{Luo et~al.(2019)Luo, Li, Zhou, Yang, Chang, Sui, and
  Sun}]{luo2019dual}
Fuli Luo, Peng Li, Jie Zhou, Pengcheng Yang, Baobao Chang, Zhifang Sui, and
  Xu~Sun. 2019.
\newblock A dual reinforcement learning framework for unsupervised text style
  transfer.
\newblock In \emph{IJCAI}.

\bibitem[{Papineni et~al.(2002)Papineni, Roukos, Ward, and
  Zhu}]{papineni2002bleu}
Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
\newblock {BLEU}: a method for automatic evaluation of machine translation.
\newblock In \emph{ACL}.

\bibitem[{Peyr{\'e} et~al.(2017)Peyr{\'e}, Cuturi
  et~al.}]{peyre2017computational}
Gabriel Peyr{\'e}, Marco Cuturi, et~al. 2017.
\newblock Computational optimal transport.
\newblock Technical report.

\bibitem[{Prabhumoye et~al.(2018)Prabhumoye, Tsvetkov, Salakhutdinov, and
  Black}]{prabhumoye2018style}
Shrimai Prabhumoye, Yulia Tsvetkov, Ruslan Salakhutdinov, and Alan~W Black.
  2018.
\newblock Style transfer through back-translation.
\newblock In \emph{ACL}.

\bibitem[{Rush et~al.(2015)Rush, Chopra, and Weston}]{rush2015neural}
Alexander~M Rush, Sumit Chopra, and Jason Weston. 2015.
\newblock A neural attention model for abstractive sentence summarization.
\newblock \emph{arXiv preprint arXiv:1509.00685}.

\bibitem[{See et~al.(2017)See, Liu, and Manning}]{see2017get}
Abigail See, Peter~J Liu, and Christopher~D Manning. 2017.
\newblock Get to the point: summarization with pointer-generator networks.
\newblock In \emph{ACL}.

\bibitem[{Shen et~al.(2017)Shen, Lei, Barzilay, and Jaakkola}]{shen2017style}
Tianxiao Shen, Tao Lei, Regina Barzilay, and Tommi Jaakkola. 2017.
\newblock Style transfer from non-parallel text by cross-alignment.
\newblock In \emph{NIPS}.

\bibitem[{Sudhakar et~al.(2019)Sudhakar, Upadhyay, and
  Maheswaran}]{sudhakar2019transforming}
Akhilesh Sudhakar, Bhargav Upadhyay, and Arjun Maheswaran. 2019.
\newblock Transforming delete, retrieve, generate approach for controlled text
  style transfer.
\newblock In \emph{EMNLP}.

\bibitem[{Sutskever et~al.(2014)Sutskever, Vinyals, and
  Le}]{sutskever2014sequence}
Ilya Sutskever, Oriol Vinyals, and Quoc~V Le. 2014.
\newblock Sequence to sequence learning with neural networks.
\newblock In \emph{NIPS}.

\bibitem[{Sutton et~al.(2000)Sutton, McAllester, Singh, and
  Mansour}]{sutton2000policy}
Richard~S Sutton, David~A McAllester, Satinder~P Singh, and Yishay Mansour.
  2000.
\newblock Policy gradient methods for reinforcement learning with function
  approximation.
\newblock In \emph{NIPS}.

\bibitem[{Tikhonov et~al.(2019)Tikhonov, Shibaev, Nagaev, Nugmanova, and
  Yamshchikov}]{tikhonov2019style}
Alexey Tikhonov, Viacheslav Shibaev, Aleksander Nagaev, Aigul Nugmanova, and
  Ivan~P. Yamshchikov. 2019.
\newblock Style transfer for texts: Retrain, report errors, compare with
  rewrites.
\newblock In \emph{EMNLP}.

\bibitem[{Vaswani et~al.(2017)Vaswani, Shazeer, Parmar, Uszkoreit, Jones,
  Gomez, Kaiser, and Polosukhin}]{vaswani2017attention}
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
  Aidan~N Gomez, {\L}ukasz Kaiser, and Illia Polosukhin. 2017.
\newblock Attention is all you need.
\newblock In \emph{NIPS}.

\bibitem[{Vedantam et~al.(2015)Vedantam, Zitnick, and
  Parikh}]{vedantam2015cider}
Ramakrishna Vedantam, C~Lawrence Zitnick, and Devi Parikh. 2015.
\newblock {CIDEr}: Consensus-based image description evaluation.
\newblock In \emph{CVPR}.

\bibitem[{Villani(2008)}]{villani2008optimal}
C{\'e}dric Villani. 2008.
\newblock \emph{Optimal transport: old and new}.
\newblock Springer Science \& Business Media.

\bibitem[{Vinyals et~al.(2015)Vinyals, Toshev, Bengio, and
  Erhan}]{vinyals2015show}
Oriol Vinyals, Alexander Toshev, Samy Bengio, and Dumitru Erhan. 2015.
\newblock Show and tell: A neural image caption generator.
\newblock In \emph{CVPR}.

\bibitem[{Wang et~al.(2018)Wang, Pan, Huang, Zhang, Jiang, Ji, and
  Knight}]{wang2018describing}
Qingyun Wang, Xiaoman Pan, Lifu Huang, Boliang Zhang, Zhiying Jiang, Heng Ji,
  and Kevin Knight. 2018.
\newblock Describing a knowledge base.
\newblock \emph{arXiv preprint arXiv:1809.01797}.

\bibitem[{Wang et~al.(2019)Wang, Gan, Xu, Zhang, Wang, Shen, Chen, and
  Carin}]{wang2019sequence}
Wenlin Wang, Zhe Gan, Hongteng Xu, Ruiyi Zhang, Guoyin Wang, Dinghan Shen,
  Changyou Chen, and Lawrence Carin. 2019.
\newblock Topic-guided variational autoencoders for text generation.
\newblock In \emph{NAACL}.

\bibitem[{Wiegreffe and Pinter(2019)}]{wiegreffe2019attention}
Sarah Wiegreffe and Yuval Pinter. 2019.
\newblock Attention is not not explanation.
\newblock In \emph{EMNLP}.

\bibitem[{Wiseman and Rush(2016)}]{wiseman2016sequence}
Sam Wiseman and Alexander~M Rush. 2016.
\newblock Sequence-to-sequence learning as beam-search optimization.
\newblock In \emph{EMNLP}.

\bibitem[{Wiseman et~al.(2018)Wiseman, Shieber, and Rush}]{Wiseman2018}
Sam Wiseman, Stuart~M. Shieber, and Alexander~M. Rush. 2018.
\newblock Learning neural templates for text generation.
\newblock In \emph{Proceedings of the Conference on Empirical Methods in
  Natural Language Processing}.

\bibitem[{Wu et~al.(2019)Wu, Ren, Luo, and Sun}]{wu2019hierarchical}
Chen Wu, Xuancheng Ren, Fuli Luo, and Xu~Sun. 2019.
\newblock A hierarchical reinforced sequence operation method for unsupervised
  text style transfer.
\newblock In \emph{ACL}.

\bibitem[{Xie et~al.(2018)Xie, Wang, Wang, and Zha}]{xie2018fast}
Yujia Xie, Xiangfeng Wang, Ruijia Wang, and Hongyuan Zha. 2018.
\newblock A fast proximal point method for {Wasserstein} distance.
\newblock \emph{arXiv preprint arXiv:1802.04307}.

\bibitem[{Xu et~al.(2015)Xu, Ba, Kiros, Cho, Courville, Salakhutdinov, Zemel,
  and Bengio}]{xu2015show}
Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron~C Courville, Ruslan
  Salakhutdinov, Richard~S Zemel, and Yoshua Bengio. 2015.
\newblock Show, attend and tell: Neural image caption generation with visual
  attention.
\newblock In \emph{ICML}.

\bibitem[{Yang et~al.(2018)Yang, Hu, Dyer, Xing, and
  Berg-Kirkpatrick}]{yang2018unsupervised}
Zichao Yang, Zhiting Hu, Chris Dyer, Eric~P Xing, and Taylor Berg-Kirkpatrick.
  2018.
\newblock Unsupervised text style transfer using language models as
  discriminators.
\newblock In \emph{NeurIPS}.

\bibitem[{Yu et~al.(2017)Yu, Zhang, Wang, and Yu}]{yu2017seqgan}
Lantao Yu, Weinan Zhang, Jun Wang, and Yong Yu. 2017.
\newblock {SeqGAN}: Sequence generative adversarial nets with policy gradient.
\newblock In \emph{AAAI}.

\bibitem[{Zhang et~al.(2018)Zhang, Chen, Li, and Carin}]{zhang2018policy}
Ruiyi Zhang, Changyou Chen, Chunyuan Li, and Lawrence Carin. 2018.
\newblock Policy optimization as {Wasserstein} gradient flows.
\newblock In \emph{ICML}.

\bibitem[{Zhang et~al.(2019)Zhang, Yu, Chen, and Carin}]{zhang2020reccon}
Ruiyi Zhang, Tong Yu, Changyou Chen, and Lawrence Carin. 2019.
\newblock Text-based interactive recommendation via constraint augmented
  reinforcement learning.
\newblock In \emph{NeurIPS}.

\bibitem[{Zhang et~al.(2017)Zhang, Gan, Fan, Chen, Henao, Shen, and
  Carin}]{zhang2017adversarial}
Yizhe Zhang, Zhe Gan, Kai Fan, Zhi Chen, Ricardo Henao, Dinghan Shen, and
  Lawrence Carin. 2017.
\newblock Adversarial feature matching for text generation.
\newblock In \emph{ICML}.

\end{thebibliography}