@comment{Bib refs for RL works}
@inproceedings{tripuraneni2021provable,
title={Provable meta-learning of linear representations},
author={Tripuraneni, Nilesh and Jin, Chi and Jordan, Michael},
booktitle={International Conference on Machine Learning},
pages={10434--10443},
year={2021},
organization={PMLR}
}
@inproceedings{mitchell2021offline,
title={Offline meta-reinforcement learning with advantage weighting},
author={Mitchell, Eric and Rafailov, Rafael and Peng, Xue Bin and Levine, Sergey and Finn, Chelsea},
booktitle={International Conference on Machine Learning},
pages={7780--7791},
year={2021},
organization={PMLR}
}
@article{dorfman2021offline,
title={Offline Meta Reinforcement Learning--Identifiability Challenges and Effective Data Collection Strategies},
author={Dorfman, Ron and Shenfeld, Idan and Tamar, Aviv},
journal={Advances in Neural Information Processing Systems},
volume={34},
year={2021}
}
@inproceedings{cella2020meta,
title={Meta-learning with stochastic linear bandits},
author={Cella, Leonardo and Lazaric, Alessandro and Pontil, Massimiliano},
booktitle={International Conference on Machine Learning},
pages={1360--1370},
year={2020},
organization={PMLR}
}
@article{cesa2021multitask,
title={Multitask Online Mirror Descent},
author={Cesa-Bianchi, Nicol{\`o} and Laforgue, Pierre and Paudice, Andrea and Pontil, Massimiliano},
journal={arXiv preprint arXiv:2106.02393},
year={2021}
}
@inproceedings{hu2021near,
title={Near-optimal representation learning for linear bandits and linear {RL}},
author={Hu, Jiachen and Chen, Xiaoyu and Jin, Chi and Li, Lihong and Wang, Liwei},
booktitle={International Conference on Machine Learning},
pages={4349--4358},
year={2021},
organization={PMLR}
}
@article{zhang2021variance,
title={Variance-aware confidence set: Variance-dependent bound for linear bandits and horizon-free bound for linear mixture {MDP}},
author={Zhang, Zihan and Yang, Jiaqi and Ji, Xiangyang and Du, Simon S},
journal={arXiv preprint arXiv:2101.12745},
year={2021}
}
@article{minimax_repr,
title={Nearly Minimax Algorithms for Linear Bandits with Shared Representation},
author={Anonymous},
journal={Under review for ICML},
year={2022}
}
@inproceedings{jin2020provably,
title={Provably efficient reinforcement learning with linear function approximation},
author={Jin, Chi and Yang, Zhuoran and Wang, Zhaoran and Jordan, Michael I},
booktitle={Conference on Learning Theory},
pages={2137--2143},
year={2020},
organization={PMLR}
}
@misc{nguyentang2021sample,
title={Sample Complexity of Offline Reinforcement Learning with Deep ReLU Networks},
author={Thanh Nguyen-Tang and Sunil Gupta and Hung Tran-The and Svetha Venkatesh},
year={2021},
eprint={2103.06671},
archivePrefix={arXiv},
primaryClass={stat.ML}
}
@inproceedings{wang2021instabilities,
title={Instabilities of offline {RL} with pre-trained neural representation},
author={Wang, Ruosong and Wu, Yifan and Salakhutdinov, Ruslan and Kakade, Sham},
booktitle={International Conference on Machine Learning},
pages={10948--10960},
year={2021},
organization={PMLR}
}
@article{Jin2021BellmanED,
title={Bellman Eluder Dimension: New Rich Classes of RL Problems, and Sample-Efficient Algorithms},
author={Chi Jin and Qinghua Liu and Sobhan Miryoosefi},
journal={ArXiv},
year={2021},
volume={abs/2102.00815}
}
@article{nguyen2021offline,
title={Offline Neural Contextual Bandits: Pessimism, Optimization and Generalization},
author={Nguyen-Tang, Thanh and Gupta, Sunil and Nguyen, A Tuan and Venkatesh, Svetha},
journal={arXiv preprint arXiv:2111.13807},
year={2021}
}
@inproceedings{yinnear,
title={Near-optimal Offline Reinforcement Learning with Linear Representation: Leveraging Variance Information with Pessimism},
author={Yin, Ming and Wang, Yu-Xiang and Duan, Yaqi and Wang, Mengdi},
booktitle={International Conference on Learning Representations},
year={2022}
}
@article{yang2020function,
title={On function approximation in reinforcement learning: Optimism in the face of large state spaces},
author={Yang, Zhuoran and Jin, Chi and Wang, Zhaoran and Wang, Mengdi and Jordan, Michael I},
journal={arXiv preprint arXiv:2011.04622},
year={2020}
}
@inproceedings{jin2021pessimism,
title={Is pessimism provably efficient for offline {RL}?},
author={Jin, Ying and Yang, Zhuoran and Wang, Zhaoran},
booktitle={International Conference on Machine Learning},
pages={5084--5096},
year={2021},
organization={PMLR}
}
@inproceedings{cai2020provably,
title={Provably efficient exploration in policy optimization},
author={Cai, Qi and Yang, Zhuoran and Jin, Chi and Wang, Zhaoran},
booktitle={International Conference on Machine Learning},
pages={1283--1294},
year={2020},
organization={PMLR}
}
@article{mucke2021data,
title={Data splitting improves statistical performance in overparametrized regimes},
author={M{\"u}cke, Nicole and Reiss, Enrico and Rungenhagen, Jonas and Klein, Markus},
journal={arXiv preprint arXiv:2110.10956},
year={2021}
}
@article{cai2019neural,
title={Neural temporal-difference learning converges to global optima},
author={Cai, Qi and Yang, Zhuoran and Lee, Jason D and Wang, Zhaoran},
journal={Advances in Neural Information Processing Systems},
volume={32},
year={2019}
}
@article{jacot2018neural,
title={Neural tangent kernel: Convergence and generalization in neural networks},
author={Jacot, Arthur and Gabriel, Franck and Hongler, Cl{\'e}ment},
journal={arXiv preprint arXiv:1806.07572},
year={2018}
}
@article{arora2019exact,
title={On exact computation with an infinitely wide neural net},
author={Arora, Sanjeev and Du, Simon S and Hu, Wei and Li, Zhiyuan and Salakhutdinov, Ruslan and Wang, Ruosong},
journal={arXiv preprint arXiv:1904.11955},
year={2019}
}
@inproceedings{allen2019convergence,
title={A convergence theory for deep learning via over-parameterization},
author={Allen-Zhu, Zeyuan and Li, Yuanzhi and Song, Zhao},
booktitle={International Conference on Machine Learning},
pages={242--252},
year={2019},
organization={PMLR}
}
@article{hanin2019finite,
title={Finite depth and width corrections to the neural tangent kernel},
author={Hanin, Boris and Nica, Mihai},
journal={arXiv preprint arXiv:1909.05989},
year={2019}
}
@article{cao2019generalization,
title={Generalization bounds of stochastic gradient descent for wide and deep neural networks},
author={Cao, Yuan and Gu, Quanquan},
journal={Advances in Neural Information Processing Systems},
volume={32},
pages={10836--10846},
year={2019}
}
@article{belkin2021fit,
title={Fit without fear: remarkable mathematical phenomena of deep learning through the prism of interpolation},
author={Belkin, Mikhail},
journal={arXiv preprint arXiv:2105.14368},
year={2021}
}
@inproceedings{zhou2020neural,
title={Neural contextual bandits with {UCB}-based exploration},
author={Zhou, Dongruo and Li, Lihong and Gu, Quanquan},
booktitle={International Conference on Machine Learning},
pages={11492--11502},
year={2020},
organization={PMLR}
}
@article{dumer2007covering,
title={Covering spheres with spheres},
author={Dumer, Ilya},
journal={Discrete \& Computational Geometry},
volume={38},
number={4},
pages={665--679},
year={2007},
publisher={Springer}
}
@article{gouk2021regularisation,
title={Regularisation of neural networks by enforcing {Lipschitz} continuity},
author={Gouk, Henry and Frank, Eibe and Pfahringer, Bernhard and Cree, Michael J},
journal={Machine Learning},
volume={110},
number={2},
pages={393--416},
year={2021},
publisher={Springer}
}
@inproceedings{nguyen2021tight,
title={Tight bounds on the smallest eigenvalue of the neural tangent kernel for deep {ReLU} networks},
author={Nguyen, Quynh and Mondelli, Marco and Mont{\'u}far, Guido F},
booktitle={International Conference on Machine Learning},
pages={8119--8129},
year={2021},
organization={PMLR}
}
@article{gao2019convergence,
title={Convergence of adversarial training in overparametrized neural networks},
author={Gao, Ruiqi and Cai, Tianle and Li, Haochuan and Hsieh, Cho-Jui and Wang, Liwei and Lee, Jason D},
journal={Advances in Neural Information Processing Systems},
volume={32},
year={2019}
}
@article{fulton2000eigenvalues,
title={Eigenvalues, invariant factors, highest weights, and Schubert calculus},
author={Fulton, William},
journal={Bulletin of the American Mathematical Society},
volume={37},
number={3},
pages={209--249},
year={2000}
}
@article{Schur1911,
author = {Schur, J.},
journal = {Journal f{\"u}r die reine und angewandte Mathematik},
pages = {1--28},
title = {Bemerkungen zur Theorie der beschr{\"a}nkten Bilinearformen mit unendlich vielen Ver{\"a}nderlichen},
url = {http://eudml.org/doc/149352},
volume = {140},
year = {1911},
}
@article{foster2021offline,
title={Offline Reinforcement Learning: Fundamental Barriers for Value Function Approximation},
author={Foster, Dylan J and Krishnamurthy, Akshay and Simchi-Levi, David and Xu, Yunzong},
journal={arXiv preprint arXiv:2111.10919},
year={2021}
}
@article{zhan2022offline,
title={Offline Reinforcement Learning with Realizability and Single-policy Concentrability},
author={Zhan, Wenhao and Huang, Baihe and Huang, Audrey and Jiang, Nan and Lee, Jason D},
journal={arXiv preprint arXiv:2202.04634},
year={2022}
}
@article{yin2020near,
title={Near-Optimal Provable Uniform Convergence in Offline Policy Evaluation for Reinforcement Learning},
author={Yin, Ming and Bai, Yu and Wang, Yu-Xiang},
journal={arXiv preprint arXiv:2007.03760},
year={2020}
}
@inproceedings{szepesvari2005finite,
title={Finite time bounds for sampling based fitted value iteration},
author={Szepesv{\'a}ri, Csaba and Munos, R{\'e}mi},
booktitle={Proceedings of the 22nd international conference on Machine learning},
pages={880--887},
year={2005}
}
@inproceedings{chen2019information,
title={Information-theoretic considerations in batch reinforcement learning},
author={Chen, Jinglin and Jiang, Nan},
booktitle={International Conference on Machine Learning},
pages={1042--1051},
year={2019},
organization={PMLR}
}
@article{liu2019off,
title={Off-policy policy gradient with state distribution correction},
author={Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
journal={arXiv preprint arXiv:1904.08473},
internal-note = {NOTE(review): appears to be the arXiv version of DBLP:conf/uai/LiuSAB19 (UAI 2019) -- consider deduplicating and citing one key},
year={2019}
}
@article{rashidinejad2021bridging,
title={Bridging offline reinforcement learning and imitation learning: A tale of pessimism},
author={Rashidinejad, Paria and Zhu, Banghua and Ma, Cong and Jiao, Jiantao and Russell, Stuart},
journal={Advances in Neural Information Processing Systems},
volume={34},
year={2021}
}
@article{xie2021policy,
title={Policy finetuning: Bridging sample-efficient offline and online reinforcement learning},
author={Xie, Tengyang and Jiang, Nan and Wang, Huan and Xiong, Caiming and Bai, Yu},
journal={Advances in neural information processing systems},
volume={34},
year={2021}
}
@article{yin2021towards,
title={Towards instance-optimal offline reinforcement learning with pessimism},
author={Yin, Ming and Wang, Yu-Xiang},
journal={Advances in neural information processing systems},
volume={34},
year={2021}
}
@article{xie2021bellman,
title={Bellman-consistent pessimism for offline reinforcement learning},
author={Xie, Tengyang and Cheng, Ching-An and Jiang, Nan and Mineiro, Paul and Agarwal, Alekh},
journal={Advances in neural information processing systems},
volume={34},
year={2021}
}
@article{chang2021mitigating,
title={Mitigating Covariate Shift in Imitation Learning via Offline Data With Partial Coverage},
author={Chang, Jonathan and Uehara, Masatoshi and Sreenivas, Dhruv and Kidambi, Rahul and Sun, Wen},
journal={Advances in Neural Information Processing Systems},
volume={34},
year={2021}
}
@article{uehara2021pessimistic,
title={Pessimistic Model-based Offline Reinforcement Learning under Partial Coverage},
author={Uehara, Masatoshi and Sun, Wen},
journal={arXiv preprint arXiv:2107.06226},
year={2021}
}
@article{liu2020provably,
title={Provably good batch reinforcement learning without great exploration},
author={Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
journal={arXiv preprint arXiv:2007.08202},
year={2020}
}
@article{kidambi2020morel,
title={Morel: Model-based offline reinforcement learning},
author={Kidambi, Rahul and Rajeswaran, Aravind and Netrapalli, Praneeth and Joachims, Thorsten},
journal={Advances in neural information processing systems},
volume={33},
pages={21810--21823},
year={2020}
}
@article{wang2020statistical,
title={What are the Statistical Limits of Offline RL with Linear Function Approximation?},
author={Wang, Ruosong and Foster, Dean P and Kakade, Sham M},
journal={arXiv preprint arXiv:2010.11895},
year={2020}
}
@article{amortila2020variant,
title={A variant of the wang-foster-kakade lower bound for the discounted setting},
author={Amortila, Philip and Jiang, Nan and Xie, Tengyang},
journal={arXiv preprint arXiv:2011.01075},
year={2020}
}
@inproceedings{zanette2021exponential,
title={Exponential lower bounds for batch reinforcement learning: Batch {RL} can be exponentially harder than online {RL}},
author={Zanette, Andrea},
booktitle={International Conference on Machine Learning},
pages={12287--12297},
year={2021},
organization={PMLR}
}
@article{chen2021infinite,
title={Infinite-horizon offline reinforcement learning with linear function approximation: Curse of dimensionality and algorithm},
author={Chen, Lin and Scherrer, Bruno and Bartlett, Peter L},
journal={arXiv preprint arXiv:2103.09847},
year={2021}
}
@inproceedings{Chen2022OfflineRL,
title={Offline Reinforcement Learning Under Value and Density-Ratio Realizability: the Power of Gaps},
author={Chen, Jinglin and Jiang, Nan},
booktitle={Conference on Uncertainty in Artificial Intelligence},
year={2022}
}
@inproceedings{zhou2021nearly,
title={Nearly minimax optimal reinforcement learning for linear mixture markov decision processes},
author={Zhou, Dongruo and Gu, Quanquan and Szepesvari, Csaba},
booktitle={Conference on Learning Theory},
pages={4532--4576},
year={2021},
organization={PMLR}
}
@inproceedings{NIPS2014_2ab56412,
author = {Maillard, Odalric-Ambrym and Mann, Timothy A and Mannor, Shie},
booktitle = {Advances in Neural Information Processing Systems},
editor = {Z. Ghahramani and M. Welling and C. Cortes and N. Lawrence and K. Q. Weinberger},
publisher = {Curran Associates, Inc.},
title = {{``How hard is my MDP?''} The distribution-norm to the rescue},
url = {https://proceedings.neurips.cc/paper/2014/file/2ab56412b1163ee131e1246da0955bd1-Paper.pdf},
volume = {27},
year = {2014}
}
@inproceedings{azar2017minimax,
title={Minimax regret bounds for reinforcement learning},
author={Azar, Mohammad Gheshlaghi and Osband, Ian and Munos, R{\'e}mi},
booktitle={International Conference on Machine Learning},
pages={263--272},
year={2017},
organization={PMLR}
}
@inproceedings{weisz2021exponential,
title={Exponential lower bounds for planning in mdps with linearly-realizable optimal action-value functions},
author={Weisz, Gell{\'e}rt and Amortila, Philip and Szepesv{\'a}ri, Csaba},
booktitle={Algorithmic Learning Theory},
pages={1237--1264},
year={2021},
organization={PMLR}
}
@article{bubeck2012regret,
title={Regret analysis of stochastic and nonstochastic multi-armed bandit problems},
author={Bubeck, S{\'e}bastien and Cesa-Bianchi, Nicol{\`o}},
journal={arXiv preprint arXiv:1204.5721},
year={2012}
}
@article{ok2018exploration,
title={Exploration in structured reinforcement learning},
author={Ok, Jungseul and Proutiere, Alexandre and Tranos, Damianos},
journal={Advances in Neural Information Processing Systems},
volume={31},
year={2018}
}
@article{simchowitz2019non,
title={Non-asymptotic gap-dependent regret bounds for tabular mdps},
author={Simchowitz, Max and Jamieson, Kevin G},
journal={Advances in Neural Information Processing Systems},
volume={32},
year={2019}
}
@book{lattimore_szepesvari_2020,
address={Cambridge},
title={Bandit Algorithms},
doi={10.1017/9781108571401},
publisher={Cambridge University Press},
author={Lattimore, Tor and Szepesv{\'a}ri, Csaba},
year={2020}
}
@inproceedings{he2021logarithmic,
title={Logarithmic regret for reinforcement learning with linear function approximation},
author={He, Jiafan and Zhou, Dongruo and Gu, Quanquan},
booktitle={International Conference on Machine Learning},
pages={4171--4180},
year={2021},
organization={PMLR}
}
@article{papini2021reinforcement,
title={Reinforcement Learning in Linear MDPs: Constant Regret and Representation Selection},
author={Papini, Matteo and Tirinzoni, Andrea and Pacchiano, Aldo and Restelli, Marcello and Lazaric, Alessandro and Pirotta, Matteo},
journal={Advances in Neural Information Processing Systems},
volume={34},
year={2021}
}
@inproceedings{zanette2020learning,
title={Learning near optimal policies with low inherent bellman error},
author={Zanette, Andrea and Lazaric, Alessandro and Kochenderfer, Mykel and Brunskill, Emma},
booktitle={International Conference on Machine Learning},
pages={10978--10989},
year={2020},
organization={PMLR}
}
@inproceedings{zanette2020frequentist,
title={Frequentist regret bounds for randomized least-squares value iteration},
author={Zanette, Andrea and Brandfonbrener, David and Brunskill, Emma and Pirotta, Matteo and Lazaric, Alessandro},
booktitle={International Conference on Artificial Intelligence and Statistics},
pages={1954--1964},
year={2020},
organization={PMLR}
}
@inproceedings{yang2020reinforcement,
title={Reinforcement learning in feature space: Matrix bandit, kernels, and regret bound},
author={Yang, Lin and Wang, Mengdi},
booktitle={International Conference on Machine Learning},
pages={10746--10756},
year={2020},
organization={PMLR}
}
@inproceedings{ayoub2020model,
title={Model-based reinforcement learning with value-targeted regression},
author={Ayoub, Alex and Jia, Zeyu and Szepesvari, Csaba and Wang, Mengdi and Yang, Lin},
booktitle={International Conference on Machine Learning},
pages={463--474},
year={2020},
organization={PMLR}
}
@inproceedings{jiang2017contextual,
title={Contextual decision processes with low bellman rank are pac-learnable},
author={Jiang, Nan and Krishnamurthy, Akshay and Agarwal, Alekh and Langford, John and Schapire, Robert E},
booktitle={International Conference on Machine Learning},
pages={1704--1713},
year={2017},
organization={PMLR}
}
@article{DBLP:journals/corr/OrtnerMR14,
author = {Ronald Ortner and
Odalric{-}Ambrym Maillard and
Daniil Ryabko},
title = {Selecting Near-Optimal Approximate State Representations in Reinforcement
Learning},
journal = {CoRR},
volume = {abs/1405.2652},
year = {2014},
url = {http://arxiv.org/abs/1405.2652},
eprinttype = {arXiv},
eprint = {1405.2652},
timestamp = {Mon, 13 Aug 2018 16:46:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/OrtnerMR14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{NEURIPS2019_9b8b50fb,
author = {Ortner, Ronald and Pirotta, Matteo and Lazaric, Alessandro and Fruit, Ronan and Maillard, Odalric-Ambrym},
booktitle = {Advances in Neural Information Processing Systems},
editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett},
pages = {},
publisher = {Curran Associates, Inc.},
title = {Regret Bounds for Learning State Representations in Reinforcement Learning},
url = {https://proceedings.neurips.cc/paper/2019/file/9b8b50fb590c590ffbf1295ce92258dc-Paper.pdf},
volume = {32},
year = {2019}
}
@article{DBLP:journals/corr/abs-2011-09750,
author = {Jonathan N. Lee and
Aldo Pacchiano and
Vidya Muthukumar and
Weihao Kong and
Emma Brunskill},
title = {Online Model Selection for Reinforcement Learning with Function Approximation},
journal = {CoRR},
volume = {abs/2011.09750},
year = {2020},
url = {https://arxiv.org/abs/2011.09750},
eprinttype = {arXiv},
eprint = {2011.09750},
timestamp = {Wed, 25 Nov 2020 16:34:14 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2011-09750.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{du2019provably,
title={Provably efficient RL with rich observations via latent state decoding},
author={Du, Simon and Krishnamurthy, Akshay and Jiang, Nan and Agarwal, Alekh and Dudik, Miroslav and Langford, John},
booktitle={International Conference on Machine Learning},
pages={1665--1674},
year={2019},
organization={PMLR}
}
@article{DBLP:journals/corr/abs-2006-10814,
author = {Alekh Agarwal and
Sham M. Kakade and
Akshay Krishnamurthy and
Wen Sun},
title = {{FLAMBE:} Structural Complexity and Representation Learning of Low
Rank MDPs},
journal = {CoRR},
volume = {abs/2006.10814},
year = {2020},
url = {https://arxiv.org/abs/2006.10814},
eprinttype = {arXiv},
eprint = {2006.10814},
timestamp = {Tue, 23 Jun 2020 17:57:22 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2006-10814.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-07035,
author = {Aditya Modi and
Jinglin Chen and
Akshay Krishnamurthy and
Nan Jiang and
Alekh Agarwal},
title = {Model-free Representation Learning and Exploration in Low-rank MDPs},
journal = {CoRR},
volume = {abs/2102.07035},
year = {2021},
url = {https://arxiv.org/abs/2102.07035},
eprinttype = {arXiv},
eprint = {2102.07035},
timestamp = {Fri, 19 Feb 2021 08:32:49 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2102-07035.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-06996,
author = {Botao Hao and
Tor Lattimore and
Csaba Szepesv{\'{a}}ri},
title = {Adaptive Exploration in Linear Contextual Bandit},
journal = {CoRR},
volume = {abs/1910.06996},
year = {2019},
url = {http://arxiv.org/abs/1910.06996},
eprinttype = {arXiv},
eprint = {1910.06996},
timestamp = {Tue, 22 Oct 2019 18:17:16 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1910-06996.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-03781,
author = {Matteo Papini and
Andrea Tirinzoni and
Marcello Restelli and
Alessandro Lazaric and
Matteo Pirotta},
title = {Leveraging Good Representations in Linear Contextual Bandits},
journal = {CoRR},
volume = {abs/2104.03781},
year = {2021},
url = {https://arxiv.org/abs/2104.03781},
eprinttype = {arXiv},
eprint = {2104.03781},
timestamp = {Tue, 13 Apr 2021 16:46:17 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2104-03781.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Zanette2022BellmanRO,
title={Bellman Residual Orthogonalization for Offline Reinforcement Learning},
author={Andrea Zanette and Martin J. Wainwright},
journal={ArXiv},
year={2022},
volume={abs/2203.12786}
}
@article{Perdomo2022ASC,
title={A Sharp Characterization of Linear Estimators for Offline Policy Evaluation},
author={Juan C. Perdomo and Akshay Krishnamurthy and Peter L. Bartlett and Sham M. Kakade},
journal={ArXiv},
year={2022},
volume={abs/2203.04236}
}
@article{hu2021fast,
title={Fast rates for the regret of offline reinforcement learning},
author={Hu, Yichun and Kallus, Nathan and Uehara, Masatoshi},
journal={arXiv preprint arXiv:2102.00479},
year={2021}
}
@article{audibert2005fast,
title={Fast learning rates for plug-in classifiers under the margin condition},
author={Audibert, Jean-Yves and Tsybakov, Alexandre B},
journal={arXiv preprint math/0507180},
year={2005}
}
@article{mou2020sample,
title={On the sample complexity of reinforcement learning with policy space generalization},
author={Mou, Wenlong and Wen, Zheng and Chen, Xi},
journal={arXiv preprint arXiv:2008.07353},
year={2020}
}
@inproceedings{yang2021q,
title={Q-learning with logarithmic regret},
author={Yang, Kunhe and Yang, Lin and Du, Simon},
booktitle={International Conference on Artificial Intelligence and Statistics},
pages={1576--1584},
year={2021},
organization={PMLR}
}
@article{wang2021exponential,
title={An Exponential Lower Bound for Linearly Realizable MDP with Constant Suboptimality Gap},
author={Wang, Yuanhao and Wang, Ruosong and Kakade, Sham},
journal={Advances in Neural Information Processing Systems},
volume={34},
year={2021}
}
@article{min2021variance,
title={Variance-aware off-policy evaluation with linear function approximation},
author={Min, Yifei and Wang, Tianhao and Zhou, Dongruo and Gu, Quanquan},
journal={Advances in neural information processing systems},
volume={34},
year={2021}
}
@inproceedings{NIPS2011_e1d5be1c,
author = {Abbasi-Yadkori, Yasin and P\'{a}l, D\'{a}vid and Szepesv\'{a}ri, Csaba},
booktitle = {Advances in Neural Information Processing Systems},
editor = {J. Shawe-Taylor and R. Zemel and P. Bartlett and F. Pereira and K.Q. Weinberger},
publisher = {Curran Associates, Inc.},
title = {Improved Algorithms for Linear Stochastic Bandits},
url = {https://proceedings.neurips.cc/paper/2011/file/e1d5be1c7f2f456670de3d53c7b54f4a-Paper.pdf},
volume = {24},
year = {2011}
}
@inproceedings{duan2020minimax,
title={Minimax-optimal off-policy evaluation with linear function approximation},
author={Duan, Yaqi and Jia, Zeyu and Wang, Mengdi},
booktitle={International Conference on Machine Learning},
pages={2701--2709},
year={2020},
organization={PMLR}
}
@inproceedings{DBLP:conf/uai/LiuSAB19,
author = {Yao Liu and
Adith Swaminathan and
Alekh Agarwal and
Emma Brunskill},
editor = {Amir Globerson and
Ricardo Silva},
title = {Off-Policy Policy Gradient with Stationary Distribution Correction},
booktitle = {Proceedings of the Thirty-Fifth Conference on Uncertainty in Artificial
Intelligence, {UAI} 2019, Tel Aviv, Israel, July 22-25, 2019},
series = {Proceedings of Machine Learning Research},
volume = {115},
pages = {1180--1190},
publisher = {{AUAI} Press},
year = {2019},
url = {http://proceedings.mlr.press/v115/liu20a.html},
timestamp = {Tue, 15 Dec 2020 17:40:18 +0100},
biburl = {https://dblp.org/rec/conf/uai/LiuSAB19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{bartlett2005local,
title={Local rademacher complexities},
author={Bartlett, Peter L and Bousquet, Olivier and Mendelson, Shahar},
journal={The Annals of Statistics},
volume={33},
number={4},
pages={1497--1537},
year={2005},
publisher={Institute of Mathematical Statistics}
}
@article{tropp2011freedman,
title={Freedman's inequality for matrix martingales},
author={Tropp, Joel},
journal={Electronic Communications in Probability},
volume={16},
pages={262--270},
year={2011},
publisher={Institute of Mathematical Statistics and Bernoulli Society}
}
@article{nguyen2022practical,
title={On Practical Reinforcement Learning: Provable Robustness, Scalability, and Statistical Efficiency},
author={Nguyen-Tang, Thanh},
journal={arXiv preprint arXiv:2203.01758},
year={2022}
}
@article{Duan2021RiskBA,
title={Risk Bounds and Rademacher Complexity in Batch Reinforcement Learning},
author={Duan, Yaqi and Jin, Chi and Li, Zhiyuan},
journal={ArXiv},
internal-note = {NOTE(review): duplicate of duan2021risk (ICML 2021 version) -- consider deduplicating and citing one key},
year={2021},
volume={abs/2103.13883}
}
@incollection{lange2012batch,
title={Batch reinforcement learning},
author={Lange, Sascha and Gabel, Thomas and Riedmiller, Martin},
booktitle={Reinforcement learning},
pages={45--73},
year={2012},
publisher={Springer}
}
@article{levine2020offline,
title={Offline reinforcement learning: Tutorial, review, and perspectives on open problems},
author={Levine, Sergey and Kumar, Aviral and Tucker, George and Fu, Justin},
journal={arXiv preprint arXiv:2005.01643},
year={2020}
}
@article{gottesman2019guidelines,
title={Guidelines for reinforcement learning in healthcare},
author={Gottesman, Omer and Johansson, Fredrik and Komorowski, Matthieu and Faisal, Aldo and Sontag, David and Doshi-Velez, Finale and Celi, Leo Anthony},
journal={Nature medicine},
volume={25},
number={1},
pages={16--18},
year={2019},
publisher={Nature Publishing Group}
}
@article{nie2021learning,
title={Learning when-to-treat policies},
author={Nie, Xinkun and Brunskill, Emma and Wager, Stefan},
journal={Journal of the American Statistical Association},
volume={116},
number={533},
pages={392--409},
year={2021},
publisher={Taylor \& Francis}
}
@article{strehl2010learning,
title={Learning from logged implicit exploration data},
author={Strehl, Alex and Langford, John and Kakade, Sham and Li, Lihong},
journal={arXiv preprint arXiv:1003.0120},
year={2010}
}
@inproceedings{thomasAAAI17,
author = {Thomas, Philip S. and Theocharous, Georgios and Ghavamzadeh, Mohammad and Durugkar, Ishan and Brunskill, Emma},
title = {Predictive Off-Policy Policy Evaluation for Nonstationary Decision Problems, with Applications to Digital Marketing},
year = {2017},
publisher = {AAAI Press},
booktitle = {Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence},
pages = {4740--4745},
numpages = {6},
location = {San Francisco, California, USA},
series = {AAAI'17}
}
@article{Kitagawa18,
author = {Kitagawa, Toru and Tetenov, Aleksey},
title = {Who Should Be Treated? Empirical Welfare Maximization Methods for Treatment Choice},
journal = {Econometrica},
volume = {86},
number = {2},
pages = {591--616},
keywords = {Heterogeneous treatment effects, randomized experiments, program evaluation, individualized treatment rules, empirical risk minimization, risk bounds},
doi = {10.3982/ECTA13288},
url = {https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA13288},
eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.3982/ECTA13288},
year = {2018}
}
@article{athey2021policy,
title={Policy learning with observational data},
author={Athey, Susan and Wager, Stefan},
journal={Econometrica},
volume={89},
number={1},
pages={133--161},
year={2021},
publisher={Wiley Online Library}
}
@inproceedings{uehara2022representation,
title={Representation Learning for Online and Offline {RL} in Low-rank {MDP}s},
author={Masatoshi Uehara and Xuezhou Zhang and Wen Sun},
booktitle={International Conference on Learning Representations},
year={2022},
url={https://openreview.net/forum?id=J4iSIR9fhY0}
}
@inproceedings{fujimoto2019off,
title={Off-policy deep reinforcement learning without exploration},
author={Fujimoto, Scott and Meger, David and Precup, Doina},
booktitle={International Conference on Machine Learning},
pages={2052--2062},
year={2019},
organization={PMLR}
}
@inproceedings{le2019batch,
title={Batch policy learning under constraints},
author={Le, Hoang and Voloshin, Cameron and Yue, Yisong},
booktitle={International Conference on Machine Learning},
pages={3703--3712},
year={2019},
organization={PMLR}
}
@article{kumar2019stabilizing,
title={Stabilizing off-policy {Q}-learning via bootstrapping error reduction},
author={Kumar, Aviral and Fu, Justin and Soh, Matthew and Tucker, George and Levine, Sergey},
journal={Advances in Neural Information Processing Systems},
volume={32},
year={2019}
}
@article{kumar2020conservative,
title={Conservative q-learning for offline reinforcement learning},
author={Kumar, Aviral and Zhou, Aurick and Tucker, George and Levine, Sergey},
journal={Advances in Neural Information Processing Systems},
volume={33},
pages={1179--1191},
year={2020}
}
@inproceedings{duan2021risk,
title={Risk bounds and rademacher complexity in batch reinforcement learning},
author={Duan, Yaqi and Jin, Chi and Li, Zhiyuan},
booktitle={International Conference on Machine Learning},
pages={2892--2902},
year={2021},
organization={PMLR}
}
@inproceedings{lee2021optidice,
title={Optidice: Offline policy optimization via stationary distribution correction estimation},
author={Lee, Jongmin and Jeon, Wonseok and Lee, Byungjun and Pineau, Joelle and Kim, Kee-Eung},
booktitle={International Conference on Machine Learning},
pages={6120--6130},
year={2021},
organization={PMLR}
}
@article{nachum2019dualdice,
title={Dualdice: Behavior-agnostic estimation of discounted stationary distribution corrections},
author={Nachum, Ofir and Chow, Yinlam and Dai, Bo and Li, Lihong},
journal={Advances in Neural Information Processing Systems},
volume={32},
year={2019}
}
@article{nachum2019algaedice,
  title   = {{AlgaeDICE}: Policy gradient from arbitrary experience},
  author  = {Nachum, Ofir and Dai, Bo and Kostrikov, Ilya and Chow, Yinlam and Li, Lihong and Schuurmans, Dale},
  journal = {arXiv preprint arXiv:1912.02074},
  year    = {2019}
}
@article{zhang2020gendice,
  title   = {{GenDICE}: Generalized offline estimation of stationary values},
  author  = {Zhang, Ruiyi and Dai, Bo and Li, Lihong and Schuurmans, Dale},
  journal = {arXiv preprint arXiv:2002.09072},
  year    = {2020}
}
@article{kostrikov2021offline,
  title   = {Offline reinforcement learning with implicit {Q}-learning},
  author  = {Kostrikov, Ilya and Nair, Ashvin and Levine, Sergey},
  journal = {arXiv preprint arXiv:2110.06169},
  year    = {2021}
}
@article{Zhang2022OffPolicyFQ,
  title   = {Off-Policy Fitted {Q}-Evaluation with Differentiable Function Approximators: {Z}-Estimation and Inference Theory},
  author  = {Zhang, Ruiqi and Zhang, Xuezhou and Ni, Chengzhuo and Wang, Mengdi},
  journal = {arXiv preprint arXiv:2202.04970},
  year    = {2022}
}
@article{Duan2021OptimalPE,
  title   = {Optimal policy evaluation using kernel-based temporal difference methods},
  author  = {Duan, Yaqi and Wang, Mengdi and Wainwright, Martin J.},
  journal = {arXiv preprint arXiv:2109.12002},
  year    = {2021}
}
@inproceedings{xie2020q,
  title        = {{$Q^\star$} approximation schemes for batch reinforcement learning: A theoretical comparison},
  author       = {Xie, Tengyang and Jiang, Nan},
  booktitle    = {Conference on Uncertainty in Artificial Intelligence},
  pages        = {550--559},
  year         = {2020},
  organization = {PMLR}
}
@article{wu2019behavior,
  title   = {Behavior regularized offline reinforcement learning},
  author  = {Wu, Yifan and Tucker, George and Nachum, Ofir},
  journal = {arXiv preprint arXiv:1911.11361},
  year    = {2019}
}
@inproceedings{yang2019sample,
  title        = {Sample-optimal parametric {Q}-learning using linearly additive features},
  author       = {Yang, Lin and Wang, Mengdi},
  booktitle    = {International Conference on Machine Learning},
  pages        = {6995--7004},
  year         = {2019},
  organization = {PMLR}
}
@article{qiao2022sample,
  title   = {Sample-Efficient Reinforcement Learning with {$\log\log(T)$} Switching Cost},
  author  = {Qiao, Dan and Yin, Ming and Min, Ming and Wang, Yu-Xiang},
  journal = {arXiv preprint arXiv:2202.06385},
  year    = {2022}
}
@article{jin2020simultaneously,
  title   = {Simultaneously learning stochastic and adversarial episodic {MDPs} with known transition},
  author  = {Jin, Tiancheng and Luo, Haipeng},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {16557--16566},
  year    = {2020}
}
@article{arora2012online,
  title   = {Online bandit learning against an adaptive adversary: from regret to policy regret},
  author  = {Arora, Raman and Dekel, Ofer and Tewari, Ambuj},
  journal = {arXiv preprint arXiv:1206.6400},
  year    = {2012}
}
@article{amir2022better,
  title   = {Better Best of Both Worlds Bounds for Bandits with Switching Costs},
  author  = {Amir, Idan and Azov, Guy and Koren, Tomer and Livni, Roi},
  journal = {arXiv preprint arXiv:2206.03098},
  year    = {2022}
}
@article{malik2022complete,
  title   = {Complete Policy Regret Bounds for Tallying Bandits},
  author  = {Malik, Dhruv and Li, Yuanzhi and Singh, Aarti},
  journal = {arXiv preprint arXiv:2204.11174},
  year    = {2022}
}
@article{arora2012deterministic,
  title   = {Deterministic {MDPs} with adversarial rewards and bandit feedback},
  author  = {Arora, Raman and Dekel, Ofer and Tewari, Ambuj},
  journal = {arXiv preprint arXiv:1210.4843},
  year    = {2012}
}
@inproceedings{jin2020learning,
  title        = {Learning adversarial {Markov} decision processes with bandit feedback and unknown transition},
  author       = {Jin, Chi and Jin, Tiancheng and Luo, Haipeng and Sra, Suvrit and Yu, Tiancheng},
  booktitle    = {International Conference on Machine Learning},
  pages        = {4860--4869},
  year         = {2020},
  organization = {PMLR}
}
@inproceedings{rosenberg2019online,
  title        = {Online convex optimization in adversarial {Markov} decision processes},
  author       = {Rosenberg, Aviv and Mansour, Yishay},
  booktitle    = {International Conference on Machine Learning},
  pages        = {5478--5486},
  year         = {2019},
  organization = {PMLR}
}
@article{neu2021online,
  title   = {Online learning in {MDPs} with linear function approximation and bandit feedback},
  author  = {Neu, Gergely and Olkhovskaya, Julia},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {34},
  pages   = {10407--10417},
  year    = {2021}
}
@article{fei2020dynamic,
  title   = {Dynamic regret of policy optimization in non-stationary environments},
  author  = {Fei, Yingjie and Yang, Zhuoran and Wang, Zhaoran and Xie, Qiaomin},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {6743--6754},
  year    = {2020}
}
@inproceedings{Mao2021NearOptimalMR,
  title        = {Near-Optimal Model-Free Reinforcement Learning in Non-Stationary Episodic {MDPs}},
  author       = {Mao, Weichao and Zhang, Kaiqing and Zhu, Ruihao and Simchi-Levi, David and Ba{\c{s}}ar, Tamer},
  booktitle    = {International Conference on Machine Learning},
  year         = {2021},
  organization = {PMLR}
}
@inproceedings{cheung2020reinforcement,
  title        = {Reinforcement learning for non-stationary {Markov} decision processes: The blessing of (more) optimism},
  author       = {Cheung, Wang Chi and Simchi-Levi, David and Zhu, Ruihao},
  booktitle    = {International Conference on Machine Learning},
  pages        = {1843--1854},
  year         = {2020},
  organization = {PMLR}
}
@article{Dinh2021OnlineMD,
  title   = {Online {Markov} Decision Processes with Non-oblivious Strategic Adversary},
  author  = {Dinh, Le Cong and Mguni, David Henry and Tran-Thanh, Long and Wang, Jun and Yang, Yaodong},
  journal = {arXiv preprint arXiv:2110.03604},
  year    = {2021}
}
@inproceedings{dick2014online,
  title        = {Online learning in {Markov} decision processes with changing cost sequences},
  author       = {Dick, Travis and Gy{\"o}rgy, Andr{\'a}s and Szepesv{\'a}ri, Csaba},
  booktitle    = {International Conference on Machine Learning},
  pages        = {512--520},
  year         = {2014},
  organization = {PMLR}
}
@article{arora2018policy,
  title   = {Policy regret in repeated games},
  author  = {Arora, Raman and Dinitz, Michael and Marinov, Teodor Vanislavov and Mohri, Mehryar},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {31},
  year    = {2018}
}
@inproceedings{suggala2020online,
  title        = {Online non-convex learning: Following the perturbed leader is optimal},
  author       = {Suggala, Arun Sai and Netrapalli, Praneeth},
  booktitle    = {Algorithmic Learning Theory},
  pages        = {845--861},
  year         = {2020},
  organization = {PMLR}
}
@article{jin2021v,
  title   = {{V-Learning}--A Simple, Efficient, Decentralized Algorithm for Multiagent {RL}},
  author  = {Jin, Chi and Liu, Qinghua and Wang, Yuanhao and Yu, Tiancheng},
  journal = {arXiv preprint arXiv:2110.14555},
  year    = {2021}
}
@inproceedings{liu2021sharp,
  title        = {A sharp analysis of model-based reinforcement learning with self-play},
  author       = {Liu, Qinghua and Yu, Tiancheng and Bai, Yu and Jin, Chi},
  booktitle    = {International Conference on Machine Learning},
  pages        = {7001--7010},
  year         = {2021},
  organization = {PMLR}
}
@comment{thanhnguyentang}
Copy link
Copy Markdown
Author

hi

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment