Last active
July 3, 2022 17:18
-
-
Save thanhnguyentang/7045bdff9d381c4e8fc0b3489a197c97 to your computer and use it in GitHub Desktop.
Bib refs for RL works
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@inproceedings{tripuraneni2021provable,
  title = {Provable meta-learning of linear representations},
  author = {Tripuraneni, Nilesh and Jin, Chi and Jordan, Michael},
  booktitle = {International Conference on Machine Learning},
  pages = {10434--10443},
  year = {2021},
  organization = {PMLR}
}
@inproceedings{mitchell2021offline,
  title = {Offline meta-reinforcement learning with advantage weighting},
  author = {Mitchell, Eric and Rafailov, Rafael and Peng, Xue Bin and Levine, Sergey and Finn, Chelsea},
  booktitle = {International Conference on Machine Learning},
  pages = {7780--7791},
  year = {2021},
  organization = {PMLR}
}
@article{dorfman2021offline,
  title = {Offline Meta Reinforcement Learning--Identifiability Challenges and Effective Data Collection Strategies},
  author = {Dorfman, Ron and Shenfeld, Idan and Tamar, Aviv},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@inproceedings{cella2020meta,
  title = {Meta-learning with stochastic linear bandits},
  author = {Cella, Leonardo and Lazaric, Alessandro and Pontil, Massimiliano},
  booktitle = {International Conference on Machine Learning},
  pages = {1360--1370},
  year = {2020},
  organization = {PMLR}
}
@article{cesa2021multitask,
  title = {Multitask Online Mirror Descent},
  author = {Cesa-Bianchi, Nicol{\`o} and Laforgue, Pierre and Paudice, Andrea and Pontil, Massimiliano},
  journal = {arXiv preprint arXiv:2106.02393},
  year = {2021}
}
@inproceedings{hu2021near,
  title = {Near-optimal representation learning for linear bandits and linear {RL}},
  author = {Hu, Jiachen and Chen, Xiaoyu and Jin, Chi and Li, Lihong and Wang, Liwei},
  booktitle = {International Conference on Machine Learning},
  pages = {4349--4358},
  year = {2021},
  organization = {PMLR}
}
@article{zhang2021variance,
  title = {Variance-aware confidence set: Variance-dependent bound for linear bandits and horizon-free bound for linear mixture {MDP}},
  author = {Zhang, Zihan and Yang, Jiaqi and Ji, Xiangyang and Du, Simon S},
  journal = {arXiv preprint arXiv:2101.12745},
  year = {2021}
}
@article{minimax_repr,
  title = {Nearly Minimax Algorithms for Linear Bandits with Shared Representation},
  author = {Anonymous},
  journal = {Under review for ICML},
  year = {2022}
}
@inproceedings{jin2020provably,
  title = {Provably efficient reinforcement learning with linear function approximation},
  author = {Jin, Chi and Yang, Zhuoran and Wang, Zhaoran and Jordan, Michael I},
  booktitle = {Conference on Learning Theory},
  pages = {2137--2143},
  year = {2020},
  organization = {PMLR}
}
@misc{nguyentang2021sample,
  title = {Sample Complexity of Offline Reinforcement Learning with Deep {ReLU} Networks},
  author = {Nguyen-Tang, Thanh and Gupta, Sunil and Tran-The, Hung and Venkatesh, Svetha},
  year = {2021},
  eprint = {2103.06671},
  archivePrefix = {arXiv},
  primaryClass = {stat.ML}
}
@inproceedings{wang2021instabilities,
  title = {Instabilities of offline {RL} with pre-trained neural representation},
  author = {Wang, Ruosong and Wu, Yifan and Salakhutdinov, Ruslan and Kakade, Sham},
  booktitle = {International Conference on Machine Learning},
  pages = {10948--10960},
  year = {2021},
  organization = {PMLR}
}
@article{Jin2021BellmanED,
  title = {{Bellman} Eluder Dimension: New Rich Classes of {RL} Problems, and Sample-Efficient Algorithms},
  author = {Jin, Chi and Liu, Qinghua and Miryoosefi, Sobhan},
  journal = {ArXiv},
  year = {2021},
  volume = {abs/2102.00815}
}
@article{nguyen2021offline,
  title = {Offline Neural Contextual Bandits: Pessimism, Optimization and Generalization},
  author = {Nguyen-Tang, Thanh and Gupta, Sunil and Nguyen, A Tuan and Venkatesh, Svetha},
  journal = {arXiv preprint arXiv:2111.13807},
  year = {2021}
}
@article{yinnear,
  title = {Near-optimal Offline Reinforcement Learning with Linear Representation: Leveraging Variance Information with Pessimism},
  author = {Yin, Ming and Wang, Yu-Xiang and Duan, Yaqi and Wang, Mengdi},
  year = {2022}
}
@article{yang2020function,
  title = {On function approximation in reinforcement learning: Optimism in the face of large state spaces},
  author = {Yang, Zhuoran and Jin, Chi and Wang, Zhaoran and Wang, Mengdi and Jordan, Michael I},
  journal = {arXiv preprint arXiv:2011.04622},
  year = {2020}
}
@inproceedings{jin2021pessimism,
  title = {Is pessimism provably efficient for offline {RL}?},
  author = {Jin, Ying and Yang, Zhuoran and Wang, Zhaoran},
  booktitle = {International Conference on Machine Learning},
  pages = {5084--5096},
  year = {2021},
  organization = {PMLR}
}
@inproceedings{cai2020provably,
  title = {Provably efficient exploration in policy optimization},
  author = {Cai, Qi and Yang, Zhuoran and Jin, Chi and Wang, Zhaoran},
  booktitle = {International Conference on Machine Learning},
  pages = {1283--1294},
  year = {2020},
  organization = {PMLR}
}
@article{mucke2021data,
  title = {Data splitting improves statistical performance in overparametrized regimes},
  author = {M{\"u}cke, Nicole and Reiss, Enrico and Rungenhagen, Jonas and Klein, Markus},
  journal = {arXiv preprint arXiv:2110.10956},
  year = {2021}
}
@article{cai2019neural,
  title = {Neural temporal-difference learning converges to global optima},
  author = {Cai, Qi and Yang, Zhuoran and Lee, Jason D and Wang, Zhaoran},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  year = {2019}
}
@article{jacot2018neural,
  title = {Neural tangent kernel: Convergence and generalization in neural networks},
  author = {Jacot, Arthur and Gabriel, Franck and Hongler, Cl{\'e}ment},
  journal = {arXiv preprint arXiv:1806.07572},
  year = {2018}
}
@article{arora2019exact,
  title = {On exact computation with an infinitely wide neural net},
  author = {Arora, Sanjeev and Du, Simon S and Hu, Wei and Li, Zhiyuan and Salakhutdinov, Ruslan and Wang, Ruosong},
  journal = {arXiv preprint arXiv:1904.11955},
  year = {2019}
}
@inproceedings{allen2019convergence,
  title = {A convergence theory for deep learning via over-parameterization},
  author = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Song, Zhao},
  booktitle = {International Conference on Machine Learning},
  pages = {242--252},
  year = {2019},
  organization = {PMLR}
}
@article{hanin2019finite,
  title = {Finite depth and width corrections to the neural tangent kernel},
  author = {Hanin, Boris and Nica, Mihai},
  journal = {arXiv preprint arXiv:1909.05989},
  year = {2019}
}
@article{cao2019generalization,
  title = {Generalization bounds of stochastic gradient descent for wide and deep neural networks},
  author = {Cao, Yuan and Gu, Quanquan},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  pages = {10836--10846},
  year = {2019}
}
@article{belkin2021fit,
  title = {Fit without fear: remarkable mathematical phenomena of deep learning through the prism of interpolation},
  author = {Belkin, Mikhail},
  journal = {arXiv preprint arXiv:2105.14368},
  year = {2021}
}
@inproceedings{zhou2020neural,
  title = {Neural contextual bandits with {UCB}-based exploration},
  author = {Zhou, Dongruo and Li, Lihong and Gu, Quanquan},
  booktitle = {International Conference on Machine Learning},
  pages = {11492--11502},
  year = {2020},
  organization = {PMLR}
}
@article{dumer2007covering,
  title = {Covering spheres with spheres},
  author = {Dumer, Ilya},
  journal = {Discrete \& Computational Geometry},
  volume = {38},
  number = {4},
  pages = {665--679},
  year = {2007},
  publisher = {Springer}
}
@article{gouk2021regularisation,
  title = {Regularisation of neural networks by enforcing {Lipschitz} continuity},
  author = {Gouk, Henry and Frank, Eibe and Pfahringer, Bernhard and Cree, Michael J},
  journal = {Machine Learning},
  volume = {110},
  number = {2},
  pages = {393--416},
  year = {2021},
  publisher = {Springer}
}
@inproceedings{nguyen2021tight,
  title = {Tight bounds on the smallest eigenvalue of the neural tangent kernel for deep {ReLU} networks},
  author = {Nguyen, Quynh and Mondelli, Marco and Montufar, Guido F},
  booktitle = {International Conference on Machine Learning},
  pages = {8119--8129},
  year = {2021},
  organization = {PMLR}
}
@article{gao2019convergence,
  title = {Convergence of adversarial training in overparametrized neural networks},
  author = {Gao, Ruiqi and Cai, Tianle and Li, Haochuan and Hsieh, Cho-Jui and Wang, Liwei and Lee, Jason D},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  year = {2019}
}
@article{fulton2000eigenvalues,
  title = {Eigenvalues, invariant factors, highest weights, and {Schubert} calculus},
  author = {Fulton, William},
  journal = {Bulletin of the American Mathematical Society},
  volume = {37},
  number = {3},
  pages = {209--249},
  year = {2000}
}
@article{Schur1911,
  title = {Bemerkungen zur Theorie der beschr{\"a}nkten Bilinearformen mit unendlich vielen Ver{\"a}nderlichen},
  author = {Schur, J.},
  journal = {Journal f{\"u}r die reine und angewandte Mathematik},
  volume = {140},
  pages = {1--28},
  url = {http://eudml.org/doc/149352},
  year = {1911}
}
@article{foster2021offline,
  title = {Offline Reinforcement Learning: Fundamental Barriers for Value Function Approximation},
  author = {Foster, Dylan J and Krishnamurthy, Akshay and Simchi-Levi, David and Xu, Yunzong},
  journal = {arXiv preprint arXiv:2111.10919},
  year = {2021}
}
@article{zhan2022offline,
  title = {Offline Reinforcement Learning with Realizability and Single-policy Concentrability},
  author = {Zhan, Wenhao and Huang, Baihe and Huang, Audrey and Jiang, Nan and Lee, Jason D},
  journal = {arXiv preprint arXiv:2202.04634},
  year = {2022}
}
@article{yin2020near,
  title = {Near-Optimal Provable Uniform Convergence in Offline Policy Evaluation for Reinforcement Learning},
  author = {Yin, Ming and Bai, Yu and Wang, Yu-Xiang},
  journal = {arXiv preprint arXiv:2007.03760},
  year = {2020}
}
@inproceedings{szepesvari2005finite,
  title = {Finite time bounds for sampling based fitted value iteration},
  author = {Szepesv{\'a}ri, Csaba and Munos, R{\'e}mi},
  booktitle = {Proceedings of the 22nd International Conference on Machine Learning},
  pages = {880--887},
  year = {2005}
}
@inproceedings{chen2019information,
  title = {Information-theoretic considerations in batch reinforcement learning},
  author = {Chen, Jinglin and Jiang, Nan},
  booktitle = {International Conference on Machine Learning},
  pages = {1042--1051},
  year = {2019},
  organization = {PMLR}
}
@article{liu2019off,
  title = {Off-policy policy gradient with state distribution correction},
  author = {Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
  journal = {arXiv preprint arXiv:1904.08473},
  year = {2019}
}
@article{rashidinejad2021bridging,
  title = {Bridging offline reinforcement learning and imitation learning: A tale of pessimism},
  author = {Rashidinejad, Paria and Zhu, Banghua and Ma, Cong and Jiao, Jiantao and Russell, Stuart},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{xie2021policy,
  title = {Policy finetuning: Bridging sample-efficient offline and online reinforcement learning},
  author = {Xie, Tengyang and Jiang, Nan and Wang, Huan and Xiong, Caiming and Bai, Yu},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{yin2021towards,
  title = {Towards instance-optimal offline reinforcement learning with pessimism},
  author = {Yin, Ming and Wang, Yu-Xiang},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{xie2021bellman,
  title = {{Bellman}-consistent pessimism for offline reinforcement learning},
  author = {Xie, Tengyang and Cheng, Ching-An and Jiang, Nan and Mineiro, Paul and Agarwal, Alekh},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{chang2021mitigating,
  title = {Mitigating Covariate Shift in Imitation Learning via Offline Data With Partial Coverage},
  author = {Chang, Jonathan and Uehara, Masatoshi and Sreenivas, Dhruv and Kidambi, Rahul and Sun, Wen},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{uehara2021pessimistic,
  title = {Pessimistic Model-based Offline Reinforcement Learning under Partial Coverage},
  author = {Uehara, Masatoshi and Sun, Wen},
  journal = {arXiv preprint arXiv:2107.06226},
  year = {2021}
}
@article{liu2020provably,
  title = {Provably good batch reinforcement learning without great exploration},
  author = {Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
  journal = {arXiv preprint arXiv:2007.08202},
  year = {2020}
}
@article{kidambi2020morel,
  title = {{MOReL}: Model-based offline reinforcement learning},
  author = {Kidambi, Rahul and Rajeswaran, Aravind and Netrapalli, Praneeth and Joachims, Thorsten},
  journal = {Advances in Neural Information Processing Systems},
  volume = {33},
  pages = {21810--21823},
  year = {2020}
}
@article{wang2020statistical,
  title = {What are the Statistical Limits of Offline {RL} with Linear Function Approximation?},
  author = {Wang, Ruosong and Foster, Dean P and Kakade, Sham M},
  journal = {arXiv preprint arXiv:2010.11895},
  year = {2020}
}
@article{amortila2020variant,
  title = {A variant of the {Wang-Foster-Kakade} lower bound for the discounted setting},
  author = {Amortila, Philip and Jiang, Nan and Xie, Tengyang},
  journal = {arXiv preprint arXiv:2011.01075},
  year = {2020}
}
@inproceedings{zanette2021exponential,
  title = {Exponential lower bounds for batch reinforcement learning: Batch {RL} can be exponentially harder than online {RL}},
  author = {Zanette, Andrea},
  booktitle = {International Conference on Machine Learning},
  pages = {12287--12297},
  year = {2021},
  organization = {PMLR}
}
@article{chen2021infinite,
  title = {Infinite-horizon offline reinforcement learning with linear function approximation: Curse of dimensionality and algorithm},
  author = {Chen, Lin and Scherrer, Bruno and Bartlett, Peter L},
  journal = {arXiv preprint arXiv:2103.09847},
  year = {2021}
}
@inproceedings{Chen2022OfflineRL,
  title = {Offline Reinforcement Learning Under Value and Density-Ratio Realizability: the Power of Gaps},
  author = {Chen, Jinglin and Jiang, Nan},
  year = {2022}
}
@inproceedings{zhou2021nearly,
  title = {Nearly minimax optimal reinforcement learning for linear mixture {Markov} decision processes},
  author = {Zhou, Dongruo and Gu, Quanquan and Szepesvari, Csaba},
  booktitle = {Conference on Learning Theory},
  pages = {4532--4576},
  year = {2021},
  organization = {PMLR}
}
@inproceedings{NIPS2014_2ab56412,
  title = {``How hard is my {MDP}?'' The distribution-norm to the rescue},
  author = {Maillard, Odalric-Ambrym and Mann, Timothy A and Mannor, Shie},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {Z. Ghahramani and M. Welling and C. Cortes and N. Lawrence and K. Q. Weinberger},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2014/file/2ab56412b1163ee131e1246da0955bd1-Paper.pdf},
  volume = {27},
  year = {2014}
}
@inproceedings{azar2017minimax,
  title = {Minimax regret bounds for reinforcement learning},
  author = {Azar, Mohammad Gheshlaghi and Osband, Ian and Munos, R{\'e}mi},
  booktitle = {International Conference on Machine Learning},
  pages = {263--272},
  year = {2017},
  organization = {PMLR}
}
@inproceedings{weisz2021exponential,
  title = {Exponential lower bounds for planning in {MDP}s with linearly-realizable optimal action-value functions},
  author = {Weisz, Gell{\'e}rt and Amortila, Philip and Szepesv{\'a}ri, Csaba},
  booktitle = {Algorithmic Learning Theory},
  pages = {1237--1264},
  year = {2021},
  organization = {PMLR}
}
@article{bubeck2012regret,
  title = {Regret analysis of stochastic and nonstochastic multi-armed bandit problems},
  author = {Bubeck, S{\'e}bastien and Cesa-Bianchi, Nicol{\`o}},
  journal = {arXiv preprint arXiv:1204.5721},
  year = {2012}
}
@article{ok2018exploration,
  title = {Exploration in structured reinforcement learning},
  author = {Ok, Jungseul and Proutiere, Alexandre and Tranos, Damianos},
  journal = {Advances in Neural Information Processing Systems},
  volume = {31},
  year = {2018}
}
@article{simchowitz2019non,
  title = {Non-asymptotic gap-dependent regret bounds for tabular {MDP}s},
  author = {Simchowitz, Max and Jamieson, Kevin G},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  year = {2019}
}
@book{lattimore_szepesvari_2020,
  title = {Bandit Algorithms},
  author = {Lattimore, Tor and Szepesv{\'a}ri, Csaba},
  publisher = {Cambridge University Press},
  address = {Cambridge},
  doi = {10.1017/9781108571401},
  year = {2020}
}
@inproceedings{he2021logarithmic,
  title = {Logarithmic regret for reinforcement learning with linear function approximation},
  author = {He, Jiafan and Zhou, Dongruo and Gu, Quanquan},
  booktitle = {International Conference on Machine Learning},
  pages = {4171--4180},
  year = {2021},
  organization = {PMLR}
}
@article{papini2021reinforcement,
  title = {Reinforcement Learning in Linear {MDPs}: Constant Regret and Representation Selection},
  author = {Papini, Matteo and Tirinzoni, Andrea and Pacchiano, Aldo and Restelli, Marcello and Lazaric, Alessandro and Pirotta, Matteo},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@inproceedings{zanette2020learning,
  title = {Learning near optimal policies with low inherent {Bellman} error},
  author = {Zanette, Andrea and Lazaric, Alessandro and Kochenderfer, Mykel and Brunskill, Emma},
  booktitle = {International Conference on Machine Learning},
  pages = {10978--10989},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{zanette2020frequentist,
  title = {Frequentist regret bounds for randomized least-squares value iteration},
  author = {Zanette, Andrea and Brandfonbrener, David and Brunskill, Emma and Pirotta, Matteo and Lazaric, Alessandro},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages = {1954--1964},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{yang2020reinforcement,
  title = {Reinforcement learning in feature space: Matrix bandit, kernels, and regret bound},
  author = {Yang, Lin and Wang, Mengdi},
  booktitle = {International Conference on Machine Learning},
  pages = {10746--10756},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{ayoub2020model,
  title = {Model-based reinforcement learning with value-targeted regression},
  author = {Ayoub, Alex and Jia, Zeyu and Szepesvari, Csaba and Wang, Mengdi and Yang, Lin},
  booktitle = {International Conference on Machine Learning},
  pages = {463--474},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{jiang2017contextual,
  title = {Contextual decision processes with low {Bellman} rank are {PAC}-learnable},
  author = {Jiang, Nan and Krishnamurthy, Akshay and Agarwal, Alekh and Langford, John and Schapire, Robert E},
  booktitle = {International Conference on Machine Learning},
  pages = {1704--1713},
  year = {2017},
  organization = {PMLR}
}
@article{DBLP:journals/corr/OrtnerMR14,
  title = {Selecting Near-Optimal Approximate State Representations in Reinforcement Learning},
  author = {Ortner, Ronald and Maillard, Odalric-Ambrym and Ryabko, Daniil},
  journal = {CoRR},
  volume = {abs/1405.2652},
  year = {2014},
  url = {http://arxiv.org/abs/1405.2652},
  eprinttype = {arXiv},
  eprint = {1405.2652},
  biburl = {https://dblp.org/rec/journals/corr/OrtnerMR14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{NEURIPS2019_9b8b50fb,
  title = {Regret Bounds for Learning State Representations in Reinforcement Learning},
  author = {Ortner, Ronald and Pirotta, Matteo and Lazaric, Alessandro and Fruit, Ronan and Maillard, Odalric-Ambrym},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2019/file/9b8b50fb590c590ffbf1295ce92258dc-Paper.pdf},
  volume = {32},
  year = {2019}
}
@article{DBLP:journals/corr/abs-2011-09750,
  title = {Online Model Selection for Reinforcement Learning with Function Approximation},
  author = {Lee, Jonathan N. and Pacchiano, Aldo and Muthukumar, Vidya and Kong, Weihao and Brunskill, Emma},
  journal = {CoRR},
  volume = {abs/2011.09750},
  year = {2020},
  url = {https://arxiv.org/abs/2011.09750},
  eprinttype = {arXiv},
  eprint = {2011.09750},
  biburl = {https://dblp.org/rec/journals/corr/abs-2011-09750.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{du2019provably,
  title = {Provably efficient {RL} with rich observations via latent state decoding},
  author = {Du, Simon and Krishnamurthy, Akshay and Jiang, Nan and Agarwal, Alekh and Dudik, Miroslav and Langford, John},
  booktitle = {International Conference on Machine Learning},
  pages = {1665--1674},
  year = {2019},
  organization = {PMLR}
}
@article{DBLP:journals/corr/abs-2006-10814,
  title = {{FLAMBE:} Structural Complexity and Representation Learning of Low Rank {MDPs}},
  author = {Agarwal, Alekh and Kakade, Sham M. and Krishnamurthy, Akshay and Sun, Wen},
  journal = {CoRR},
  volume = {abs/2006.10814},
  year = {2020},
  url = {https://arxiv.org/abs/2006.10814},
  eprinttype = {arXiv},
  eprint = {2006.10814},
  biburl = {https://dblp.org/rec/journals/corr/abs-2006-10814.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-07035,
  title = {Model-free Representation Learning and Exploration in Low-rank {MDPs}},
  author = {Modi, Aditya and Chen, Jinglin and Krishnamurthy, Akshay and Jiang, Nan and Agarwal, Alekh},
  journal = {CoRR},
  volume = {abs/2102.07035},
  year = {2021},
  url = {https://arxiv.org/abs/2102.07035},
  eprinttype = {arXiv},
  eprint = {2102.07035},
  biburl = {https://dblp.org/rec/journals/corr/abs-2102-07035.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-06996,
  title = {Adaptive Exploration in Linear Contextual Bandit},
  author = {Hao, Botao and Lattimore, Tor and Szepesv{\'a}ri, Csaba},
  journal = {CoRR},
  volume = {abs/1910.06996},
  year = {2019},
  url = {http://arxiv.org/abs/1910.06996},
  eprinttype = {arXiv},
  eprint = {1910.06996},
  biburl = {https://dblp.org/rec/journals/corr/abs-1910-06996.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-03781,
  title = {Leveraging Good Representations in Linear Contextual Bandits},
  author = {Papini, Matteo and Tirinzoni, Andrea and Restelli, Marcello and Lazaric, Alessandro and Pirotta, Matteo},
  journal = {CoRR},
  volume = {abs/2104.03781},
  year = {2021},
  url = {https://arxiv.org/abs/2104.03781},
  eprinttype = {arXiv},
  eprint = {2104.03781},
  biburl = {https://dblp.org/rec/journals/corr/abs-2104-03781.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Zanette2022BellmanRO,
  title = {{Bellman} Residual Orthogonalization for Offline Reinforcement Learning},
  author = {Zanette, Andrea and Wainwright, Martin J.},
  journal = {ArXiv},
  year = {2022},
  volume = {abs/2203.12786}
}
@article{Perdomo2022ASC,
  title = {A Sharp Characterization of Linear Estimators for Offline Policy Evaluation},
  author = {Perdomo, Juan C. and Krishnamurthy, Akshay and Bartlett, Peter L. and Kakade, Sham M.},
  journal = {ArXiv},
  year = {2022},
  volume = {abs/2203.04236}
}
@article{hu2021fast,
  title = {Fast rates for the regret of offline reinforcement learning},
  author = {Hu, Yichun and Kallus, Nathan and Uehara, Masatoshi},
  journal = {arXiv preprint arXiv:2102.00479},
  year = {2021}
}
@article{audibert2005fast,
  title = {Fast learning rates for plug-in classifiers under the margin condition},
  author = {Audibert, Jean-Yves and Tsybakov, Alexandre B},
  journal = {arXiv preprint math/0507180},
  year = {2005}
}
@article{mou2020sample,
  title = {On the sample complexity of reinforcement learning with policy space generalization},
  author = {Mou, Wenlong and Wen, Zheng and Chen, Xi},
  journal = {arXiv preprint arXiv:2008.07353},
  year = {2020}
}
@inproceedings{yang2021q,
  title = {{Q}-learning with logarithmic regret},
  author = {Yang, Kunhe and Yang, Lin and Du, Simon},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages = {1576--1584},
  year = {2021},
  organization = {PMLR}
}
@article{wang2021exponential,
  title = {An Exponential Lower Bound for Linearly Realizable {MDP} with Constant Suboptimality Gap},
  author = {Wang, Yuanhao and Wang, Ruosong and Kakade, Sham},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{min2021variance,
  title = {Variance-aware off-policy evaluation with linear function approximation},
  author = {Min, Yifei and Wang, Tianhao and Zhou, Dongruo and Gu, Quanquan},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@inproceedings{NIPS2011_e1d5be1c,
  title = {Improved Algorithms for Linear Stochastic Bandits},
  author = {Abbasi-Yadkori, Yasin and P\'{a}l, D\'{a}vid and Szepesv\'{a}ri, Csaba},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {J. Shawe-Taylor and R. Zemel and P. Bartlett and F. Pereira and K.Q. Weinberger},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2011/file/e1d5be1c7f2f456670de3d53c7b54f4a-Paper.pdf},
  volume = {24},
  year = {2011}
}
@inproceedings{duan2020minimax,
  title = {Minimax-optimal off-policy evaluation with linear function approximation},
  author = {Duan, Yaqi and Jia, Zeyu and Wang, Mengdi},
  booktitle = {International Conference on Machine Learning},
  pages = {2701--2709},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{DBLP:conf/uai/LiuSAB19,
  title = {Off-Policy Policy Gradient with Stationary Distribution Correction},
  author = {Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
  editor = {Globerson, Amir and Silva, Ricardo},
  booktitle = {Proceedings of the Thirty-Fifth Conference on Uncertainty in Artificial Intelligence, {UAI} 2019, Tel Aviv, Israel, July 22-25, 2019},
  series = {Proceedings of Machine Learning Research},
  volume = {115},
  pages = {1180--1190},
  publisher = {{AUAI} Press},
  year = {2019},
  url = {http://proceedings.mlr.press/v115/liu20a.html},
  biburl = {https://dblp.org/rec/conf/uai/LiuSAB19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
| @article{bartlett2005local, | |
| title={Local rademacher complexities}, | |
| author={Bartlett, Peter L and Bousquet, Olivier and Mendelson, Shahar}, | |
| journal={The Annals of Statistics}, | |
| volume={33}, | |
| number={4}, | |
| pages={1497--1537}, | |
| year={2005}, | |
| publisher={Institute of Mathematical Statistics} | |
| } | |
| @article{tropp2011freedman, | |
| title={Freedman's inequality for matrix martingales}, | |
| author={Tropp, Joel}, | |
| journal={Electronic Communications in Probability}, | |
| volume={16}, | |
| pages={262--270}, | |
| year={2011}, | |
| publisher={Institute of Mathematical Statistics and Bernoulli Society} | |
| } | |
| @article{nguyen2022practical, | |
| title={On Practical Reinforcement Learning: Provable Robustness, Scalability, and Statistical Efficiency}, | |
| author={Nguyen-Tang, Thanh}, | |
| journal={arXiv preprint arXiv:2203.01758}, | |
| year={2022} | |
| } | |
| @article{Duan2021RiskBA, | |
| title={Risk Bounds and Rademacher Complexity in Batch Reinforcement Learning}, | |
| author={Yaqi Duan and Chi Jin and Zhiyuan Li}, | |
| journal={ArXiv}, | |
| year={2021}, | |
| volume={abs/2103.13883} | |
| } | |
| @incollection{lange2012batch, | |
| title={Batch reinforcement learning}, | |
| author={Lange, Sascha and Gabel, Thomas and Riedmiller, Martin}, | |
| booktitle={Reinforcement learning}, | |
| pages={45--73}, | |
| year={2012}, | |
| publisher={Springer} | |
| } | |
| @article{levine2020offline, | |
| title={Offline reinforcement learning: Tutorial, review, and perspectives on open problems}, | |
| author={Levine, Sergey and Kumar, Aviral and Tucker, George and Fu, Justin}, | |
| journal={arXiv preprint arXiv:2005.01643}, | |
| year={2020} | |
| } | |
| % Journal article (Nature Medicine, 2019). Journal name capitalized: journal | |
| % fields are printed as-is by standard styles, so "Nature medicine" would render | |
| % with a lowercase m. | |
| @article{gottesman2019guidelines, | |
| title={Guidelines for reinforcement learning in healthcare}, | |
| author={Gottesman, Omer and Johansson, Fredrik and Komorowski, Matthieu and Faisal, Aldo and Sontag, David and Doshi-Velez, Finale and Celi, Leo Anthony}, | |
| journal={Nature Medicine}, | |
| volume={25}, | |
| number={1}, | |
| pages={16--18}, | |
| year={2019}, | |
| publisher={Nature Publishing Group} | |
| } | |
| % Journal article (JASA, 2021). | |
| @article{nie2021learning, | |
| title={Learning when-to-treat policies}, | |
| author={Nie, Xinkun and Brunskill, Emma and Wager, Stefan}, | |
| journal={Journal of the American Statistical Association}, | |
| volume={116}, | |
| number={533}, | |
| pages={392--409}, | |
| year={2021}, | |
| publisher={Taylor \& Francis} | |
| } | |
| % arXiv preprint (arXiv:1003.0120, 2010). | |
| @article{strehl2010learning, | |
| title={Learning from logged implicit exploration data}, | |
| author={Strehl, Alex and Langford, John and Kakade, Sham and Li, Lihong}, | |
| journal={arXiv preprint arXiv:1003.0120}, | |
| year={2010} | |
| } | |
| % AAAI 2017 paper. Fixed the pages range, which used a raw Unicode en-dash | |
| % (fragile under classic 8-bit BibTeX) instead of the standard "--". | |
| @inproceedings{thomasAAAI17, | |
| author = {Thomas, Philip S. and Theocharous, Georgios and Ghavamzadeh, Mohammad and Durugkar, Ishan and Brunskill, Emma}, | |
| title = {Predictive Off-Policy Policy Evaluation for Nonstationary Decision Problems, with Applications to Digital Marketing}, | |
| year = {2017}, | |
| publisher = {AAAI Press}, | |
| booktitle = {Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence}, | |
| pages = {4740--4745}, | |
| numpages = {6}, | |
| location = {San Francisco, California, USA}, | |
| series = {AAAI'17} | |
| } | |
| % Journal article (Econometrica, 2018). Fixed single-hyphen page range and | |
| % stripped the resolver prefix from the DOI (store the bare DOI; styles and URL | |
| % packages add https://doi.org/ themselves). | |
| @article{Kitagawa18, | |
| author = {Kitagawa, Toru and Tetenov, Aleksey}, | |
| title = {Who Should Be Treated? Empirical Welfare Maximization Methods for Treatment Choice}, | |
| journal = {Econometrica}, | |
| volume = {86}, | |
| number = {2}, | |
| pages = {591--616}, | |
| keywords = {Heterogeneous treatment effects, randomized experiments, program evaluation, individualized treatment rules, empirical risk minimization, risk bounds}, | |
| doi = {10.3982/ECTA13288}, | |
| url = {https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA13288}, | |
| eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.3982/ECTA13288}, | |
| year = {2018} | |
| } | |
| % Journal article (Econometrica, 2021). | |
| @article{athey2021policy, | |
| title={Policy learning with observational data}, | |
| author={Athey, Susan and Wager, Stefan}, | |
| journal={Econometrica}, | |
| volume={89}, | |
| number={1}, | |
| pages={133--161}, | |
| year={2021}, | |
| publisher={Wiley Online Library} | |
| } | |
| % ICLR 2022 paper; acronyms already brace-protected in the title. | |
| @inproceedings{uehara2022representation, | |
| title={Representation Learning for Online and Offline {RL} in Low-rank {MDP}s}, | |
| author={Masatoshi Uehara and Xuezhou Zhang and Wen Sun}, | |
| booktitle={International Conference on Learning Representations}, | |
| year={2022}, | |
| url={https://openreview.net/forum?id=J4iSIR9fhY0} | |
| } | |
| % ICML 2019 paper (BCQ). | |
| @inproceedings{fujimoto2019off, | |
| title={Off-policy deep reinforcement learning without exploration}, | |
| author={Fujimoto, Scott and Meger, David and Precup, Doina}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={2052--2062}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % ICML 2019 paper. | |
| @inproceedings{le2019batch, | |
| title={Batch policy learning under constraints}, | |
| author={Le, Hoang and Voloshin, Cameron and Yue, Yisong}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={3703--3712}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2019 paper (BEAR); braced {Q} so "Q-learning" survives title recasing. | |
| @article{kumar2019stabilizing, | |
| title={Stabilizing off-policy {Q}-learning via bootstrapping error reduction}, | |
| author={Kumar, Aviral and Fu, Justin and Soh, Matthew and Tucker, George and Levine, Sergey}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={32}, | |
| year={2019} | |
| } | |
| % NeurIPS 2020 paper (CQL); braced {Q} so "Q-learning" survives title recasing. | |
| @article{kumar2020conservative, | |
| title={Conservative {Q}-learning for offline reinforcement learning}, | |
| author={Kumar, Aviral and Zhou, Aurick and Tucker, George and Levine, Sergey}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={33}, | |
| pages={1179--1191}, | |
| year={2020} | |
| } | |
| % ICML 2021 paper; braced {Rademacher}. NOTE(review): Duan2021RiskBA earlier in | |
| % this file appears to be the arXiv version of this same work. | |
| @inproceedings{duan2021risk, | |
| title={Risk bounds and {Rademacher} complexity in batch reinforcement learning}, | |
| author={Duan, Yaqi and Jin, Chi and Li, Zhiyuan}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={2892--2902}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
| % ICML 2021 paper; braced the stylized method name {OptiDICE} so its casing is | |
| % preserved under sentence-casing styles. | |
| @inproceedings{lee2021optidice, | |
| title={{OptiDICE}: Offline policy optimization via stationary distribution correction estimation}, | |
| author={Lee, Jongmin and Jeon, Wonseok and Lee, Byungjun and Pineau, Joelle and Kim, Kee-Eung}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={6120--6130}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2019 paper; braced the stylized method name {DualDICE}. | |
| @article{nachum2019dualdice, | |
| title={{DualDICE}: Behavior-agnostic estimation of discounted stationary distribution corrections}, | |
| author={Nachum, Ofir and Chow, Yinlam and Dai, Bo and Li, Lihong}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={32}, | |
| year={2019} | |
| } | |
| % arXiv preprint (arXiv:1912.02074); braced the stylized method name {AlgaeDICE}. | |
| @article{nachum2019algaedice, | |
| title={{AlgaeDICE}: Policy gradient from arbitrary experience}, | |
| author={Nachum, Ofir and Dai, Bo and Kostrikov, Ilya and Chow, Yinlam and Li, Lihong and Schuurmans, Dale}, | |
| journal={arXiv preprint arXiv:1912.02074}, | |
| year={2019} | |
| } | |
| % arXiv preprint (arXiv:2002.09072, ICLR 2020 version exists); braced {GenDICE}. | |
| @article{zhang2020gendice, | |
| title={{GenDICE}: Generalized offline estimation of stationary values}, | |
| author={Zhang, Ruiyi and Dai, Bo and Li, Lihong and Schuurmans, Dale}, | |
| journal={arXiv preprint arXiv:2002.09072}, | |
| year={2020} | |
| } | |
| % arXiv preprint (IQL, arXiv:2110.06169); braced {Q} in "Q-learning". | |
| @article{kostrikov2021offline, | |
| title={Offline reinforcement learning with implicit {Q}-learning}, | |
| author={Kostrikov, Ilya and Nair, Ashvin and Levine, Sergey}, | |
| journal={arXiv preprint arXiv:2110.06169}, | |
| year={2021} | |
| } | |
| % arXiv preprint. Normalized the auto-exported journal={ArXiv}+volume={abs/...} | |
| % form to this file's "arXiv preprint" convention, names to "Last, First", and | |
| % brace-protected {Q}/{Z} in the title. | |
| @article{Zhang2022OffPolicyFQ, | |
| title={Off-Policy Fitted {Q}-Evaluation with Differentiable Function Approximators: {Z}-Estimation and Inference Theory}, | |
| author={Zhang, Ruiqi and Zhang, Xuezhou and Ni, Chengzhuo and Wang, Mengdi}, | |
| journal={arXiv preprint arXiv:2202.04970}, | |
| year={2022} | |
| } | |
| % arXiv preprint. Normalized the auto-exported journal={ArXiv}+volume={abs/...} | |
| % form to this file's "arXiv preprint" convention and names to "Last, First". | |
| @article{Duan2021OptimalPE, | |
| title={Optimal policy evaluation using kernel-based temporal difference methods}, | |
| author={Duan, Yaqi and Wang, Mengdi and Wainwright, Martin J.}, | |
| journal={arXiv preprint arXiv:2109.12002}, | |
| year={2021} | |
| } | |
| % UAI 2020 paper; braced {Q*} so the symbol is not lowercased by title casing. | |
| @inproceedings{xie2020q, | |
| title={{Q*} approximation schemes for batch reinforcement learning: A theoretical comparison}, | |
| author={Xie, Tengyang and Jiang, Nan}, | |
| booktitle={Conference on Uncertainty in Artificial Intelligence}, | |
| pages={550--559}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint (BRAC, arXiv:1911.11361). | |
| @article{wu2019behavior, | |
| title={Behavior regularized offline reinforcement learning}, | |
| author={Wu, Yifan and Tucker, George and Nachum, Ofir}, | |
| journal={arXiv preprint arXiv:1911.11361}, | |
| year={2019} | |
| } | |
| % ICML 2019 paper; braced {Q} in "Q-learning". | |
| @inproceedings{yang2019sample, | |
| title={Sample-optimal parametric {Q}-learning using linearly additive features}, | |
| author={Yang, Lin and Wang, Mengdi}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={6995--7004}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint (arXiv:2202.06385). Repaired the garbled "loglog (T)" in the | |
| % title into protected math so it renders as log log(T). | |
| @article{qiao2022sample, | |
| title={Sample-Efficient Reinforcement Learning with {$\log\log(T)$} Switching Cost}, | |
| author={Qiao, Dan and Yin, Ming and Min, Ming and Wang, Yu-Xiang}, | |
| journal={arXiv preprint arXiv:2202.06385}, | |
| year={2022} | |
| } | |
| % NeurIPS 2020 paper; braced the acronym {MDPs} (was lowercase "mdps"). | |
| @article{jin2020simultaneously, | |
| title={Simultaneously learning stochastic and adversarial episodic {MDPs} with known transition}, | |
| author={Jin, Tiancheng and Luo, Haipeng}, | |
| journal={Advances in neural information processing systems}, | |
| volume={33}, | |
| pages={16557--16566}, | |
| year={2020} | |
| } | |
| % arXiv preprint (arXiv:1206.6400; ICML 2012 version exists). | |
| @article{arora2012online, | |
| title={Online bandit learning against an adaptive adversary: from regret to policy regret}, | |
| author={Arora, Raman and Dekel, Ofer and Tewari, Ambuj}, | |
| journal={arXiv preprint arXiv:1206.6400}, | |
| year={2012} | |
| } | |
| % arXiv preprint (arXiv:2206.03098). | |
| @article{amir2022better, | |
| title={Better Best of Both Worlds Bounds for Bandits with Switching Costs}, | |
| author={Amir, Idan and Azov, Guy and Koren, Tomer and Livni, Roi}, | |
| journal={arXiv preprint arXiv:2206.03098}, | |
| year={2022} | |
| } | |
| % arXiv preprint (arXiv:2204.11174). | |
| @article{malik2022complete, | |
| title={Complete Policy Regret Bounds for Tallying Bandits}, | |
| author={Malik, Dhruv and Li, Yuanzhi and Singh, Aarti}, | |
| journal={arXiv preprint arXiv:2204.11174}, | |
| year={2022} | |
| } | |
| % arXiv preprint (arXiv:1210.4843); braced the acronym {MDPs}. | |
| @article{arora2012deterministic, | |
| title={Deterministic {MDPs} with adversarial rewards and bandit feedback}, | |
| author={Arora, Raman and Dekel, Ofer and Tewari, Ambuj}, | |
| journal={arXiv preprint arXiv:1210.4843}, | |
| year={2012} | |
| } | |
| % ICML 2020 paper; braced the proper noun {Markov} (was lowercase "markov"). | |
| @inproceedings{jin2020learning, | |
| title={Learning adversarial {Markov} decision processes with bandit feedback and unknown transition}, | |
| author={Jin, Chi and Jin, Tiancheng and Luo, Haipeng and Sra, Suvrit and Yu, Tiancheng}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={4860--4869}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % ICML 2019 paper; braced the proper noun {Markov} (was lowercase "markov"). | |
| @inproceedings{rosenberg2019online, | |
| title={Online convex optimization in adversarial {Markov} decision processes}, | |
| author={Rosenberg, Aviv and Mansour, Yishay}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={5478--5486}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2021 paper. Removed the stray trailing period inside the title (styles | |
| % add their own punctuation) and braced the acronym {MDPs}. | |
| @article{neu2021online, | |
| title={Online learning in {MDPs} with linear function approximation and bandit feedback}, | |
| author={Neu, Gergely and Olkhovskaya, Julia}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={34}, | |
| pages={10407--10417}, | |
| year={2021} | |
| } | |
| % NeurIPS 2020 paper. | |
| @article{fei2020dynamic, | |
| title={Dynamic regret of policy optimization in non-stationary environments}, | |
| author={Fei, Yingjie and Yang, Zhuoran and Wang, Zhaoran and Xie, Qiaomin}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={33}, | |
| pages={6743--6754}, | |
| year={2020} | |
| } | |
</full_update>
| % ICML 2021 paper. Normalized: venue spelled out to match the file's other ICML | |
| % entries, names put in "Last, First" form, "K. Zhang" expanded to Kaiqing Zhang, | |
| % raw UTF-8 "Başar" replaced by the classic-BibTeX escape {\c{s}}, and the | |
| % acronym {MDPs} braced in the title. | |
| @inproceedings{Mao2021NearOptimalMR, | |
| title={Near-Optimal Model-Free Reinforcement Learning in Non-Stationary Episodic {MDPs}}, | |
| author={Mao, Weichao and Zhang, Kaiqing and Zhu, Ruihao and Simchi-Levi, David and Ba{\c{s}}ar, Tamer}, | |
| booktitle={International Conference on Machine Learning}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
| % ICML 2020 paper; braced the proper noun {Markov} (was lowercase "markov"). | |
| @inproceedings{cheung2020reinforcement, | |
| title={Reinforcement learning for non-stationary {Markov} decision processes: The blessing of (more) optimism}, | |
| author={Cheung, Wang Chi and Simchi-Levi, David and Zhu, Ruihao}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={1843--1854}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint. Normalized the auto-exported journal={ArXiv}+volume={abs/...} | |
| % form to this file's "arXiv preprint" convention and braced {Markov}. Author | |
| % names left in "First Last" order as in the original export. | |
| @article{Dinh2021OnlineMD, | |
| title={Online {Markov} Decision Processes with Non-oblivious Strategic Adversary}, | |
| author={Le Cong Dinh and David Henry Mguni and Long Tran-Thanh and Jun Wang and Yaodong Yang}, | |
| journal={arXiv preprint arXiv:2110.03604}, | |
| year={2021} | |
| } | |
| % ICML 2014 paper. Restored the missing accents in the author names | |
| % (Andr\'as Gy\"orgy, Csaba Szepesv\'ari) using classic-BibTeX special-character | |
| % escapes, and braced {Markov} in the title. | |
| @inproceedings{dick2014online, | |
| title={Online learning in {Markov} decision processes with changing cost sequences}, | |
| author={Dick, Travis and Gy{\"o}rgy, Andr{\'a}s and Szepesv{\'a}ri, Csaba}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={512--520}, | |
| year={2014}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2018 paper. | |
| @article{arora2018policy, | |
| title={Policy regret in repeated games}, | |
| author={Arora, Raman and Dinitz, Michael and Marinov, Teodor Vanislavov and Mohri, Mehryar}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={31}, | |
| year={2018} | |
| } | |
| % ALT 2020 paper. | |
| @inproceedings{suggala2020online, | |
| title={Online non-convex learning: Following the perturbed leader is optimal}, | |
| author={Suggala, Arun Sai and Netrapalli, Praneeth}, | |
| booktitle={Algorithmic Learning Theory}, | |
| pages={845--861}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint (arXiv:2110.14555); braced {V-Learning} and the acronym {RL} | |
| % so title recasing does not lowercase them. | |
| @article{jin2021v, | |
| title={{V-Learning}--A Simple, Efficient, Decentralized Algorithm for Multiagent {RL}}, | |
| author={Jin, Chi and Liu, Qinghua and Wang, Yuanhao and Yu, Tiancheng}, | |
| journal={arXiv preprint arXiv:2110.14555}, | |
| year={2021} | |
| } | |
| % ICML 2021 paper. | |
| @inproceedings{liu2021sharp, | |
| title={A sharp analysis of model-based reinforcement learning with self-play}, | |
| author={Liu, Qinghua and Yu, Tiancheng and Bai, Yu and Jin, Chi}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={7001--7010}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi