Last active
July 3, 2022 17:18
-
-
Save thanhnguyentang/7045bdff9d381c4e8fc0b3489a197c97 to your computer and use it in GitHub Desktop.
Bib refs for RL works
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@inproceedings{tripuraneni2021provable,
  title = {Provable meta-learning of linear representations},
  author = {Tripuraneni, Nilesh and Jin, Chi and Jordan, Michael},
  booktitle = {International Conference on Machine Learning},
  pages = {10434--10443},
  year = {2021},
  organization = {PMLR}
}
@inproceedings{mitchell2021offline,
  title = {Offline meta-reinforcement learning with advantage weighting},
  author = {Mitchell, Eric and Rafailov, Rafael and Peng, Xue Bin and Levine, Sergey and Finn, Chelsea},
  booktitle = {International Conference on Machine Learning},
  pages = {7780--7791},
  year = {2021},
  organization = {PMLR}
}
@article{dorfman2021offline,
  title = {Offline Meta Reinforcement Learning--Identifiability Challenges and Effective Data Collection Strategies},
  author = {Dorfman, Ron and Shenfeld, Idan and Tamar, Aviv},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@inproceedings{cella2020meta,
  title = {Meta-learning with stochastic linear bandits},
  author = {Cella, Leonardo and Lazaric, Alessandro and Pontil, Massimiliano},
  booktitle = {International Conference on Machine Learning},
  pages = {1360--1370},
  year = {2020},
  organization = {PMLR}
}
@article{cesa2021multitask,
  title = {Multitask Online Mirror Descent},
  author = {Cesa-Bianchi, Nicol{\`o} and Laforgue, Pierre and Paudice, Andrea and Pontil, Massimiliano},
  journal = {arXiv preprint arXiv:2106.02393},
  year = {2021}
}
@inproceedings{hu2021near,
  title = {Near-optimal representation learning for linear bandits and linear {RL}},
  author = {Hu, Jiachen and Chen, Xiaoyu and Jin, Chi and Li, Lihong and Wang, Liwei},
  booktitle = {International Conference on Machine Learning},
  pages = {4349--4358},
  year = {2021},
  organization = {PMLR}
}
@article{zhang2021variance,
  title = {Variance-aware confidence set: Variance-dependent bound for linear bandits and horizon-free bound for linear mixture {MDP}},
  author = {Zhang, Zihan and Yang, Jiaqi and Ji, Xiangyang and Du, Simon S},
  journal = {arXiv preprint arXiv:2101.12745},
  year = {2021}
}
@article{minimax_repr,
  title = {Nearly Minimax Algorithms for Linear Bandits with Shared Representation},
  author = {Anonymous},
  journal = {Under review for ICML},
  year = {2022}
}
@inproceedings{jin2020provably,
  title = {Provably efficient reinforcement learning with linear function approximation},
  author = {Jin, Chi and Yang, Zhuoran and Wang, Zhaoran and Jordan, Michael I},
  booktitle = {Conference on Learning Theory},
  pages = {2137--2143},
  year = {2020},
  organization = {PMLR}
}
@misc{nguyentang2021sample,
  title = {Sample Complexity of Offline Reinforcement Learning with Deep {ReLU} Networks},
  author = {Nguyen-Tang, Thanh and Gupta, Sunil and Tran-The, Hung and Venkatesh, Svetha},
  year = {2021},
  eprint = {2103.06671},
  archivePrefix = {arXiv},
  primaryClass = {stat.ML}
}
@inproceedings{wang2021instabilities,
  title = {Instabilities of offline {RL} with pre-trained neural representation},
  author = {Wang, Ruosong and Wu, Yifan and Salakhutdinov, Ruslan and Kakade, Sham},
  booktitle = {International Conference on Machine Learning},
  pages = {10948--10960},
  year = {2021},
  organization = {PMLR}
}
@article{Jin2021BellmanED,
  title = {{Bellman} Eluder Dimension: New Rich Classes of {RL} Problems, and Sample-Efficient Algorithms},
  author = {Jin, Chi and Liu, Qinghua and Miryoosefi, Sobhan},
  journal = {ArXiv},
  year = {2021},
  volume = {abs/2102.00815}
}
@article{nguyen2021offline,
  title = {Offline Neural Contextual Bandits: Pessimism, Optimization and Generalization},
  author = {Nguyen-Tang, Thanh and Gupta, Sunil and Nguyen, A Tuan and Venkatesh, Svetha},
  journal = {arXiv preprint arXiv:2111.13807},
  year = {2021}
}
@article{yinnear,
  title = {Near-optimal Offline Reinforcement Learning with Linear Representation: Leveraging Variance Information with Pessimism},
  author = {Yin, Ming and Wang, Yu-Xiang and Duan, Yaqi and Wang, Mengdi},
  year = {2022}
}
@article{yang2020function,
  title = {On function approximation in reinforcement learning: Optimism in the face of large state spaces},
  author = {Yang, Zhuoran and Jin, Chi and Wang, Zhaoran and Wang, Mengdi and Jordan, Michael I},
  journal = {arXiv preprint arXiv:2011.04622},
  year = {2020}
}
@inproceedings{jin2021pessimism,
  title = {Is pessimism provably efficient for offline {RL}?},
  author = {Jin, Ying and Yang, Zhuoran and Wang, Zhaoran},
  booktitle = {International Conference on Machine Learning},
  pages = {5084--5096},
  year = {2021},
  organization = {PMLR}
}
@inproceedings{cai2020provably,
  title = {Provably efficient exploration in policy optimization},
  author = {Cai, Qi and Yang, Zhuoran and Jin, Chi and Wang, Zhaoran},
  booktitle = {International Conference on Machine Learning},
  pages = {1283--1294},
  year = {2020},
  organization = {PMLR}
}
@article{mucke2021data,
  title = {Data splitting improves statistical performance in overparametrized regimes},
  author = {M{\"u}cke, Nicole and Reiss, Enrico and Rungenhagen, Jonas and Klein, Markus},
  journal = {arXiv preprint arXiv:2110.10956},
  year = {2021}
}
@article{cai2019neural,
  title = {Neural temporal-difference learning converges to global optima},
  author = {Cai, Qi and Yang, Zhuoran and Lee, Jason D and Wang, Zhaoran},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  year = {2019}
}
@article{jacot2018neural,
  title = {Neural tangent kernel: Convergence and generalization in neural networks},
  author = {Jacot, Arthur and Gabriel, Franck and Hongler, Cl{\'e}ment},
  journal = {arXiv preprint arXiv:1806.07572},
  year = {2018}
}
@article{arora2019exact,
  title = {On exact computation with an infinitely wide neural net},
  author = {Arora, Sanjeev and Du, Simon S and Hu, Wei and Li, Zhiyuan and Salakhutdinov, Ruslan and Wang, Ruosong},
  journal = {arXiv preprint arXiv:1904.11955},
  year = {2019}
}
@inproceedings{allen2019convergence,
  title = {A convergence theory for deep learning via over-parameterization},
  author = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Song, Zhao},
  booktitle = {International Conference on Machine Learning},
  pages = {242--252},
  year = {2019},
  organization = {PMLR}
}
@article{hanin2019finite,
  title = {Finite depth and width corrections to the neural tangent kernel},
  author = {Hanin, Boris and Nica, Mihai},
  journal = {arXiv preprint arXiv:1909.05989},
  year = {2019}
}
@article{cao2019generalization,
  title = {Generalization bounds of stochastic gradient descent for wide and deep neural networks},
  author = {Cao, Yuan and Gu, Quanquan},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  pages = {10836--10846},
  year = {2019}
}
@article{belkin2021fit,
  title = {Fit without fear: remarkable mathematical phenomena of deep learning through the prism of interpolation},
  author = {Belkin, Mikhail},
  journal = {arXiv preprint arXiv:2105.14368},
  year = {2021}
}
@inproceedings{zhou2020neural,
  title = {Neural contextual bandits with {UCB}-based exploration},
  author = {Zhou, Dongruo and Li, Lihong and Gu, Quanquan},
  booktitle = {International Conference on Machine Learning},
  pages = {11492--11502},
  year = {2020},
  organization = {PMLR}
}
@article{dumer2007covering,
  title = {Covering spheres with spheres},
  author = {Dumer, Ilya},
  journal = {Discrete \& Computational Geometry},
  volume = {38},
  number = {4},
  pages = {665--679},
  year = {2007},
  publisher = {Springer}
}
@article{gouk2021regularisation,
  title = {Regularisation of neural networks by enforcing {Lipschitz} continuity},
  author = {Gouk, Henry and Frank, Eibe and Pfahringer, Bernhard and Cree, Michael J},
  journal = {Machine Learning},
  volume = {110},
  number = {2},
  pages = {393--416},
  year = {2021},
  publisher = {Springer}
}
@inproceedings{nguyen2021tight,
  title = {Tight bounds on the smallest eigenvalue of the neural tangent kernel for deep {ReLU} networks},
  author = {Nguyen, Quynh and Mondelli, Marco and Montufar, Guido F},
  booktitle = {International Conference on Machine Learning},
  pages = {8119--8129},
  year = {2021},
  organization = {PMLR}
}
@article{gao2019convergence,
  title = {Convergence of adversarial training in overparametrized neural networks},
  author = {Gao, Ruiqi and Cai, Tianle and Li, Haochuan and Hsieh, Cho-Jui and Wang, Liwei and Lee, Jason D},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  year = {2019}
}
@article{fulton2000eigenvalues,
  title = {Eigenvalues, invariant factors, highest weights, and {Schubert} calculus},
  author = {Fulton, William},
  journal = {Bulletin of the American Mathematical Society},
  volume = {37},
  number = {3},
  pages = {209--249},
  year = {2000}
}
@article{Schur1911,
  title = {Bemerkungen zur Theorie der beschr{\"a}nkten Bilinearformen mit unendlich vielen Ver{\"a}nderlichen},
  author = {Schur, J.},
  journal = {Journal f{\"u}r die reine und angewandte Mathematik},
  volume = {140},
  pages = {1--28},
  url = {http://eudml.org/doc/149352},
  year = {1911}
}
@article{foster2021offline,
  title = {Offline Reinforcement Learning: Fundamental Barriers for Value Function Approximation},
  author = {Foster, Dylan J and Krishnamurthy, Akshay and Simchi-Levi, David and Xu, Yunzong},
  journal = {arXiv preprint arXiv:2111.10919},
  year = {2021}
}
@article{zhan2022offline,
  title = {Offline Reinforcement Learning with Realizability and Single-policy Concentrability},
  author = {Zhan, Wenhao and Huang, Baihe and Huang, Audrey and Jiang, Nan and Lee, Jason D},
  journal = {arXiv preprint arXiv:2202.04634},
  year = {2022}
}
@article{yin2020near,
  title = {Near-Optimal Provable Uniform Convergence in Offline Policy Evaluation for Reinforcement Learning},
  author = {Yin, Ming and Bai, Yu and Wang, Yu-Xiang},
  journal = {arXiv preprint arXiv:2007.03760},
  year = {2020}
}
@inproceedings{szepesvari2005finite,
  title = {Finite time bounds for sampling based fitted value iteration},
  author = {Szepesv{\'a}ri, Csaba and Munos, R{\'e}mi},
  booktitle = {Proceedings of the 22nd International Conference on Machine Learning},
  pages = {880--887},
  year = {2005}
}
@inproceedings{chen2019information,
  title = {Information-theoretic considerations in batch reinforcement learning},
  author = {Chen, Jinglin and Jiang, Nan},
  booktitle = {International Conference on Machine Learning},
  pages = {1042--1051},
  year = {2019},
  organization = {PMLR}
}
@article{liu2019off,
  title = {Off-policy policy gradient with state distribution correction},
  author = {Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
  journal = {arXiv preprint arXiv:1904.08473},
  year = {2019}
}
@article{rashidinejad2021bridging,
  title = {Bridging offline reinforcement learning and imitation learning: A tale of pessimism},
  author = {Rashidinejad, Paria and Zhu, Banghua and Ma, Cong and Jiao, Jiantao and Russell, Stuart},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{xie2021policy,
  title = {Policy finetuning: Bridging sample-efficient offline and online reinforcement learning},
  author = {Xie, Tengyang and Jiang, Nan and Wang, Huan and Xiong, Caiming and Bai, Yu},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{yin2021towards,
  title = {Towards instance-optimal offline reinforcement learning with pessimism},
  author = {Yin, Ming and Wang, Yu-Xiang},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{xie2021bellman,
  title = {{Bellman}-consistent pessimism for offline reinforcement learning},
  author = {Xie, Tengyang and Cheng, Ching-An and Jiang, Nan and Mineiro, Paul and Agarwal, Alekh},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{chang2021mitigating,
  title = {Mitigating Covariate Shift in Imitation Learning via Offline Data With Partial Coverage},
  author = {Chang, Jonathan and Uehara, Masatoshi and Sreenivas, Dhruv and Kidambi, Rahul and Sun, Wen},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{uehara2021pessimistic,
  title = {Pessimistic Model-based Offline Reinforcement Learning under Partial Coverage},
  author = {Uehara, Masatoshi and Sun, Wen},
  journal = {arXiv preprint arXiv:2107.06226},
  year = {2021}
}
@article{liu2020provably,
  title = {Provably good batch reinforcement learning without great exploration},
  author = {Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
  journal = {arXiv preprint arXiv:2007.08202},
  year = {2020}
}
@article{kidambi2020morel,
  title = {{MOReL}: Model-based offline reinforcement learning},
  author = {Kidambi, Rahul and Rajeswaran, Aravind and Netrapalli, Praneeth and Joachims, Thorsten},
  journal = {Advances in Neural Information Processing Systems},
  volume = {33},
  pages = {21810--21823},
  year = {2020}
}
@article{wang2020statistical,
  title = {What are the Statistical Limits of Offline {RL} with Linear Function Approximation?},
  author = {Wang, Ruosong and Foster, Dean P and Kakade, Sham M},
  journal = {arXiv preprint arXiv:2010.11895},
  year = {2020}
}
@article{amortila2020variant,
  title = {A variant of the {Wang-Foster-Kakade} lower bound for the discounted setting},
  author = {Amortila, Philip and Jiang, Nan and Xie, Tengyang},
  journal = {arXiv preprint arXiv:2011.01075},
  year = {2020}
}
@inproceedings{zanette2021exponential,
  title = {Exponential lower bounds for batch reinforcement learning: Batch {RL} can be exponentially harder than online {RL}},
  author = {Zanette, Andrea},
  booktitle = {International Conference on Machine Learning},
  pages = {12287--12297},
  year = {2021},
  organization = {PMLR}
}
@article{chen2021infinite,
  title = {Infinite-horizon offline reinforcement learning with linear function approximation: Curse of dimensionality and algorithm},
  author = {Chen, Lin and Scherrer, Bruno and Bartlett, Peter L},
  journal = {arXiv preprint arXiv:2103.09847},
  year = {2021}
}
@inproceedings{Chen2022OfflineRL,
  title = {Offline Reinforcement Learning Under Value and Density-Ratio Realizability: the Power of Gaps},
  author = {Chen, Jinglin and Jiang, Nan},
  year = {2022}
}
@inproceedings{zhou2021nearly,
  title = {Nearly minimax optimal reinforcement learning for linear mixture {Markov} decision processes},
  author = {Zhou, Dongruo and Gu, Quanquan and Szepesvari, Csaba},
  booktitle = {Conference on Learning Theory},
  pages = {4532--4576},
  year = {2021},
  organization = {PMLR}
}
@inproceedings{NIPS2014_2ab56412,
  title = {``How hard is my {MDP}?'' The distribution-norm to the rescue},
  author = {Maillard, Odalric-Ambrym and Mann, Timothy A and Mannor, Shie},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {Z. Ghahramani and M. Welling and C. Cortes and N. Lawrence and K. Q. Weinberger},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2014/file/2ab56412b1163ee131e1246da0955bd1-Paper.pdf},
  volume = {27},
  year = {2014}
}
@inproceedings{azar2017minimax,
  title = {Minimax regret bounds for reinforcement learning},
  author = {Azar, Mohammad Gheshlaghi and Osband, Ian and Munos, R{\'e}mi},
  booktitle = {International Conference on Machine Learning},
  pages = {263--272},
  year = {2017},
  organization = {PMLR}
}
@inproceedings{weisz2021exponential,
  title = {Exponential lower bounds for planning in {MDP}s with linearly-realizable optimal action-value functions},
  author = {Weisz, Gell{\'e}rt and Amortila, Philip and Szepesv{\'a}ri, Csaba},
  booktitle = {Algorithmic Learning Theory},
  pages = {1237--1264},
  year = {2021},
  organization = {PMLR}
}
@article{bubeck2012regret,
  title = {Regret analysis of stochastic and nonstochastic multi-armed bandit problems},
  author = {Bubeck, S{\'e}bastien and Cesa-Bianchi, Nicol{\`o}},
  journal = {arXiv preprint arXiv:1204.5721},
  year = {2012}
}
@article{ok2018exploration,
  title = {Exploration in structured reinforcement learning},
  author = {Ok, Jungseul and Proutiere, Alexandre and Tranos, Damianos},
  journal = {Advances in Neural Information Processing Systems},
  volume = {31},
  year = {2018}
}
@article{simchowitz2019non,
  title = {Non-asymptotic gap-dependent regret bounds for tabular {MDP}s},
  author = {Simchowitz, Max and Jamieson, Kevin G},
  journal = {Advances in Neural Information Processing Systems},
  volume = {32},
  year = {2019}
}
@book{lattimore_szepesvari_2020,
  title = {Bandit Algorithms},
  author = {Lattimore, Tor and Szepesv{\'a}ri, Csaba},
  publisher = {Cambridge University Press},
  address = {Cambridge},
  doi = {10.1017/9781108571401},
  year = {2020}
}
@inproceedings{he2021logarithmic,
  title = {Logarithmic regret for reinforcement learning with linear function approximation},
  author = {He, Jiafan and Zhou, Dongruo and Gu, Quanquan},
  booktitle = {International Conference on Machine Learning},
  pages = {4171--4180},
  year = {2021},
  organization = {PMLR}
}
@article{papini2021reinforcement,
  title = {Reinforcement Learning in Linear {MDPs}: Constant Regret and Representation Selection},
  author = {Papini, Matteo and Tirinzoni, Andrea and Pacchiano, Aldo and Restelli, Marcello and Lazaric, Alessandro and Pirotta, Matteo},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@inproceedings{zanette2020learning,
  title = {Learning near optimal policies with low inherent {Bellman} error},
  author = {Zanette, Andrea and Lazaric, Alessandro and Kochenderfer, Mykel and Brunskill, Emma},
  booktitle = {International Conference on Machine Learning},
  pages = {10978--10989},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{zanette2020frequentist,
  title = {Frequentist regret bounds for randomized least-squares value iteration},
  author = {Zanette, Andrea and Brandfonbrener, David and Brunskill, Emma and Pirotta, Matteo and Lazaric, Alessandro},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages = {1954--1964},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{yang2020reinforcement,
  title = {Reinforcement learning in feature space: Matrix bandit, kernels, and regret bound},
  author = {Yang, Lin and Wang, Mengdi},
  booktitle = {International Conference on Machine Learning},
  pages = {10746--10756},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{ayoub2020model,
  title = {Model-based reinforcement learning with value-targeted regression},
  author = {Ayoub, Alex and Jia, Zeyu and Szepesvari, Csaba and Wang, Mengdi and Yang, Lin},
  booktitle = {International Conference on Machine Learning},
  pages = {463--474},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{jiang2017contextual,
  title = {Contextual decision processes with low {Bellman} rank are {PAC}-learnable},
  author = {Jiang, Nan and Krishnamurthy, Akshay and Agarwal, Alekh and Langford, John and Schapire, Robert E},
  booktitle = {International Conference on Machine Learning},
  pages = {1704--1713},
  year = {2017},
  organization = {PMLR}
}
@article{DBLP:journals/corr/OrtnerMR14,
  title = {Selecting Near-Optimal Approximate State Representations in Reinforcement Learning},
  author = {Ortner, Ronald and Maillard, Odalric-Ambrym and Ryabko, Daniil},
  journal = {CoRR},
  volume = {abs/1405.2652},
  year = {2014},
  url = {http://arxiv.org/abs/1405.2652},
  eprinttype = {arXiv},
  eprint = {1405.2652},
  biburl = {https://dblp.org/rec/journals/corr/OrtnerMR14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{NEURIPS2019_9b8b50fb,
  title = {Regret Bounds for Learning State Representations in Reinforcement Learning},
  author = {Ortner, Ronald and Pirotta, Matteo and Lazaric, Alessandro and Fruit, Ronan and Maillard, Odalric-Ambrym},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2019/file/9b8b50fb590c590ffbf1295ce92258dc-Paper.pdf},
  volume = {32},
  year = {2019}
}
@article{DBLP:journals/corr/abs-2011-09750,
  title = {Online Model Selection for Reinforcement Learning with Function Approximation},
  author = {Lee, Jonathan N. and Pacchiano, Aldo and Muthukumar, Vidya and Kong, Weihao and Brunskill, Emma},
  journal = {CoRR},
  volume = {abs/2011.09750},
  year = {2020},
  url = {https://arxiv.org/abs/2011.09750},
  eprinttype = {arXiv},
  eprint = {2011.09750},
  biburl = {https://dblp.org/rec/journals/corr/abs-2011-09750.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{du2019provably,
  title = {Provably efficient {RL} with rich observations via latent state decoding},
  author = {Du, Simon and Krishnamurthy, Akshay and Jiang, Nan and Agarwal, Alekh and Dudik, Miroslav and Langford, John},
  booktitle = {International Conference on Machine Learning},
  pages = {1665--1674},
  year = {2019},
  organization = {PMLR}
}
@article{DBLP:journals/corr/abs-2006-10814,
  title = {{FLAMBE:} Structural Complexity and Representation Learning of Low Rank {MDPs}},
  author = {Agarwal, Alekh and Kakade, Sham M. and Krishnamurthy, Akshay and Sun, Wen},
  journal = {CoRR},
  volume = {abs/2006.10814},
  year = {2020},
  url = {https://arxiv.org/abs/2006.10814},
  eprinttype = {arXiv},
  eprint = {2006.10814},
  biburl = {https://dblp.org/rec/journals/corr/abs-2006-10814.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-07035,
  title = {Model-free Representation Learning and Exploration in Low-rank {MDPs}},
  author = {Modi, Aditya and Chen, Jinglin and Krishnamurthy, Akshay and Jiang, Nan and Agarwal, Alekh},
  journal = {CoRR},
  volume = {abs/2102.07035},
  year = {2021},
  url = {https://arxiv.org/abs/2102.07035},
  eprinttype = {arXiv},
  eprint = {2102.07035},
  biburl = {https://dblp.org/rec/journals/corr/abs-2102-07035.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-06996,
  title = {Adaptive Exploration in Linear Contextual Bandit},
  author = {Hao, Botao and Lattimore, Tor and Szepesv{\'a}ri, Csaba},
  journal = {CoRR},
  volume = {abs/1910.06996},
  year = {2019},
  url = {http://arxiv.org/abs/1910.06996},
  eprinttype = {arXiv},
  eprint = {1910.06996},
  biburl = {https://dblp.org/rec/journals/corr/abs-1910-06996.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-03781,
  title = {Leveraging Good Representations in Linear Contextual Bandits},
  author = {Papini, Matteo and Tirinzoni, Andrea and Restelli, Marcello and Lazaric, Alessandro and Pirotta, Matteo},
  journal = {CoRR},
  volume = {abs/2104.03781},
  year = {2021},
  url = {https://arxiv.org/abs/2104.03781},
  eprinttype = {arXiv},
  eprint = {2104.03781},
  biburl = {https://dblp.org/rec/journals/corr/abs-2104-03781.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Zanette2022BellmanRO,
  title = {{Bellman} Residual Orthogonalization for Offline Reinforcement Learning},
  author = {Zanette, Andrea and Wainwright, Martin J.},
  journal = {ArXiv},
  year = {2022},
  volume = {abs/2203.12786}
}
@article{Perdomo2022ASC,
  title = {A Sharp Characterization of Linear Estimators for Offline Policy Evaluation},
  author = {Perdomo, Juan C. and Krishnamurthy, Akshay and Bartlett, Peter L. and Kakade, Sham M.},
  journal = {ArXiv},
  year = {2022},
  volume = {abs/2203.04236}
}
@article{hu2021fast,
  title = {Fast rates for the regret of offline reinforcement learning},
  author = {Hu, Yichun and Kallus, Nathan and Uehara, Masatoshi},
  journal = {arXiv preprint arXiv:2102.00479},
  year = {2021}
}
@article{audibert2005fast,
  title = {Fast learning rates for plug-in classifiers under the margin condition},
  author = {Audibert, Jean-Yves and Tsybakov, Alexandre B},
  journal = {arXiv preprint math/0507180},
  year = {2005}
}
@article{mou2020sample,
  title = {On the sample complexity of reinforcement learning with policy space generalization},
  author = {Mou, Wenlong and Wen, Zheng and Chen, Xi},
  journal = {arXiv preprint arXiv:2008.07353},
  year = {2020}
}
@inproceedings{yang2021q,
  title = {{Q}-learning with logarithmic regret},
  author = {Yang, Kunhe and Yang, Lin and Du, Simon},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages = {1576--1584},
  year = {2021},
  organization = {PMLR}
}
@article{wang2021exponential,
  title = {An Exponential Lower Bound for Linearly Realizable {MDP} with Constant Suboptimality Gap},
  author = {Wang, Yuanhao and Wang, Ruosong and Kakade, Sham},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@article{min2021variance,
  title = {Variance-aware off-policy evaluation with linear function approximation},
  author = {Min, Yifei and Wang, Tianhao and Zhou, Dongruo and Gu, Quanquan},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  year = {2021}
}
@inproceedings{NIPS2011_e1d5be1c,
  title = {Improved Algorithms for Linear Stochastic Bandits},
  author = {Abbasi-Yadkori, Yasin and P\'{a}l, D\'{a}vid and Szepesv\'{a}ri, Csaba},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {J. Shawe-Taylor and R. Zemel and P. Bartlett and F. Pereira and K.Q. Weinberger},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2011/file/e1d5be1c7f2f456670de3d53c7b54f4a-Paper.pdf},
  volume = {24},
  year = {2011}
}
@inproceedings{duan2020minimax,
  title = {Minimax-optimal off-policy evaluation with linear function approximation},
  author = {Duan, Yaqi and Jia, Zeyu and Wang, Mengdi},
  booktitle = {International Conference on Machine Learning},
  pages = {2701--2709},
  year = {2020},
  organization = {PMLR}
}
@inproceedings{DBLP:conf/uai/LiuSAB19,
  title = {Off-Policy Policy Gradient with Stationary Distribution Correction},
  author = {Liu, Yao and Swaminathan, Adith and Agarwal, Alekh and Brunskill, Emma},
  editor = {Globerson, Amir and Silva, Ricardo},
  booktitle = {Proceedings of the Thirty-Fifth Conference on Uncertainty in Artificial Intelligence, {UAI} 2019, Tel Aviv, Israel, July 22-25, 2019},
  series = {Proceedings of Machine Learning Research},
  volume = {115},
  pages = {1180--1190},
  publisher = {{AUAI} Press},
  year = {2019},
  url = {http://proceedings.mlr.press/v115/liu20a.html},
  biburl = {https://dblp.org/rec/conf/uai/LiuSAB19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
| @article{bartlett2005local, | |
| title={Local rademacher complexities}, | |
| author={Bartlett, Peter L and Bousquet, Olivier and Mendelson, Shahar}, | |
| journal={The Annals of Statistics}, | |
| volume={33}, | |
| number={4}, | |
| pages={1497--1537}, | |
| year={2005}, | |
| publisher={Institute of Mathematical Statistics} | |
| } | |
| @article{tropp2011freedman, | |
| title={Freedman's inequality for matrix martingales}, | |
| author={Tropp, Joel}, | |
| journal={Electronic Communications in Probability}, | |
| volume={16}, | |
| pages={262--270}, | |
| year={2011}, | |
| publisher={Institute of Mathematical Statistics and Bernoulli Society} | |
| } | |
| @article{nguyen2022practical, | |
| title={On Practical Reinforcement Learning: Provable Robustness, Scalability, and Statistical Efficiency}, | |
| author={Nguyen-Tang, Thanh}, | |
| journal={arXiv preprint arXiv:2203.01758}, | |
| year={2022} | |
| } | |
| @article{Duan2021RiskBA, | |
| title={Risk Bounds and Rademacher Complexity in Batch Reinforcement Learning}, | |
| author={Yaqi Duan and Chi Jin and Zhiyuan Li}, | |
| journal={ArXiv}, | |
| year={2021}, | |
| volume={abs/2103.13883} | |
| } | |
| @incollection{lange2012batch, | |
| title={Batch reinforcement learning}, | |
| author={Lange, Sascha and Gabel, Thomas and Riedmiller, Martin}, | |
| booktitle={Reinforcement learning}, | |
| pages={45--73}, | |
| year={2012}, | |
| publisher={Springer} | |
| } | |
| @article{levine2020offline, | |
| title={Offline reinforcement learning: Tutorial, review, and perspectives on open problems}, | |
| author={Levine, Sergey and Kumar, Aviral and Tucker, George and Fu, Justin}, | |
| journal={arXiv preprint arXiv:2005.01643}, | |
| year={2020} | |
| } | |
| % Journal article (Nature Medicine, 2019). Journal name capitalized: journal | |
| % fields are printed as-is by standard styles, so "Nature medicine" would render | |
| % with a lowercase m. | |
| @article{gottesman2019guidelines, | |
| title={Guidelines for reinforcement learning in healthcare}, | |
| author={Gottesman, Omer and Johansson, Fredrik and Komorowski, Matthieu and Faisal, Aldo and Sontag, David and Doshi-Velez, Finale and Celi, Leo Anthony}, | |
| journal={Nature Medicine}, | |
| volume={25}, | |
| number={1}, | |
| pages={16--18}, | |
| year={2019}, | |
| publisher={Nature Publishing Group} | |
| } | |
| % Journal article (JASA, 2021). | |
| @article{nie2021learning, | |
| title={Learning when-to-treat policies}, | |
| author={Nie, Xinkun and Brunskill, Emma and Wager, Stefan}, | |
| journal={Journal of the American Statistical Association}, | |
| volume={116}, | |
| number={533}, | |
| pages={392--409}, | |
| year={2021}, | |
| publisher={Taylor \& Francis} | |
| } | |
| % arXiv preprint (arXiv:1003.0120, 2010). | |
| @article{strehl2010learning, | |
| title={Learning from logged implicit exploration data}, | |
| author={Strehl, Alex and Langford, John and Kakade, Sham and Li, Lihong}, | |
| journal={arXiv preprint arXiv:1003.0120}, | |
| year={2010} | |
| } | |
| % AAAI 2017 paper. Fixed the pages range, which used a raw Unicode en-dash | |
| % (fragile under classic 8-bit BibTeX) instead of the standard "--". | |
| @inproceedings{thomasAAAI17, | |
| author = {Thomas, Philip S. and Theocharous, Georgios and Ghavamzadeh, Mohammad and Durugkar, Ishan and Brunskill, Emma}, | |
| title = {Predictive Off-Policy Policy Evaluation for Nonstationary Decision Problems, with Applications to Digital Marketing}, | |
| year = {2017}, | |
| publisher = {AAAI Press}, | |
| booktitle = {Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence}, | |
| pages = {4740--4745}, | |
| numpages = {6}, | |
| location = {San Francisco, California, USA}, | |
| series = {AAAI'17} | |
| } | |
| % Journal article (Econometrica, 2018). Fixed single-hyphen page range and | |
| % stripped the resolver prefix from the DOI (store the bare DOI; styles and URL | |
| % packages add https://doi.org/ themselves). | |
| @article{Kitagawa18, | |
| author = {Kitagawa, Toru and Tetenov, Aleksey}, | |
| title = {Who Should Be Treated? Empirical Welfare Maximization Methods for Treatment Choice}, | |
| journal = {Econometrica}, | |
| volume = {86}, | |
| number = {2}, | |
| pages = {591--616}, | |
| keywords = {Heterogeneous treatment effects, randomized experiments, program evaluation, individualized treatment rules, empirical risk minimization, risk bounds}, | |
| doi = {10.3982/ECTA13288}, | |
| url = {https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA13288}, | |
| eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.3982/ECTA13288}, | |
| year = {2018} | |
| } | |
| % Journal article (Econometrica, 2021). | |
| @article{athey2021policy, | |
| title={Policy learning with observational data}, | |
| author={Athey, Susan and Wager, Stefan}, | |
| journal={Econometrica}, | |
| volume={89}, | |
| number={1}, | |
| pages={133--161}, | |
| year={2021}, | |
| publisher={Wiley Online Library} | |
| } | |
| % ICLR 2022 paper; acronyms already brace-protected in the title. | |
| @inproceedings{uehara2022representation, | |
| title={Representation Learning for Online and Offline {RL} in Low-rank {MDP}s}, | |
| author={Masatoshi Uehara and Xuezhou Zhang and Wen Sun}, | |
| booktitle={International Conference on Learning Representations}, | |
| year={2022}, | |
| url={https://openreview.net/forum?id=J4iSIR9fhY0} | |
| } | |
| % ICML 2019 paper (BCQ). | |
| @inproceedings{fujimoto2019off, | |
| title={Off-policy deep reinforcement learning without exploration}, | |
| author={Fujimoto, Scott and Meger, David and Precup, Doina}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={2052--2062}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % ICML 2019 paper. | |
| @inproceedings{le2019batch, | |
| title={Batch policy learning under constraints}, | |
| author={Le, Hoang and Voloshin, Cameron and Yue, Yisong}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={3703--3712}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2019 paper (BEAR); braced {Q} so "Q-learning" survives title recasing. | |
| @article{kumar2019stabilizing, | |
| title={Stabilizing off-policy {Q}-learning via bootstrapping error reduction}, | |
| author={Kumar, Aviral and Fu, Justin and Soh, Matthew and Tucker, George and Levine, Sergey}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={32}, | |
| year={2019} | |
| } | |
| % NeurIPS 2020 paper (CQL); braced {Q} so "Q-learning" survives title recasing. | |
| @article{kumar2020conservative, | |
| title={Conservative {Q}-learning for offline reinforcement learning}, | |
| author={Kumar, Aviral and Zhou, Aurick and Tucker, George and Levine, Sergey}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={33}, | |
| pages={1179--1191}, | |
| year={2020} | |
| } | |
| % ICML 2021 paper; braced {Rademacher}. NOTE(review): Duan2021RiskBA earlier in | |
| % this file appears to be the arXiv version of this same work. | |
| @inproceedings{duan2021risk, | |
| title={Risk bounds and {Rademacher} complexity in batch reinforcement learning}, | |
| author={Duan, Yaqi and Jin, Chi and Li, Zhiyuan}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={2892--2902}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
| % ICML 2021 paper; braced the stylized method name {OptiDICE} so its casing is | |
| % preserved under sentence-casing styles. | |
| @inproceedings{lee2021optidice, | |
| title={{OptiDICE}: Offline policy optimization via stationary distribution correction estimation}, | |
| author={Lee, Jongmin and Jeon, Wonseok and Lee, Byungjun and Pineau, Joelle and Kim, Kee-Eung}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={6120--6130}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2019 paper; braced the stylized method name {DualDICE}. | |
| @article{nachum2019dualdice, | |
| title={{DualDICE}: Behavior-agnostic estimation of discounted stationary distribution corrections}, | |
| author={Nachum, Ofir and Chow, Yinlam and Dai, Bo and Li, Lihong}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={32}, | |
| year={2019} | |
| } | |
| % arXiv preprint (arXiv:1912.02074); braced the stylized method name {AlgaeDICE}. | |
| @article{nachum2019algaedice, | |
| title={{AlgaeDICE}: Policy gradient from arbitrary experience}, | |
| author={Nachum, Ofir and Dai, Bo and Kostrikov, Ilya and Chow, Yinlam and Li, Lihong and Schuurmans, Dale}, | |
| journal={arXiv preprint arXiv:1912.02074}, | |
| year={2019} | |
| } | |
| % arXiv preprint (arXiv:2002.09072, ICLR 2020 version exists); braced {GenDICE}. | |
| @article{zhang2020gendice, | |
| title={{GenDICE}: Generalized offline estimation of stationary values}, | |
| author={Zhang, Ruiyi and Dai, Bo and Li, Lihong and Schuurmans, Dale}, | |
| journal={arXiv preprint arXiv:2002.09072}, | |
| year={2020} | |
| } | |
| % arXiv preprint (IQL, arXiv:2110.06169); braced {Q} in "Q-learning". | |
| @article{kostrikov2021offline, | |
| title={Offline reinforcement learning with implicit {Q}-learning}, | |
| author={Kostrikov, Ilya and Nair, Ashvin and Levine, Sergey}, | |
| journal={arXiv preprint arXiv:2110.06169}, | |
| year={2021} | |
| } | |
| % arXiv preprint. Normalized the auto-exported journal={ArXiv}+volume={abs/...} | |
| % form to this file's "arXiv preprint" convention, names to "Last, First", and | |
| % brace-protected {Q}/{Z} in the title. | |
| @article{Zhang2022OffPolicyFQ, | |
| title={Off-Policy Fitted {Q}-Evaluation with Differentiable Function Approximators: {Z}-Estimation and Inference Theory}, | |
| author={Zhang, Ruiqi and Zhang, Xuezhou and Ni, Chengzhuo and Wang, Mengdi}, | |
| journal={arXiv preprint arXiv:2202.04970}, | |
| year={2022} | |
| } | |
| % arXiv preprint. Normalized the auto-exported journal={ArXiv}+volume={abs/...} | |
| % form to this file's "arXiv preprint" convention and names to "Last, First". | |
| @article{Duan2021OptimalPE, | |
| title={Optimal policy evaluation using kernel-based temporal difference methods}, | |
| author={Duan, Yaqi and Wang, Mengdi and Wainwright, Martin J.}, | |
| journal={arXiv preprint arXiv:2109.12002}, | |
| year={2021} | |
| } | |
| % UAI 2020 paper; braced {Q*} so the symbol is not lowercased by title casing. | |
| @inproceedings{xie2020q, | |
| title={{Q*} approximation schemes for batch reinforcement learning: A theoretical comparison}, | |
| author={Xie, Tengyang and Jiang, Nan}, | |
| booktitle={Conference on Uncertainty in Artificial Intelligence}, | |
| pages={550--559}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint (BRAC, arXiv:1911.11361). | |
| @article{wu2019behavior, | |
| title={Behavior regularized offline reinforcement learning}, | |
| author={Wu, Yifan and Tucker, George and Nachum, Ofir}, | |
| journal={arXiv preprint arXiv:1911.11361}, | |
| year={2019} | |
| } | |
| % ICML 2019 paper; braced {Q} in "Q-learning". | |
| @inproceedings{yang2019sample, | |
| title={Sample-optimal parametric {Q}-learning using linearly additive features}, | |
| author={Yang, Lin and Wang, Mengdi}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={6995--7004}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint (arXiv:2202.06385). Repaired the garbled "loglog (T)" in the | |
| % title into protected math so it renders as log log(T). | |
| @article{qiao2022sample, | |
| title={Sample-Efficient Reinforcement Learning with {$\log\log(T)$} Switching Cost}, | |
| author={Qiao, Dan and Yin, Ming and Min, Ming and Wang, Yu-Xiang}, | |
| journal={arXiv preprint arXiv:2202.06385}, | |
| year={2022} | |
| } | |
| % NeurIPS 2020 paper; braced the acronym {MDPs} (was lowercase "mdps"). | |
| @article{jin2020simultaneously, | |
| title={Simultaneously learning stochastic and adversarial episodic {MDPs} with known transition}, | |
| author={Jin, Tiancheng and Luo, Haipeng}, | |
| journal={Advances in neural information processing systems}, | |
| volume={33}, | |
| pages={16557--16566}, | |
| year={2020} | |
| } | |
| % arXiv preprint (arXiv:1206.6400; ICML 2012 version exists). | |
| @article{arora2012online, | |
| title={Online bandit learning against an adaptive adversary: from regret to policy regret}, | |
| author={Arora, Raman and Dekel, Ofer and Tewari, Ambuj}, | |
| journal={arXiv preprint arXiv:1206.6400}, | |
| year={2012} | |
| } | |
| % arXiv preprint (arXiv:2206.03098). | |
| @article{amir2022better, | |
| title={Better Best of Both Worlds Bounds for Bandits with Switching Costs}, | |
| author={Amir, Idan and Azov, Guy and Koren, Tomer and Livni, Roi}, | |
| journal={arXiv preprint arXiv:2206.03098}, | |
| year={2022} | |
| } | |
| % arXiv preprint (arXiv:2204.11174). | |
| @article{malik2022complete, | |
| title={Complete Policy Regret Bounds for Tallying Bandits}, | |
| author={Malik, Dhruv and Li, Yuanzhi and Singh, Aarti}, | |
| journal={arXiv preprint arXiv:2204.11174}, | |
| year={2022} | |
| } | |
| % arXiv preprint (arXiv:1210.4843); braced the acronym {MDPs}. | |
| @article{arora2012deterministic, | |
| title={Deterministic {MDPs} with adversarial rewards and bandit feedback}, | |
| author={Arora, Raman and Dekel, Ofer and Tewari, Ambuj}, | |
| journal={arXiv preprint arXiv:1210.4843}, | |
| year={2012} | |
| } | |
| % ICML 2020 paper; braced the proper noun {Markov} (was lowercase "markov"). | |
| @inproceedings{jin2020learning, | |
| title={Learning adversarial {Markov} decision processes with bandit feedback and unknown transition}, | |
| author={Jin, Chi and Jin, Tiancheng and Luo, Haipeng and Sra, Suvrit and Yu, Tiancheng}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={4860--4869}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % ICML 2019 paper; braced the proper noun {Markov} (was lowercase "markov"). | |
| @inproceedings{rosenberg2019online, | |
| title={Online convex optimization in adversarial {Markov} decision processes}, | |
| author={Rosenberg, Aviv and Mansour, Yishay}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={5478--5486}, | |
| year={2019}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2021 paper. Removed the stray trailing period inside the title (styles | |
| % add their own punctuation) and braced the acronym {MDPs}. | |
| @article{neu2021online, | |
| title={Online learning in {MDPs} with linear function approximation and bandit feedback}, | |
| author={Neu, Gergely and Olkhovskaya, Julia}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={34}, | |
| pages={10407--10417}, | |
| year={2021} | |
| } | |
| % NeurIPS 2020 paper. | |
| @article{fei2020dynamic, | |
| title={Dynamic regret of policy optimization in non-stationary environments}, | |
| author={Fei, Yingjie and Yang, Zhuoran and Wang, Zhaoran and Xie, Qiaomin}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={33}, | |
| pages={6743--6754}, | |
| year={2020} | |
| } | |
</full_update>
| % ICML 2021 paper. Normalized: venue spelled out to match the file's other ICML | |
| % entries, names put in "Last, First" form, "K. Zhang" expanded to Kaiqing Zhang, | |
| % raw UTF-8 "Başar" replaced by the classic-BibTeX escape {\c{s}}, and the | |
| % acronym {MDPs} braced in the title. | |
| @inproceedings{Mao2021NearOptimalMR, | |
| title={Near-Optimal Model-Free Reinforcement Learning in Non-Stationary Episodic {MDPs}}, | |
| author={Mao, Weichao and Zhang, Kaiqing and Zhu, Ruihao and Simchi-Levi, David and Ba{\c{s}}ar, Tamer}, | |
| booktitle={International Conference on Machine Learning}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
| % ICML 2020 paper; braced the proper noun {Markov} (was lowercase "markov"). | |
| @inproceedings{cheung2020reinforcement, | |
| title={Reinforcement learning for non-stationary {Markov} decision processes: The blessing of (more) optimism}, | |
| author={Cheung, Wang Chi and Simchi-Levi, David and Zhu, Ruihao}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={1843--1854}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint. Normalized the auto-exported journal={ArXiv}+volume={abs/...} | |
| % form to this file's "arXiv preprint" convention and braced {Markov}. Author | |
| % names left in "First Last" order as in the original export. | |
| @article{Dinh2021OnlineMD, | |
| title={Online {Markov} Decision Processes with Non-oblivious Strategic Adversary}, | |
| author={Le Cong Dinh and David Henry Mguni and Long Tran-Thanh and Jun Wang and Yaodong Yang}, | |
| journal={arXiv preprint arXiv:2110.03604}, | |
| year={2021} | |
| } | |
| % ICML 2014 paper. Restored the missing accents in the author names | |
| % (Andr\'as Gy\"orgy, Csaba Szepesv\'ari) using classic-BibTeX special-character | |
| % escapes, and braced {Markov} in the title. | |
| @inproceedings{dick2014online, | |
| title={Online learning in {Markov} decision processes with changing cost sequences}, | |
| author={Dick, Travis and Gy{\"o}rgy, Andr{\'a}s and Szepesv{\'a}ri, Csaba}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={512--520}, | |
| year={2014}, | |
| organization={PMLR} | |
| } | |
| % NeurIPS 2018 paper. | |
| @article{arora2018policy, | |
| title={Policy regret in repeated games}, | |
| author={Arora, Raman and Dinitz, Michael and Marinov, Teodor Vanislavov and Mohri, Mehryar}, | |
| journal={Advances in Neural Information Processing Systems}, | |
| volume={31}, | |
| year={2018} | |
| } | |
| % ALT 2020 paper. | |
| @inproceedings{suggala2020online, | |
| title={Online non-convex learning: Following the perturbed leader is optimal}, | |
| author={Suggala, Arun Sai and Netrapalli, Praneeth}, | |
| booktitle={Algorithmic Learning Theory}, | |
| pages={845--861}, | |
| year={2020}, | |
| organization={PMLR} | |
| } | |
| % arXiv preprint (arXiv:2110.14555); braced {V-Learning} and the acronym {RL} | |
| % so title recasing does not lowercase them. | |
| @article{jin2021v, | |
| title={{V-Learning}--A Simple, Efficient, Decentralized Algorithm for Multiagent {RL}}, | |
| author={Jin, Chi and Liu, Qinghua and Wang, Yuanhao and Yu, Tiancheng}, | |
| journal={arXiv preprint arXiv:2110.14555}, | |
| year={2021} | |
| } | |
| % ICML 2021 paper. | |
| @inproceedings{liu2021sharp, | |
| title={A sharp analysis of model-based reinforcement learning with self-play}, | |
| author={Liu, Qinghua and Yu, Tiancheng and Bai, Yu and Jin, Chi}, | |
| booktitle={International Conference on Machine Learning}, | |
| pages={7001--7010}, | |
| year={2021}, | |
| organization={PMLR} | |
| } | |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi