# Please set up your Kubernetes cluster and kubectl with admin permission
# (required for deploying the CRD and the operator).

# confirm ks version (0.11.0 or later)
$ ks version

# create ks app directory
$ ks init kflowmeetup
$ cd kflowmeetup

# add the kubeflow ksonnet registry
$ ks registry add kubeflow github.com/kubeflow/kubeflow/tree/master/kubeflow

# list packages and install chainer-job
$ ks pkg list
$ ks pkg install kubeflow/chainer-job
$ ks prototype list

# deploy chainer-operator
$ ks generate chainer-operator chainer-operator
$ ks component list
$ ks show default -c chainer-operator | less
$ ks apply default -c chainer-operator

# check what was created
$ kubectl get all
$ stern chainer-operator
$ kubectl get crd

# create a distributed chainer-job
# NOTE: this creates chainer-job in the same app as the operator component.
# In a real deployment, it should be in a different ks application.
$ ks generate chainer-job chainer-job
$ ks component list

# make it run on three nodes (1 master + 2 workers) without GPUs (for demo)
# if more complex YAML is needed, please edit the YAML manually.
$ ks param list chainer-job
$ ks param set chainer-job workers 2
$ ks param set chainer-job gpus 0
$ ks param list chainer-job

# render yaml
$ ks show default -c chainer-job | less

# prepare to watch what's going on in the k8s cluster
$ watch -n 1 kubectl get all

# run the three-node chainer job in another terminal
$ ks apply default -c chainer-job
$ kubectl get chainerjob
$ stern chainer-job-master
$ kubectl get chainerjob -o yaml