Skip to content

Instantly share code, notes, and snippets.

@ozagordi
Last active March 25, 2021 08:06
Show Gist options
  • Select an option

  • Save ozagordi/d945f117355b8873dc00211b9dad4926 to your computer and use it in GitHub Desktop.

Select an option

Save ozagordi/d945f117355b8873dc00211b9dad4926 to your computer and use it in GitHub Desktop.
Give names and other options to clusters
#!/usr/bin/env python
"""
data.csv from https://gist.github.com/tezzutezzu/8f025345cadc5f92b9b311bf032b264d
"""
import argparse
import json
import re
import subprocess
from urllib.request import urlopen
import pandas as pd
# Regex for a saint's title. Group 1 is the honorific prefix ("San" followed by
# whitespace, "Sant'", "Santo", "Santa", "Beato", "Beata" or "Beati"); group 2
# is the run of word characters right after it (and after one optional
# whitespace), which extract_name reads as the first name. The rest of the line
# is consumed up to its end.
pattern = r"\b(San\s|Sant'|Santo|Santa|Beato|Beata|Beati)\s?(\w*).*\W?$"
def write_to_clipboard(output):
    """Send the text ``output`` to the clipboard through the ``pbcopy`` command.

    Args:
        output: Text to copy; it is UTF-8 encoded before being piped to the
            child process' standard input.
    """
    payload = output.encode('utf-8')
    # The child runs with an environment that contains only LANG.
    subprocess.run('pbcopy', env={'LANG': 'en_US.UTF-8'}, input=payload)
def read_name_ranks(nrows=None, path="~/.ranking_nomi.csv", encoding="ISO-8859-1"):
    """Load the name ranking CSV and return the best (lowest) rank per name.

    Args:
        nrows: Maximum number of CSV rows to read; ``None`` reads the whole
            file.
        path: Location of the ranking CSV. The file must provide ``name`` and
            ``rank`` columns; any other column is ignored.
        encoding: Text encoding of the CSV file.

    Returns:
        DataFrame indexed by the normalised name (spaces removed, lower-cased)
        with a single ``rank`` column holding the smallest rank found for
        that name.
    """
    ranks = pd.read_csv(path, encoding=encoding, usecols=["name", "rank"], nrows=nrows)
    # Normalise the names so spelling variants such as "Maria Pia" and
    # "mariapia" share one key; regex=False keeps the replacement literal.
    ranks["name"] = ranks["name"].str.replace(" ", "", regex=False).str.lower()
    # Several rows can collapse onto the same key: keep the best rank.
    return ranks.groupby("name").agg("min")
def get_santo_del_giorno(local=False, nrows=10000):
    """Pick a random, uncommon saint name from the "santo del giorno" list.

    Args:
        local: When true, read the saints from ``santi.json`` in the working
            directory instead of downloading them from santodelgiorno.it.
        nrows: Number of rows of the name ranking to load.

    Returns:
        A first name whose rank is above 500 (names missing from the ranking
        are given rank 10000), or ``"dunno"`` when the saints cannot be
        loaded or no candidate is left.
    """
    try:
        if local:
            with open("santi.json") as f:
                santi = json.load(f)
        else:
            # The timeout keeps the script from hanging on a silent server.
            with urlopen("https://www.santodelgiorno.it/santi.json", timeout=10) as response:
                santi = json.loads(response.read())
    except (OSError, ValueError) as err:
        # Network errors, timeouts and a missing file are OSError subclasses;
        # malformed JSON raises a ValueError subclass.
        print(f"Could not load the saints of the day: {err}")
        return "dunno"
    if not santi:
        # Nothing to choose from (empty list or JSON null).
        return "dunno"

    santi_df = extract_name(santi)
    ranks = read_name_ranks(nrows)
    merged = pd.merge(santi_df, ranks, on='name', how="left").fillna({"rank": 10000})
    # Keep only names that are not among the 500 most common ones.
    saints = merged.query("rank > 500")
    print(saints)
    if saints.empty:
        # DataFrame.sample() raises ValueError on an empty frame.
        return "dunno"
    santo = saints.sample().name.to_list()[0]
    return santo if santo else "dunno"
def extract_name(names_dict):
    """Build a table of first names from the "santo del giorno" entries.

    Args:
        names_dict: Iterable of mappings, each holding the saint's full title
            under the ``"nome"`` key. ``None`` is treated as no entries.

    Returns:
        DataFrame with one row per entry and two columns: ``name`` (the
        lower-cased second group of the module-level ``pattern``) and
        ``full_names`` (the original title).
    """
    full_names = []
    names = []
    # `or ()` lets a failed download (None) yield an empty table, not a TypeError.
    for santo in names_dict or ():
        full_name = santo['nome']
        match = re.search(pattern, full_name, re.MULTILINE)
        # Titles that do not match the pattern fall back to a fixed placeholder.
        names.append(match.group(2).lower() if match else "luca")
        full_names.append(full_name)
    return pd.DataFrame({"name": names, "full_names": full_names})
# Command-line interface.
parser = argparse.ArgumentParser(description='Create a named dataproc cluster')
parser.add_argument(
    '--type',
    choices=['high-std', 'high-high', 'highperf'],
    default="highperf",
    help="Default is %(default)s",
)
parser.add_argument(
    "--name",
    type=str,
    default=None,
    help="Set cluster name, if unspecified try with santo del giorno",
)
# Not wired up: the image is hard-coded further down.
# parser.add_argument("--image", type=str, default="default_image")
args = parser.parse_args()

# Cluster shape for each --type: (master machine, worker machine, worker count).
master, worker, num_workers = {
    "high-std": ("n1-highmem-8", "n1-standard-4", 10),
    "high-high": ("n1-highmem-8", "n1-highmem-8", 10),
    "highperf": ("n1-highmem-8", "n1-highmem-8", 5),
}[args.type]

# An explicit --name wins; otherwise draw a saint name.
name = args.name if args.name is not None else get_santo_del_giorno(local=False)
print(f"Today's name is {name}")

bucket = "vf-it-ca-nonlive-mom"
image = "projects/vf-it-ca-nonlive/global/images/itca-neuron-dp-img-20210302-084132-34"
labels = "it_almo=it_almo_notebook"

# The trailing backslashes join the lines below into one single-line command.
full_string = f"""gcloud beta dataproc clusters create {name} --bucket {bucket} \
--image {image} \
--labels {labels} \
--master-machine-type {master} \
--worker-machine-type {worker} \
--num-workers {num_workers} \
--region europe-west1 \
--zone europe-west1-b \
--subnet projects/vf-it-ca-nonlive/regions/europe-west1/subnetworks/dev-restricted-zone \
--tags allow-internal-dataproc-dev,allow-ssh-from-management-zone,allow-ssh-from-net-to-bastion \
--project vf-it-ca-nonlive \
--service-account vf-it-ca-dev-dp-ds-sa@vf-it-ca-nonlive.iam.gserviceaccount.com \
--master-boot-disk-type pd-ssd \
--master-boot-disk-size 512 \
--num-master-local-ssds=1 \
--worker-boot-disk-type pd-ssd \
--worker-boot-disk-size 512 \
--num-worker-local-ssds=1 \
--metadata enable-oslogin=true \
--properties core:fs.gs.implicit.dir.repair.enable=false,core:fs.gs.status.parallel.enable=true,spark:spark.jars=gs://vf-it-ca-nonlive-dev/deployment/com.vodafone.neuron.ra.it/artifacts-ra-it/5.52-SN/artifacts/common-5.58.jar \
--enable-component-gateway \
--optional-components=ANACONDA,JUPYTER,ZEPPELIN \
--initialization-actions gs://vf-it-ca-nonlive-devde/de_apps/storage-utils/init_action.sh \
--max-idle 8h \
--no-address
"""

# The command is copied to the clipboard, not executed.
write_to_clipboard(full_string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment