Last active
March 25, 2021 08:06
-
-
Save ozagordi/d945f117355b8873dc00211b9dad4926 to your computer and use it in GitHub Desktop.
Give names and other options to clusters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """ | |
| data.csv from https://gist.github.com/tezzutezzu/8f025345cadc5f92b9b311bf032b264d | |
| """ | |
| import argparse | |
| import json | |
| import re | |
| import subprocess | |
| from urllib.request import urlopen | |
| import pandas as pd | |
# Regular expression used to pull a first name out of a saint's full title.
# Kept as a plain string (not a compiled pattern) because callers pass it to
# ``re.search`` together with flags.
pattern = (
    # Group 1: the honorific. Note that "San" must be followed by whitespace.
    r"\b(San\s|Sant'|Santo|Santa|Beato|Beata|Beati)"
    # Optional whitespace, then group 2: the run of word characters that
    # follows the honorific (the first name; may be empty).
    r"\s?(\w*)"
    # Whatever remains up to the end of the line.
    r".*\W?$"
)
def write_to_clipboard(output):
    """Copy *output* to the clipboard by piping it into ``pbcopy``.

    A ``pbcopy`` child process is started with an environment containing
    only ``LANG=en_US.UTF-8``; the text is UTF-8 encoded and written to the
    child's standard input.

    Args:
        output: The text to place on the clipboard.
    """
    clipboard_env = {'LANG': 'en_US.UTF-8'}
    pbcopy = subprocess.Popen(
        'pbcopy',
        env=clipboard_env,
        stdin=subprocess.PIPE,
    )
    payload = output.encode('utf-8')
    # communicate() sends the payload, closes stdin and waits for pbcopy.
    pbcopy.communicate(payload)
def read_name_ranks(nrows=None):
    """Load the name ranking table and keep the lowest rank per name.

    Reads the ``name`` and ``rank`` columns of ``~/.ranking_nomi.csv``
    (decoded as ISO-8859-1), normalises every name by removing spaces and
    lower-casing it, then groups by the normalised name and takes the
    minimum of the remaining column.

    Args:
        nrows: Maximum number of CSV rows to read; ``None`` reads all rows.

    Returns:
        A DataFrame indexed by normalised ``name`` with a ``rank`` column.
    """
    table = pd.read_csv(
        "~/.ranking_nomi.csv",
        encoding="ISO-8859-1",
        usecols=["name", "rank"],
        nrows=nrows,
    )
    normalised = table["name"].str.replace(" ", "").str.lower()
    table["name"] = normalised
    # Names that become identical after normalisation collapse to one row.
    return table.groupby("name").agg("min")
def get_santo_del_giorno(local=False, nrows=10000):
    """Pick a random, uncommon saint's first name from today's list.

    The list of saints is loaded either from the local file ``santi.json``
    or from https://www.santodelgiorno.it/santi.json. First names are
    extracted with ``extract_name`` and joined with the ranking table from
    ``read_name_ranks``; names missing from the ranking get rank 10000, and
    only names with a rank above 500 are kept as candidates. One candidate
    is drawn at random.

    Args:
        local: When true, read ``santi.json`` from the current directory
            instead of downloading it.
        nrows: Number of rows of the ranking file to read (forwarded to
            ``read_name_ranks``).

    Returns:
        The chosen first name, or ``"dunno"`` when the list cannot be
        downloaded, is empty, or yields no candidate.

    Raises:
        OSError: If ``local`` is true and ``santi.json`` cannot be opened.
    """
    # Imported here so the block does not depend on a new file-level import.
    from http.client import HTTPException

    fallback = "dunno"

    if local:
        # JSON is expected to be UTF-8; do not depend on the locale default.
        with open("santi.json", encoding="utf-8") as f:
            santi = json.load(f)
    else:
        try:
            with urlopen("https://www.santodelgiorno.it/santi.json") as response:
                santi = json.loads(response.read())
        except (OSError, HTTPException, ValueError) as err:
            # URLError/HTTPError/timeouts are OSError subclasses; malformed
            # JSON or undecodable bytes raise ValueError subclasses.
            # Previously a failure here set santi to None, which then
            # crashed inside extract_name instead of falling back.
            print(f"Could not fetch the list of saints: {err}")
            return fallback

    if not santi:
        return fallback

    santi_df = extract_name(santi)
    ranks = read_name_ranks(nrows)
    saints = (
        pd.merge(
            santi_df,
            ranks,
            on='name', how="left"
        )
        .fillna({"rank": 10000})
        .query("rank > 500")
    )
    print(saints)

    # DataFrame.sample() raises on an empty frame, so check before drawing.
    if saints.empty:
        return fallback

    return saints.sample()["name"].to_list()[0]
def extract_name(names_dict):
    """Build a table of first names parsed from saint records.

    For every record, the module-level ``pattern`` is searched in the
    record's ``'nome'`` value and the second capture group, lower-cased,
    is taken as the first name. Records whose ``'nome'`` does not match the
    pattern get the placeholder name ``"luca"``.

    Args:
        names_dict: Iterable of records, each indexable by ``'nome'``.

    Returns:
        A DataFrame with columns ``name`` (parsed first name) and
        ``full_names`` (the original ``'nome'`` value), one row per record.
    """
    parsed = []
    originals = []
    for record in names_dict:
        found = re.search(pattern, record[u'nome'], re.MULTILINE)
        if found is None:
            # No recognised honorific in the title: use the placeholder.
            first_name = "luca"
        else:
            first_name = found.group(2).lower()
        originals.append(record[u'nome'])
        parsed.append(first_name)
    return pd.DataFrame({"name": parsed, "full_names": originals})
# Command-line interface: choose a machine profile and, optionally, a name.
parser = argparse.ArgumentParser(description='Create a named dataproc cluster')
parser.add_argument(
    '--type',
    choices=['high-std', 'high-high', 'highperf'],
    default="highperf",
    help="Default is %(default)s",
)
parser.add_argument(
    "--name",
    type=str,
    default=None,
    help="Set cluster name, if unspecified try with santo del giorno",
)
# parser.add_argument("--image", type=str, default="default_image")
args = parser.parse_args()

# (master machine type, worker machine type, number of workers) per --type.
# argparse restricts --type to exactly these keys.
machine_profiles = {
    "high-std": ("n1-highmem-8", "n1-standard-4", 10),
    "high-high": ("n1-highmem-8", "n1-highmem-8", 10),
    "highperf": ("n1-highmem-8", "n1-highmem-8", 5),
}
master, worker, num_workers = machine_profiles[args.type]

# Without an explicit --name, fall back to a saint's name fetched online.
name = get_santo_del_giorno(local=False) if args.name is None else args.name
print(f"Today's name is {name}")

bucket = "vf-it-ca-nonlive-mom"
image = "projects/vf-it-ca-nonlive/global/images/itca-neuron-dp-img-20210302-084132-34"
labels = "it_almo=it_almo_notebook"

# The trailing backslashes are line continuations inside the string literal,
# so the generated command is a single line ending with one newline.
full_string = f"""gcloud beta dataproc clusters create {name} --bucket {bucket} \
--image {image} \
--labels {labels} \
--master-machine-type {master} \
--worker-machine-type {worker} \
--num-workers {num_workers} \
--region europe-west1 \
--zone europe-west1-b \
--subnet projects/vf-it-ca-nonlive/regions/europe-west1/subnetworks/dev-restricted-zone \
--tags allow-internal-dataproc-dev,allow-ssh-from-management-zone,allow-ssh-from-net-to-bastion \
--project vf-it-ca-nonlive \
--service-account vf-it-ca-dev-dp-ds-sa@vf-it-ca-nonlive.iam.gserviceaccount.com \
--master-boot-disk-type pd-ssd \
--master-boot-disk-size 512 \
--num-master-local-ssds=1 \
--worker-boot-disk-type pd-ssd \
--worker-boot-disk-size 512 \
--num-worker-local-ssds=1 \
--metadata enable-oslogin=true \
--properties core:fs.gs.implicit.dir.repair.enable=false,core:fs.gs.status.parallel.enable=true,spark:spark.jars=gs://vf-it-ca-nonlive-dev/deployment/com.vodafone.neuron.ra.it/artifacts-ra-it/5.52-SN/artifacts/common-5.58.jar \
--enable-component-gateway \
--optional-components=ANACONDA,JUPYTER,ZEPPELIN \
--initialization-actions gs://vf-it-ca-nonlive-devde/de_apps/storage-utils/init_action.sh \
--max-idle 8h \
--no-address
"""

# The command is not executed; it is only placed on the clipboard.
write_to_clipboard(full_string)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment