Skip to content

Instantly share code, notes, and snippets.

@emptyr1
Created November 30, 2020 18:04
Show Gist options
  • Select an option

  • Save emptyr1/5d2ef025258a4bbeec32ff68cf719424 to your computer and use it in GitHub Desktop.

Select an option

Save emptyr1/5d2ef025258a4bbeec32ff68cf719424 to your computer and use it in GitHub Desktop.
import os
import time
import sys
from datetime import date, timedelta
# Inclusive date range covered by this run.
sdate = date(2019, 3, 1)  # start date
edate = date(2019, 3, 31)  # end date
delta = edate - sdate  # as timedelta

# One "year month day" string per calendar day in the range, space-separated
# with month and day zero-padded to two digits (e.g. "2019 03 01").
# The +1 makes the range inclusive of the end date.
date_str = [
    "{} {:02d} {:02d}".format(day.year, day.month, day.day)
    for day in (sdate + timedelta(days=offset) for offset in range(delta.days + 1))
]
print(date_str)
# spark-submit command template; the single "{}" placeholder receives the
# date string. The date is substituted unquoted, so a value containing spaces
# reaches the shell as several separate words.
# NOTE(review): "--conf spark.dynamicAllocation.enabled=false" appears after
# the application jar; spark-submit normally treats everything after the jar
# as application arguments, so this setting may not be applied -- confirm
# before moving it.
SPARK_SUBMIT_CMD = """spark-submit --deploy-mode client --num-executors 120 --executor-cores 5 --executor-memory 7g --driver-memory 25g --class com.glu.revId.replacement.backfill.EventsOrcRevIdReplacementBackfill s3://glu-emr/airflow/events_orc_backfill/jars/glu_analytics-daily-jobs-revid-prod_2.11-3.4.jar {} backfill glu_kinesis_events_orc backfill glu_kinesis_events_orc 20 orphan_revid_events 1 --conf spark.dynamicAllocation.enabled=false"""

# Run one spark-submit per date, sequentially, stopping at the first failure.
for dt in date_str:
    print(dt)
    cmd = SPARK_SUBMIT_CMD.format(dt)
    print(cmd)
    # os.system does not raise when the command fails; it returns the
    # command's status, so failure has to be detected from the return value.
    status = os.system(cmd)
    if status != 0:
        print("ERRORR failed for date: ", dt)
        # Exit non-zero so callers can tell the run did not complete.
        sys.exit(1)
    # Pause between consecutive submissions.
    time.sleep(15)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment