[core]
executor = CeleryExecutor
sql_alchemy_conn = mysql://USER:PW@AIRFLOW_MASTER_SERVER/airflowtable

# The SqlAlchemy pool size is the maximum number of database connections
# in the pool.
sql_alchemy_pool_size = 100

# The SqlAlchemy pool recycle is the number of seconds a connection
# can be idle in the pool before it is invalidated. This config does
# not apply to sqlite.
sql_alchemy_pool_recycle = 3600

# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
parallelism = 64

# The number of task instances allowed to run concurrently by the scheduler
dag_concurrency = 32

# When not using pools, tasks are run in the "default pool",
# whose size is guided by this config element
non_pooled_task_slot_count = 256

# The maximum number of active DAG runs per DAG
max_active_runs_per_dag = 99

# Whether to disable pickling dags
donot_pickle = False

# How long before timing out a python file import while filling the DagBag
dagbag_import_timeout = 30

# The class to use for running task instances in a subprocess
task_runner = BashTaskRunner

[operators]
# The default owner assigned to each new operator, unless
# provided explicitly or passed via `default_args`
default_owner = Airflow
default_cpus = 4
default_ram = 512
default_disk = 512
default_gpus = 0

[webserver]
# Number of seconds the gunicorn webserver waits before timing out on a worker
web_server_worker_timeout = 120

# Number of workers to refresh at a time. When set to 0, worker refresh is
# disabled. When nonzero, airflow periodically refreshes webserver workers by
# bringing up new ones and killing old ones.
worker_refresh_batch_size = 1

# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 30

# Number of workers to run the Gunicorn web server
workers = 4

# The worker class gunicorn should use. Choices include
# sync (default), eventlet, gevent
worker_class = sync

[celery]
# The app name that will be used by celery
celery_app_name = airflow.executors.celery_executor

# The concurrency that will be used when starting workers with the
# "airflow worker" command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
#celeryd_concurrency = 16
celeryd_concurrency = 32

# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
broker_url = amqp://MY_AIRFLOW_USER:MY_AIRFLOW_PW@airflowmaster/airflow_vhost

# Another key Celery setting
# this can be == to broker_url or to sql_alchemy_conn
celery_result_backend = db+mysql://MY_AIRFLOW_USER:MY_AIRFLOW_PW@airflowmaster/airflow

# Default queue that tasks get assigned to and that workers listen on.
default_queue = default

[scheduler]
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
# listen (in seconds).
job_heartbeat_sec = 5

# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
scheduler_heartbeat_sec = 1

# after how much time should the scheduler terminate in seconds
# -1 indicates to run continuously (see also num_runs)
#run_duration = -1
run_duration = 600

# after how much time a new DAG should be picked up from the filesystem
min_file_process_interval = 0

dag_dir_list_interval = 300

# How often should stats be printed to the logs
print_stats_interval = 30

# Local task jobs periodically heartbeat to the DB. If the job has
# not heartbeat in this many seconds, the scheduler will mark the
# associated task instance as failed and will re-schedule the task.
scheduler_zombie_task_threshold = 300

# Turn off scheduler catchup by setting this to False.
# Default behavior is unchanged and
# Command Line Backfills still work, but the scheduler
# will not do scheduler catchup if this is False,
# however it can be set on a per DAG basis in the
# DAG definition (catchup)
catchup_by_default = True

# Statsd (https://github.com/etsy/statsd) integration settings
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow

# The scheduler can run multiple threads in parallel to schedule dags.
# This defines how many threads will run. However airflow will never
# use more threads than the amount of cpu cores available.
max_threads = 64

authenticate = False

[mesos]
# Number of cpu cores required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_cpu = 1

# Memory in MB required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_memory = 256

# Enable framework checkpointing for mesos
# See http://mesos.apache.org/documentation/latest/slave-recovery/
checkpoint = False

# Failover timeout in milliseconds.
# When checkpointing is enabled and this option is set, Mesos waits
# until the configured timeout for the MesosExecutor framework to
# re-register after a failover. Mesos shuts down running tasks if the
# MesosExecutor framework fails to re-register within this timeframe.
# failover_timeout = 604800

[kerberos]
reinit_frequency = 3600
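
# --- Not part of the config file above ---
# A minimal sketch, assuming this file is the active airflow.cfg on the host,
# showing how a few of the key values can be read back at runtime through
# Airflow's configuration API. Running it on the scheduler host and on a
# Celery worker host is a quick way to confirm both picked up the same
# sql_alchemy_conn, broker_url and concurrency settings. (Any of these keys
# can also be overridden per process with an environment variable of the
# form AIRFLOW__CORE__PARALLELISM.)
#
#     from airflow.configuration import conf
#
#     # Section and key names mirror the entries above.
#     executor        = conf.get('core', 'executor')                 # 'CeleryExecutor'
#     parallelism     = conf.getint('core', 'parallelism')           # 64 tasks cluster-wide
#     dag_concurrency = conf.getint('core', 'dag_concurrency')       # 32 tasks per DAG
#     worker_slots    = conf.getint('celery', 'celeryd_concurrency') # 32 tasks per worker
#     catchup         = conf.getboolean('scheduler', 'catchup_by_default')
#
#     print("executor=%s parallelism=%d dag_concurrency=%d "
#           "celeryd_concurrency=%d catchup_by_default=%s"
#           % (executor, parallelism, dag_concurrency, worker_slots, catchup))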