Skip to content

Instantly share code, notes, and snippets.

View poteznyKrolik's full-sized avatar

mzajac poteznyKrolik

  • 21:54 (UTC -07:00)
View GitHub Profile
@poteznyKrolik
poteznyKrolik / airflow_parity.toml
Created July 24, 2025 21:41
Data Engineering CursorRules 2025 – modular TOML rules for logging, structure, security, and type safety
### Local Airflow Parity Tools Specific Guidelines
# Consistent {Component}Controller/Orchestrator
# Proper init/cleanup
# Docker health checks + retries
# Proxy/network standards
# Secret Manager integration
# Use Cement framework
@poteznyKrolik
poteznyKrolik / manage-dag.sh
Created September 3, 2024 16:19
Helper for making GCP Composer changes (V2/3) without going crazy or clicking through more than I have to
#!/bin/bash
############################################################################
# Author: Martin Zajac
# Script Name: manage_dag.sh
# Purpose: This script manages Google Cloud Composer DAGs by allowing users
# to perform various actions such as listing DAGs, running tasks,
# pausing/unpausing DAGs, streaming logs, and copying files to
# the Composer environment's DAG folder.
#
@poteznyKrolik
poteznyKrolik / timeout.py
Created February 27, 2023 23:53 — forked from TySkby/timeout.py
Timeout decorator/context manager using signals (for Python 3)
#!/usr/bin/env python3
"""Easily put time restrictions on things
Note: Requires Python 3.x
Usage as a context manager:
```
with timeout(10):
something_that_should_not_exceed_ten_seconds()
```
import os, sys
# Build the Airflow metadata-database URL (PostgreSQL via psycopg2, database
# name "airflow") from the AIRFLOW_DB_USER / AIRFLOW_DB_PASS / AIRFLOW_DB_HOST /
# AIRFLOW_DB_PORT environment variables.
# NOTE(review): os.environ.get() returns None for an unset variable, so a
# missing value is interpolated as the literal text "None" rather than raising.
# NOTE(review): the values are inserted verbatim; presumably a password with
# URL-reserved characters (e.g. '@', '/') needs percent-encoding — confirm.
airflow_core_sqlalchemy_conn = f"postgresql+psycopg2://{os.environ.get('AIRFLOW_DB_USER')}:{os.environ.get('AIRFLOW_DB_PASS')}@{os.environ.get('AIRFLOW_DB_HOST')}:{os.environ.get('AIRFLOW_DB_PORT')}/airflow"
# Publish the URL to this process (and any child processes) under the
# AIRFLOW__CORE__SQL_ALCHEMY_CONN environment variable.
os.environ['AIRFLOW__CORE__SQL_ALCHEMY_CONN'] = airflow_core_sqlalchemy_conn
# NOTE(review): this writes the full URL, including the password, to stdout.
print(airflow_core_sqlalchemy_conn)
""" lists environment variables, and splits elements in path variable """
import os
# Print every environment variable as "NAME: value", sorted by name.
for k, v in sorted(os.environ.items()):
    print(k+':', v)
# Blank separator between the variable dump and the PATH listing.
print('\n')
# List each entry of the PATH environment variable on its own line.
# os.pathsep is ';' on Windows and ':' on POSIX, so the split works on both
# platforms instead of only on Windows. A plain loop replaces the list
# comprehension that was used only for its print side effect.
for item in os.environ['PATH'].split(os.pathsep):
    print(item)
@poteznyKrolik
poteznyKrolik / baseline.py
Created March 31, 2021 20:57
open raw cursor for baseline
"""BASELINE
Revision ID: 98d0325960d8
Revises:
Create Date: 2021-03-18 16:56:53.584644
"""
import sqlalchemy as sa
from sqlalchemy.exc import SQLAlchemyError
from alembic import op
@poteznyKrolik
poteznyKrolik / list_files.sh
Created January 29, 2021 23:00
Shell: list file types (extensions) and their counts in the current directory
find . -type f -maxdepth 1 | grep -E ".*\.[a-zA-Z0-9]*$" | sed -e 's/.*\(\.[a-zA-Z0-9]*\)$/\1/' | sort | uniq -c | sort -n
@poteznyKrolik
poteznyKrolik / argBaseClass.py
Created December 4, 2020 20:46
ArgBaseClass
"""
Assuming feature.py implements class Feature(ArgBaseClass), then the above invocation of load_subclasses will return { 'feature' : <Feature object> }. The same kwargs (foo = bar) will be passed into the Feature class.
#!/usr/bin/env python3
import os, pkgutil, importlib, inspect
"""
class ArgBaseClass():
# Assign all keyword arguments as properties on self, and keep the kwargs for later.
def __init__(self, **kwargs):
self._kwargs = kwargs
@poteznyKrolik
poteznyKrolik / DecodeLargeJSON.py
Created October 8, 2020 22:05 — forked from niccokunzmann/DecodeLargeJSON.py
Decode large json files
"""
Response to
http://stackoverflow.com/a/22904200/1320237
"""
import json.scanner
import json.decoder
from json.decoder import JSONDecoder
class FileString(object):
@poteznyKrolik
poteznyKrolik / zipr.py
Created September 30, 2020 18:53 — forked from bbengfort/zipr.py
Dealing with Zip archives and json data in Python
#!/usr/bin/env python3
import os
import json
import random
import zipfile
config = {
"color": "red",
"amount": 42.24,