Skip to content

Instantly share code, notes, and snippets.

@nulconaux
nulconaux / jira_attachment_report.gs
Created September 20, 2023 07:08
Jira attachments size report
// Atlassian site host; replace <DOMAIN> with your site's subdomain before running.
var DOMAIN = '<DOMAIN>.atlassian.net';
// Placeholder account email — presumably used to authenticate API requests; confirm against the request code.
var EMAIL = '<EMAIL>';
// Placeholder API token — presumably paired with EMAIL for authentication; confirm, and keep the real value out of shared copies.
var TOKEN = '<TOKEN>';
function fetchJiraData() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
// Remove all sheets except the first one
removeAllSheetsExceptFirst(ss);
from pyspark.sql import SparkSession
def test_spark_cluster():
# Create a SparkSession
spark = SparkSession.builder \
.appName("TestingSparkCluster") \
.master("spark://127.0.0.1:7077") \
.getOrCreate()
# Create a sample DataFrame
@nulconaux
nulconaux / ydata_profiling.py
Created June 22, 2023 08:14
Generate ydata_profiling report for CSV and Parquet
import pandas as pd
import ydata_profiling
import argparse
import glob
import logging
import os
import multiprocessing
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@nulconaux
nulconaux / perf_csv_parquet.py
Created June 22, 2023 08:13
Simple Parquet/CSV Performance measurement
import pandas as pd
import time
import os
import humanize
# Read the CSV and measure how long the load takes
csv_file = 'data_table_1.csv'
# perf_counter() is a monotonic, high-resolution clock, so the elapsed value
# cannot be skewed by system clock adjustments the way time.time() can.
t_start = time.perf_counter()
df_csv = pd.read_csv(csv_file)
t_csv = time.perf_counter() - t_start  # elapsed seconds as a float
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa
import time
import os
import humanize
# Set the source Parquet file
parquet_file = 'data_table_converted.parquet'
@nulconaux
nulconaux / read-data-q.py
Created July 7, 2022 07:14
ETL script example
# https://dev.to/aws/orchestrating-hybrid-workflows-using-amazon-managed-workflows-for-apache-airflow-mwaa-2boc
from copy import copy
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
import sys
import csv
import boto3
import json
import socket
def query_with_fetchone(query2run,secret,region):
@nulconaux
nulconaux / ds-example.py
Created July 7, 2022 07:12
DevOps in Data Science
import os
# Example of reading a secret from an environment variable
def access_secrets_env():
    """Return the value of the ``secret_key`` environment variable.

    Returns:
        The variable's value as a string, or ``None`` when ``secret_key``
        is not set in the process environment.
    """
    # .get() already defaults to None; returning directly also avoids a
    # local named ``secrets`` that would shadow the stdlib module name.
    return os.environ.get('secret_key')
# Example of secrets from AWS secrets manager using "default" profile
# In reality, developers typically use specific profiles for specific projects.
@nulconaux
nulconaux / postgres_to_csv.sh
Created June 16, 2022 17:15
PostgreSQL to CSV
#!/bin/bash
DB_NAME=${1}
export PGPASSWORD=
DBMS_SHELL="psql -p 5432 -h localhost"
DBMS_USER="postgres"
#if [ "$1" = '--help' ]; then
if [[ ( "$1" == '--help' ) || ( "$1" == '-h' ) ]]; then
echo "usage: $0 [DB_NAME] [DBMS_SHELL]"
@nulconaux
nulconaux / mysql_to_csv.py
Created June 16, 2022 17:13
MySQL to CSV
import pymysql
def execute(c, command):
    """Run *command* on cursor *c* and return every resulting row.

    Args:
        c: A cursor-like object exposing ``execute()`` and ``fetchall()``.
        command: The statement to run; it is passed to the cursor unchanged.

    Returns:
        Whatever ``c.fetchall()`` returns after the statement has executed.
    """
    c.execute(command)
    return c.fetchall()
db = pymysql.connect(host='', port=3306, user='root', passwd='', db='', use_unicode=True, charset="utf8")
c = db.cursor()
for table in execute(c, "show tables;"):
@nulconaux
nulconaux / notify-ssh.sh
Created June 15, 2022 13:55
Notify whenever someone logs in via SSH
#!/bin/bash
# https://cmaster11.medium.com/how-to-get-notified-whenever-someone-logs-in-via-ssh-947a8f8d4f37
# Edit the /etc/pam.d/sshd file and append the following line at the end:
# session [default=ignore] pam_exec.so /bin/bash /opt/notify-ssh.sh
# We want to trigger the script only when the SSH session starts.
# To be notified also when session closes, you can watch for
# the "close_session" value.
if [[ "$PAM_TYPE" != "open_session" ]]; then
exit 0