Skip to content

Instantly share code, notes, and snippets.

@lenamax2355
lenamax2355 / _write_dataframe_to_parquet_on_s3.py
Created June 2, 2022 11:32 — forked from jitsejan/_write_dataframe_to_parquet_on_s3.py
Write a Pandas dataframe to Parquet format on AWS S3.
# Note: make sure `s3fs` is installed in order to make Pandas use S3.
# AWS credentials are expected in the default location, ~/.aws/credentials.
def _write_dataframe_to_parquet_on_s3(dataframe, filename):
    """Store ``dataframe`` as a Parquet file on S3.

    The target is ``s3://<DESTINATION>/<filename>/data.parquet``, where
    ``DESTINATION`` is a name that must be defined outside this function.
    A one-line progress message with the record count is printed before
    the write.
    """
    record_count = len(dataframe)
    print("Writing {} records to {}".format(record_count, filename))
    dataframe.to_parquet(f"s3://{DESTINATION}/{filename}/data.parquet")
@lenamax2355
lenamax2355 / pyspark-melt.py
Created June 2, 2022 08:48 — forked from sllynn/pyspark-melt.py
melt a pyspark dataframe
from pyspark.sql.functions import array, col, explode, lit, struct
from pyspark.sql import DataFrame
from typing import Iterable
def melt(
df: DataFrame,
id_vars: Iterable[str], value_vars: Iterable[str],
var_name: str="variable", value_name: str="value") -> DataFrame:
"""Convert :class:`DataFrame` from wide to long format."""
@lenamax2355
lenamax2355 / compare_dfs.py
Created March 16, 2022 11:05 — forked from yassineAlouini/compare_dfs.py
Compare two Pandas DataFrames
import pandas as pd
def compare_two_dfs(input_df_1, input_df_2):
df_1, df_2 = input_df_1.copy(), input_df_2.copy()
ne_stacked = (df_1 != df_2).stack()
changed = ne_stacked[ne_stacked]
changed.index.names = ['id', 'col']
difference_locations = np.where(df_1 != df_2)
changed_from = df_1.values[difference_locations]
@lenamax2355
lenamax2355 / pandas-checks.py
Created March 16, 2022 10:50 — forked from aurelienpierre/pandas-checks.py
Perform usual types and values checks on columns of a pandas.DataFrame
# Build a demo DataFrame to run the sanity checks against.
# NOTE(review): `pd.util.testing` is deprecated in newer pandas releases --
# confirm this helper is still available in the pandas version in use.
df = pd.util.testing.makeMissingDataframe()
df['index1'] = df.index # create a text column by replicating index
df['A'] = 0 # create a zero column
# Helper function
def check_df_sanity(df, verbose=False):
"""Perform usual types and values checks on columns of a pandas.DataFrame"""
for col in df:
@lenamax2355
lenamax2355 / sanity_check_sum.txt
Created March 16, 2022 10:25 — forked from basselkarami/sanity_check_sum.txt
Utility function for sanity checks that tests if model output increases with the provided input columns
def sanity_check_sum(model, dataframe, cols, delta=1):
'''Calculates success rate on basic sanity check. A "delta" value is added
to columns in a dataframe and the newly predicted house price should be higher
than the existing prediction since the addition is supposed to be an added feature
to the house such as bigger area or better condition or view etc.
Args:
model: sklearn or other model with predict() method
dataframe: pandas dataframe with dataset to be tested
cols: column or list of columns in dataframe to be incremented by delta parameter
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import datetime
def get_easter_for_year(year):
a = year % 19
b, c = divmod(year, 100)
d, e = divmod(b, 4)
g = (8 * b +13)//25
h = (19 * a + b - d - g + 15)% 30
j, k = divmod(c, 4)
@lenamax2355
lenamax2355 / custom.css
Created February 19, 2022 13:09 — forked from formigone/custom.css
Custom CSS for Jupyter Notebook web application
#notebook-container{
box-shadow: none !important;
}
.container {
width: 80% !important;
}
.notebook_app {
background: #fff !important;
@lenamax2355
lenamax2355 / example.html
Created February 11, 2022 16:20 — forked from ariesduanmu/example.html
Flask MultiSelect CheckBox
{% extends "bootstrap/base.html" %}
{% import "bootstrap/wtf.html" as wtf %}
{% block title %}test{% endblock %}
{% block content %}
<div class="col-md-4">
<form action="" method="post" novalidata>
{{ form.hidden_tag() }}
<p>
{{ form.example }}<br>
@lenamax2355
lenamax2355 / dynamicUpdate.py
Created February 11, 2022 16:19 — forked from rschutjens/dynamicUpdate.py
dynamic update of selectField options Flask Ajax
from flask import Flask
from flask import json, make_response, render_template_string, request
from flask_wtf import Form
from wtforms import IntegerField, SelectField
from wtforms.validators import DataRequired
template = """
<html>
<head>
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>