Skip to content

Instantly share code, notes, and snippets.

View chrisluedtke's full-sized avatar

Chris Luedtke chrisluedtke

View GitHub Profile
@chrisluedtke
chrisluedtke / get_uci_data_urls.py
Last active February 1, 2019 17:58
Get UCI data URLs from a base URL
import re
import requests
from typing import Tuple
from bs4 import BeautifulSoup
def get_uci_data_urls(url: str) -> Tuple[str]:
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
@chrisluedtke
chrisluedtke / pandas_sorted_correlation.py
Created January 11, 2019 21:57
Return a pandas correlation table without duplicates, sorted by correlation strength
import numpy as np
import pandas as pd
def correlate_sort(df: pd.DataFrame, method: str = 'pearson') -> pd.DataFrame:
"""
pd.DataFrame.corr() without redundancy and sorted by strength
"""
df = df.corr(method)
df = df.mask(np.tril(np.ones(df.shape)).astype(np.bool))
df = df.stack().reset_index()