{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Getting started with joining tables in pandas."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Routine setup, of no particular interest beyond the fact that the data comes from multiple sources."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import gzip  # can uncompress gzipped files, useful for accessing the pleiades data\n",
    "import io  # useful routines for input/output\n",
    "import pandas as pd  # for working with \"rows/columns\" oriented data\n",
    "import urllib.request  # for loading documents using http\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "import matplotlib  # plotting\n",
    "import matplotlib.pyplot as plt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# load csv files (pandas reads directly from a URL)\n",
    "ramphs = pd.read_csv(\"http://sebastianheath.com/roman-amphitheaters/roman-amphitheaters.csv\")\n",
    "chronogrps = pd.read_csv(\"http://sebastianheath.com/roman-amphitheaters/chronogrps.csv\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# load gzipped PLEIADES.\n",
    "# key is \"io.BytesIO(gzip.decompress(response.read()))\", which\n",
    "# delivers (so to speak) an uncompressed csv to 'pd.read_csv'.\n",
    "# The 'with' block closes the HTTP response as soon as its bytes have been read.\n",
    "with urllib.request.urlopen(\"http://atlantides.org/downloads/pleiades/dumps/pleiades-places-latest.csv.gz\") as response:\n",
    "    pleiades = pd.read_csv(io.BytesIO(gzip.decompress(response.read())))\n",
    "\n",
    "# One note: because it takes a long time to load the pleiades data,\n",
    "# avoid running this cell again unless necessary\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# metmuseum = pd.read_csv(\"https://github.com/metmuseum/openaccess/blob/master/MetObjects.csv?raw=true\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# sanity check: report how many rows each source delivered\n",
    "print(\"Number of pleiades geographic entities loaded: %s\" % len(pleiades))\n",
    "print(\"Number of roman amphitheater records loaded: %s\" % len(ramphs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore Pleiades"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# simple way to get column names\n",
    "# list(pleiades)\n",
    "\n",
    "# pleiades.dtypes\n",
    "\n",
    "# pd.options.display.max_columns = 999"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# unique values\n",
    "pleiades.featureTypes.unique()\n",
    "\n",
    "# sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# substring searching\n",
    "# na=False treats rows whose featureTypes value is missing as non-matches;\n",
    "# without it the mask would contain NaN for those rows and boolean indexing\n",
    "# would raise a ValueError.\n",
    "pleiades[pleiades.featureTypes.str.contains('amphitheatre', na=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# renaming columns\n",
    "# https://stackoverflow.com/questions/19758364/rename-a-single-column-header-in-a-pandas-dataframe\n",
    "print(\"https://stackoverflow.com/questions/19758364/rename-a-single-column-header-in-a-pandas-dataframe\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Joins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# pd.merge\n",
    "# The key column has the same name in both frames, so 'on' is enough\n",
    "# (equivalent to left_on='chronogroup', right_on='chronogroup').\n",
    "ramphs.merge(chronogrps, on='chronogroup')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# pd.set_index\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}