Created
June 26, 2019 06:28
-
-
Save connectwithprakash/cf381f5a0b96e13abd87b721e0c15f5f to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Collecting package metadata: done\n", | |
| "Solving environment: | \n", | |
| "The environment is inconsistent, please check the package plan carefully\n", | |
| "The following packages are causing the inconsistency:\n", | |
| "\n", | |
| " - defaults/linux-64::anaconda==5.3.1=py37_0\n", | |
| " - defaults/linux-64::astropy==3.0.4=py37h14c3975_0\n", | |
| " - defaults/linux-64::bkcharts==0.2=py37_0\n", | |
| " - defaults/linux-64::blaze==0.11.3=py37_0\n", | |
| " - defaults/linux-64::bokeh==0.13.0=py37_0\n", | |
| " - defaults/linux-64::bottleneck==1.2.1=py37h035aef0_1\n", | |
| " - defaults/linux-64::dask==0.19.1=py37_0\n", | |
| " - defaults/linux-64::datashape==0.5.4=py37_1\n", | |
| " - defaults/linux-64::mkl-service==1.1.2=py37h90e4bf4_5\n", | |
| " - defaults/linux-64::numba==0.39.0=py37h04863e7_0\n", | |
| " - defaults/linux-64::numexpr==2.6.8=py37hd89afb7_0\n", | |
| " - defaults/linux-64::odo==0.5.1=py37_0\n", | |
| " - defaults/linux-64::pytables==3.4.4=py37ha205bf6_0\n", | |
| " - defaults/linux-64::pytest-arraydiff==0.2=py37h39e3cac_0\n", | |
| " - defaults/linux-64::pytest-astropy==0.4.0=py37_0\n", | |
| " - defaults/linux-64::pytest-doctestplus==0.1.3=py37_0\n", | |
| " - defaults/linux-64::pywavelets==1.0.0=py37hdd07704_0\n", | |
| " - defaults/linux-64::scikit-image==0.14.0=py37hf484d3e_1\n", | |
| "done\n", | |
| "\n", | |
| "# All requested packages already installed.\n", | |
| "\n", | |
| "Collecting package metadata: done\n", | |
| "Solving environment: / \n", | |
| "The environment is inconsistent, please check the package plan carefully\n", | |
| "The following packages are causing the inconsistency:\n", | |
| "\n", | |
| " - defaults/linux-64::anaconda==5.3.1=py37_0\n", | |
| " - defaults/linux-64::astropy==3.0.4=py37h14c3975_0\n", | |
| " - defaults/linux-64::bkcharts==0.2=py37_0\n", | |
| " - defaults/linux-64::blaze==0.11.3=py37_0\n", | |
| " - defaults/linux-64::bokeh==0.13.0=py37_0\n", | |
| " - defaults/linux-64::bottleneck==1.2.1=py37h035aef0_1\n", | |
| " - defaults/linux-64::dask==0.19.1=py37_0\n", | |
| " - defaults/linux-64::datashape==0.5.4=py37_1\n", | |
| " - defaults/linux-64::mkl-service==1.1.2=py37h90e4bf4_5\n", | |
| " - defaults/linux-64::numba==0.39.0=py37h04863e7_0\n", | |
| " - defaults/linux-64::numexpr==2.6.8=py37hd89afb7_0\n", | |
| " - defaults/linux-64::odo==0.5.1=py37_0\n", | |
| " - defaults/linux-64::pytables==3.4.4=py37ha205bf6_0\n", | |
| " - defaults/linux-64::pytest-arraydiff==0.2=py37h39e3cac_0\n", | |
| " - defaults/linux-64::pytest-astropy==0.4.0=py37_0\n", | |
| " - defaults/linux-64::pytest-doctestplus==0.1.3=py37_0\n", | |
| " - defaults/linux-64::pywavelets==1.0.0=py37hdd07704_0\n", | |
| " - defaults/linux-64::scikit-image==0.14.0=py37hf484d3e_1\n", | |
| "done\n", | |
| "\n", | |
| "## Package Plan ##\n", | |
| "\n", | |
| " environment location: /home/jupyterlab/conda\n", | |
| "\n", | |
| " added / updated specs:\n", | |
| " - geopy\n", | |
| "\n", | |
| "\n", | |
| "The following packages will be downloaded:\n", | |
| "\n", | |
| " package | build\n", | |
| " ---------------------------|-----------------\n", | |
| " ca-certificates-2019.6.16 | hecc5488_0 145 KB conda-forge\n", | |
| " certifi-2019.6.16 | py36_0 148 KB conda-forge\n", | |
| " conda-4.7.5 | py36_0 3.0 MB conda-forge\n", | |
| " conda-package-handling-1.3.10| py36_0 257 KB conda-forge\n", | |
| " geographiclib-1.49 | py_0 32 KB conda-forge\n", | |
| " geopy-1.20.0 | py_0 57 KB conda-forge\n", | |
| " ------------------------------------------------------------\n", | |
| " Total: 3.6 MB\n", | |
| "\n", | |
| "The following NEW packages will be INSTALLED:\n", | |
| "\n", | |
| " conda-package-han~ conda-forge/linux-64::conda-package-handling-1.3.10-py36_0\n", | |
| " geographiclib conda-forge/noarch::geographiclib-1.49-py_0\n", | |
| "\n", | |
| "The following packages will be UPDATED:\n", | |
| "\n", | |
| " ca-certificates anaconda::ca-certificates-2019.5.15-0 --> conda-forge::ca-certificates-2019.6.16-hecc5488_0\n", | |
| " conda anaconda::conda-4.6.14-py36_0 --> conda-forge::conda-4.7.5-py36_0\n", | |
| " geopy conda-forge/linux-64::geopy-1.11.0-py~ --> conda-forge/noarch::geopy-1.20.0-py_0\n", | |
| "\n", | |
| "The following packages will be SUPERSEDED by a higher-priority channel:\n", | |
| "\n", | |
| " certifi anaconda --> conda-forge\n", | |
| " openssl anaconda::openssl-1.1.1-h7b6447c_0 --> conda-forge::openssl-1.1.1b-h14c3975_1\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "Downloading and Extracting Packages\n", | |
| "conda-package-handli | 257 KB | ##################################### | 100% \n", | |
| "conda-4.7.5 | 3.0 MB | ##################################### | 100% \n", | |
| "ca-certificates-2019 | 145 KB | ##################################### | 100% \n", | |
| "geopy-1.20.0 | 57 KB | ##################################### | 100% \n", | |
| "certifi-2019.6.16 | 148 KB | ##################################### | 100% \n", | |
| "geographiclib-1.49 | 32 KB | ##################################### | 100% \n", | |
| "Preparing transaction: done\n", | |
| "Verifying transaction: done\n", | |
| "Executing transaction: done\n", | |
| "WARNING conda.base.context:use_only_tar_bz2(632): Conda is constrained to only using the old .tar.bz2 file format because you have conda-build installed, and it is <3.18.3. Update or remove conda-build to get smaller downloads and faster extractions.\n", | |
| "Collecting package metadata (repodata.json): done\n", | |
| "Solving environment: / \n", | |
| "The environment is inconsistent, please check the package plan carefully\n", | |
| "The following packages are causing the inconsistency:\n", | |
| "\n", | |
| " - defaults/linux-64::anaconda==5.3.1=py37_0\n", | |
| " - defaults/linux-64::astropy==3.0.4=py37h14c3975_0\n", | |
| " - defaults/linux-64::bkcharts==0.2=py37_0\n", | |
| " - defaults/linux-64::blaze==0.11.3=py37_0\n", | |
| " - defaults/linux-64::bokeh==0.13.0=py37_0\n", | |
| " - defaults/linux-64::bottleneck==1.2.1=py37h035aef0_1\n", | |
| " - defaults/linux-64::dask==0.19.1=py37_0\n", | |
| " - defaults/linux-64::datashape==0.5.4=py37_1\n", | |
| " - defaults/linux-64::mkl-service==1.1.2=py37h90e4bf4_5\n", | |
| " - defaults/linux-64::numba==0.39.0=py37h04863e7_0\n", | |
| " - defaults/linux-64::numexpr==2.6.8=py37hd89afb7_0\n", | |
| " - defaults/linux-64::odo==0.5.1=py37_0\n", | |
| " - defaults/linux-64::pytables==3.4.4=py37ha205bf6_0\n", | |
| " - defaults/linux-64::pytest-arraydiff==0.2=py37h39e3cac_0\n", | |
| " - defaults/linux-64::pytest-astropy==0.4.0=py37_0\n", | |
| " - defaults/linux-64::pytest-doctestplus==0.1.3=py37_0\n", | |
| " - defaults/linux-64::pywavelets==1.0.0=py37hdd07704_0\n", | |
| " - defaults/linux-64::scikit-image==0.14.0=py37hf484d3e_1\n", | |
| "failed\n", | |
| "\n", | |
| "UnsatisfiableError: The following specifications were found to be incompatible with each other:\n", | |
| "\n", | |
| " - anaconda/linux-64::anaconda-navigator==1.9.7=py36_0\n", | |
| " - anaconda/linux-64::graphviz==2.40.1=h21bd128_2 -> pango[version='>=1.42.1,<2.0a0']\n", | |
| " - anaconda/linux-64::importlib_metadata==0.8=py36_0\n", | |
| " - anaconda/linux-64::lxml==4.3.0=py36hefd8a0e_0\n", | |
| " - anaconda/linux-64::mkl_fft==1.0.6=py36h7dd41cf_0 -> mkl[version='>=2018.0.3']\n", | |
| " - anaconda/linux-64::mkl_random==1.0.1=py36h4414c95_1 -> mkl[version='>=2018.0.3']\n", | |
| " - anaconda/linux-64::navigator-updater==0.2.1=py36_0\n", | |
| " - anaconda/linux-64::numpy-base==1.15.4=py36h81de0dd_0 -> mkl[version='>=2018.0.3']\n", | |
| " - anaconda/linux-64::numpy==1.15.4=py36h1d66e8a_0 -> mkl[version='>=2018.0.3']\n", | |
| " - anaconda/linux-64::pytorch==0.4.1=py36ha74772b_0 -> mkl[version='>=2018.0.3']\n", | |
| " - anaconda/linux-64::scikit-learn==0.20.1=py36h4989274_0 -> mkl[version='>=2018.0.3']\n", | |
| " - anaconda/linux-64::scipy==1.1.0=py36hfa4b5c9_1 -> mkl[version='>=2018.0.3']\n", | |
| " - anaconda/linux-64::spyder==3.3.4=py36_0\n", | |
| " - anaconda/linux-64::sympy==1.4=py36_0\n", | |
| " - anaconda/linux-64::torchvision==0.2.1=py36_0 -> pytorch[version='>=0.4'] -> mkl[version='>=2019.1,<2020.0a0']\n", | |
| " - anaconda/noarch::openpyxl==2.6.2=py_0\n", | |
| " - anaconda/noarch::path.py==12.0.1=py_0 -> importlib_metadata[version='>=0.5']\n", | |
| " - anaconda/noarch::xlsxwriter==1.1.6=py_0\n", | |
| " - mkl-service -> mkl[version='>=2019.4,<2020.0a0']\n", | |
| " - pkgs/main/linux-64::mkl==2019.0=118\n", | |
| " - pkgs/main/linux-64::pango==1.42.4=h049681c_0\n", | |
| "\n", | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!conda install -c anaconda beautifulsoup4 --yes\n", | |
| "!conda install -c conda-forge geopy --yes\n", | |
| "!conda install -c conda-forge folium=0.5.0 --yes" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Installing required libraries" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "\n", | |
| "import requests\n", | |
| "\n", | |
| "from bs4 import BeautifulSoup\n", | |
| "\n", | |
| "from tqdm import tqdm" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Libraries Import" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'\n", | |
| "html_data = requests.get(url).text\n", | |
| "soup = BeautifulSoup(html_data, 'html.parser')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Scraping the web for HTML data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|██████████| 289/289 [00:00<00:00, 23968.87it/s]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "post_code = []\n", | |
| "borough = []\n", | |
| "neighborhood = []\n", | |
| "for row in tqdm(soup.find('table', {'class' : 'wikitable sortable'}).find_all('tr')):\n", | |
| " columns = row.find_all('td')\n", | |
| " if(len(columns) > 0):\n", | |
| " post_code.append(columns[0].text)\n", | |
| " borough.append(columns[1].text)\n", | |
| " neighborhood.append(columns[2].text.rstrip('\\n'))\n", | |
| " \n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1A</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M2A</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M3A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Parkwoods</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M4A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Victoria Village</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M5A</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Harbourfront</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "0 M1A Not assigned Not assigned\n", | |
| "1 M2A Not assigned Not assigned\n", | |
| "2 M3A North York Parkwoods\n", | |
| "3 M4A North York Victoria Village\n", | |
| "4 M5A Downtown Toronto Harbourfront" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df = pd.DataFrame(data=[post_code, borough, neighborhood])\n", | |
| "df = df.T\n", | |
| "df.columns = ['PostalCode', 'Borough', 'Neighborhood']\n", | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Converting scraped data to a DataFrame" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M3A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Parkwoods</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M4A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Victoria Village</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M5A</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Harbourfront</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M5A</td>\n", | |
| " <td>Downtown Toronto</td>\n", | |
| " <td>Regent Park</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M6A</td>\n", | |
| " <td>North York</td>\n", | |
| " <td>Lawrence Heights</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "0 M3A North York Parkwoods\n", | |
| "1 M4A North York Victoria Village\n", | |
| "2 M5A Downtown Toronto Harbourfront\n", | |
| "3 M5A Downtown Toronto Regent Park\n", | |
| "4 M6A North York Lawrence Heights" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_dropna = df[df.Borough != 'Not assigned'].reset_index(drop=True) \n", | |
| "df_dropna.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Removing rows whose Borough is 'Not assigned'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>M1B</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Rouge,Malvern</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>M1C</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Highland Creek,Rouge Hill,Port Union</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>M1E</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Guildwood,Morningside,West Hill</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>M1G</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Woburn</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>M1H</td>\n", | |
| " <td>Scarborough</td>\n", | |
| " <td>Cedarbrae</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "0 M1B Scarborough Rouge,Malvern\n", | |
| "1 M1C Scarborough Highland Creek,Rouge Hill,Port Union\n", | |
| "2 M1E Scarborough Guildwood,Morningside,West Hill\n", | |
| "3 M1G Scarborough Woburn\n", | |
| "4 M1H Scarborough Cedarbrae" | |
| ] | |
| }, | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_grouped =df_dropna.groupby(['PostalCode', 'Borough'], as_index=False).agg(lambda x:','.join(x))\n", | |
| "df_grouped.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Grouping neighborhoods by postal code and borough" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(103, 3)" | |
| ] | |
| }, | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_clean = df_grouped\n", | |
| "df_clean.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>PostalCode</th>\n", | |
| " <th>Borough</th>\n", | |
| " <th>Neighborhood</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>85</th>\n", | |
| " <td>M7A</td>\n", | |
| " <td>Queen's Park</td>\n", | |
| " <td>Not assigned</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " PostalCode Borough Neighborhood\n", | |
| "85 M7A Queen's Park Not assigned" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_grouped.loc[df_grouped.Neighborhood == 'Not assigned']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Dealing with 'Not assigned' neighborhood" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "PostalCode M7A\n", | |
| "Borough Queen's Park\n", | |
| "Neighborhood Queen's Park\n", | |
| "Name: 85, dtype: object" | |
| ] | |
| }, | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_grouped.loc[df_grouped.Neighborhood == 'Not assigned', 'Neighborhood'] = df_grouped.loc[df_grouped.Neighborhood == 'Not assigned', 'Borough']\n", | |
| "df_grouped.iloc[85]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Clean DataFrame" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.8" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment