Skip to content

Instantly share code, notes, and snippets.

@SoyabulIslamLincoln
Created May 1, 2020 16:47
Show Gist options
  • Select an option

  • Save SoyabulIslamLincoln/6ab1b37664e2b2f77a02253475a8cf0d to your computer and use it in GitHub Desktop.

Select an option

Save SoyabulIslamLincoln/6ab1b37664e2b2f77a02253475a8cf0d to your computer and use it in GitHub Desktop.
Created on Skills Network Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import Markdown, display\n",
"def printmd(string):\n",
"    \"\"\"Render `string` as a red, level-1 Markdown heading in the cell output.\"\"\"\n",
"    opening_tag = '# <span style=\"color:red\">'\n",
"    closing_tag = '</span>'\n",
"    display(Markdown(opening_tag + string + closing_tag))\n",
"\n",
"\n",
"# A pre-existing name `sc` is treated as a sign that this is a Spark-enabled runtime.\n",
"if 'sc' in globals() or 'sc' in locals():\n",
"    printmd('<<<<<!!!!! It seems that you are running in a IBM Watson Studio Apache Spark Notebook. Please run it in an IBM Watson Studio Default Runtime (without Apache Spark) !!!!!>>>>>')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pyspark==2.4.5\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/9a/5a/271c416c1c2185b6cb0151b29a91fff6fcaed80173c8584ff6d20e46b465/pyspark-2.4.5.tar.gz (217.8MB)\n",
"\u001b[K |████████████████████████████████| 217.8MB 83kB/s eta 0:00:01\n",
"\u001b[?25hCollecting py4j==0.10.7 (from pyspark==2.4.5)\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n",
"\u001b[K |████████████████████████████████| 204kB 37.1MB/s eta 0:00:01\n",
"\u001b[?25hBuilding wheels for collected packages: pyspark\n",
" Building wheel for pyspark (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Stored in directory: /home/jupyterlab/.cache/pip/wheels/bf/db/04/61d66a5939364e756eb1c1be4ec5bdce6e04047fc7929a3c3c\n",
"Successfully built pyspark\n",
"Installing collected packages: py4j, pyspark\n",
"Successfully installed py4j-0.10.7 pyspark-2.4.5\n"
]
}
],
"source": [
"# Use the %pip magic (not !pip) so the package is installed into the running kernel's environment.\n",
"%pip install pyspark==2.4.5"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
"    from pyspark import SparkContext, SparkConf\n",
"    from pyspark.sql import SparkSession\n",
"except ImportError:\n",
"    # Show the restart hint in the notebook output before failing.\n",
"    printmd('<<<<<!!!!! Please restart your kernel after installing Apache Spark !!!!!>>>>>')\n",
"    # Re-raise so execution stops here instead of failing later with a NameError\n",
"    # on SparkContext / SparkSession.\n",
"    raise"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Obtain (or reuse) a SparkContext whose configuration sets the master to local[*].\n",
"sc = SparkContext.getOrCreate(\n",
"    SparkConf().setMaster(\"local[*]\")\n",
")\n",
"\n",
"# Obtain (or reuse) a SparkSession through the session builder.\n",
"spark = SparkSession.builder.getOrCreate()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"\n",
"rdd = sc.parallelize(range(100))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rdd.count()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4950"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rdd.sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python",
"language": "python",
"name": "conda-env-python-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment