Skip to content

Instantly share code, notes, and snippets.

@racydata
Created July 29, 2019 00:38
Show Gist options
  • Select an option

  • Save racydata/92ae85ea47da7c2d0bf50442bb0e83ea to your computer and use it in GitHub Desktop.

Select an option

Save racydata/92ae85ea47da7c2d0bf50442bb0e83ea to your computer and use it in GitHub Desktop.
Clustering of pornhub videos/playlists
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import sys\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from sklearn import decomposition\n",
"from scipy.sparse import coo_matrix\n",
"from sklearn import preprocessing\n",
"\n",
"from tqdm import tqdm_notebook as tqdm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read the data\n",
"Read in chunks to keep peak memory usage under around ~3GB"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "18b2b35138cb4af2931151f60cc9f146",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=1, bar_style=u'info', max=1), HTML(value=u'')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "21b8da3f48244dff8ce2400cdd484ff1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=1, bar_style=u'info', max=1), HTML(value=u'')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8d7b7b0018f948219c2a8f6416da8666",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=1, bar_style=u'info', max=1), HTML(value=u'')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"CPU times: user 3min 34s, sys: 21 s, total: 3min 55s\n",
"Wall time: 3min 51s\n"
]
}
],
"source": [
"%%time\n",
"dfv = pd.concat(tqdm(pd.read_json(\"data/dfv.json.gz\",lines=True,chunksize=100e3)))\n",
"dfp = pd.concat(tqdm(pd.read_json(\"data/dfp.json.gz\",lines=True,chunksize=10e3)))\n",
"dfm = pd.concat(tqdm(pd.read_json(\"data/dfm.json.gz\",lines=True,chunksize=1e6)))\n",
"\n",
"dfv[\"added\"] = pd.to_datetime(dfv[\"added\"],unit=\"ms\")\n",
"dfp[\"date_added\"] = pd.to_datetime(dfp[\"date_added\"],unit=\"ms\")\n",
"dfp[\"date_updated\"] = pd.to_datetime(dfp[\"date_updated\"],unit=\"ms\")\n",
"\n",
"dfp = dfp.set_index(\"playlistid\",drop=False)\n",
"dfv = dfv.set_index(\"videoid\",drop=False)\n",
"dfm = dfm.reset_index(drop=True).astype(\"uint32\")\n",
"dfv[\"nplaylists\"] = dfm.groupby(\"videoid\").count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dataframe of _v_ideos\n",
"* Videos are uniquely identified by `videoid` later (and in URLs, the `viewkey` is used)\n",
"* All columns refer to the video itself, except `nplaylists` which counts the number of playlists each video appears in"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Shape:', (2606742, 13))\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>added</th>\n",
" <th>duration</th>\n",
" <th>ishd</th>\n",
" <th>ispremium</th>\n",
" <th>isprivate</th>\n",
" <th>percentlikes</th>\n",
" <th>playlistid</th>\n",
" <th>price</th>\n",
" <th>title</th>\n",
" <th>videoid</th>\n",
" <th>viewkey</th>\n",
" <th>views</th>\n",
" <th>nplaylists</th>\n",
" </tr>\n",
" <tr>\n",
" <th>videoid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>23820902</th>\n",
" <td>2014-07-28 02:14:47.304</td>\n",
" <td>722</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>79</td>\n",
" <td>1861</td>\n",
" <td>0.0</td>\n",
" <td>Reality Kings - Belle Knox Fresh Outta Duke Un...</td>\n",
" <td>23820902</td>\n",
" <td>116495537</td>\n",
" <td>21600000</td>\n",
" <td>1737</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32352371</th>\n",
" <td>2015-07-28 02:14:47.304</td>\n",
" <td>347</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>71</td>\n",
" <td>1861</td>\n",
" <td>0.0</td>\n",
" <td>Nasty Taking a BBC</td>\n",
" <td>32352371</td>\n",
" <td>507533678</td>\n",
" <td>1900000</td>\n",
" <td>296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23294052</th>\n",
" <td>2014-07-28 02:14:47.304</td>\n",
" <td>1185</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>72</td>\n",
" <td>1861</td>\n",
" <td>0.0</td>\n",
" <td>sucking dick and geting fucked good made him c...</td>\n",
" <td>23294052</td>\n",
" <td>424485320</td>\n",
" <td>4200000</td>\n",
" <td>321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15581092</th>\n",
" <td>2014-07-28 02:14:47.304</td>\n",
" <td>1852</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>71</td>\n",
" <td>1861</td>\n",
" <td>0.0</td>\n",
" <td>Big black cock cums on ghetto girls face</td>\n",
" <td>15581092</td>\n",
" <td>540659716</td>\n",
" <td>1600000</td>\n",
" <td>377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15890861</th>\n",
" <td>2014-07-28 02:14:47.304</td>\n",
" <td>1836</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>81</td>\n",
" <td>1861</td>\n",
" <td>0.0</td>\n",
" <td>BOOTYLICIOUS BLACK SCHOOL GIRLZ - Scene 3</td>\n",
" <td>15890861</td>\n",
" <td>579960734</td>\n",
" <td>323000</td>\n",
" <td>169</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" added duration ishd ispremium isprivate \\\n",
"videoid \n",
"23820902 2014-07-28 02:14:47.304 722 True False False \n",
"32352371 2015-07-28 02:14:47.304 347 True False False \n",
"23294052 2014-07-28 02:14:47.304 1185 False False False \n",
"15581092 2014-07-28 02:14:47.304 1852 False False False \n",
"15890861 2014-07-28 02:14:47.304 1836 False False False \n",
"\n",
" percentlikes playlistid price \\\n",
"videoid \n",
"23820902 79 1861 0.0 \n",
"32352371 71 1861 0.0 \n",
"23294052 72 1861 0.0 \n",
"15581092 71 1861 0.0 \n",
"15890861 81 1861 0.0 \n",
"\n",
" title videoid \\\n",
"videoid \n",
"23820902 Reality Kings - Belle Knox Fresh Outta Duke Un... 23820902 \n",
"32352371 Nasty Taking a BBC 32352371 \n",
"23294052 sucking dick and geting fucked good made him c... 23294052 \n",
"15581092 Big black cock cums on ghetto girls face 15581092 \n",
"15890861 BOOTYLICIOUS BLACK SCHOOL GIRLZ - Scene 3 15890861 \n",
"\n",
" viewkey views nplaylists \n",
"videoid \n",
"23820902 116495537 21600000 1737 \n",
"32352371 507533678 1900000 296 \n",
"23294052 424485320 4200000 321 \n",
"15581092 540659716 1600000 377 \n",
"15890861 579960734 323000 169 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"Shape:\",dfv.shape)\n",
"dfv.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dataframe of _p_laylists\n",
"* Playlists are uniquely identified with the `playlistid` (and this is also used in URLs)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Shape:', (323240, 12))\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date_added</th>\n",
" <th>date_updated</th>\n",
" <th>description</th>\n",
" <th>favorite_count</th>\n",
" <th>ncomments</th>\n",
" <th>playlistid</th>\n",
" <th>tags</th>\n",
" <th>title</th>\n",
" <th>user_id</th>\n",
" <th>video_count</th>\n",
" <th>votesdown</th>\n",
" <th>votesup</th>\n",
" </tr>\n",
" <tr>\n",
" <th>playlistid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>100921</th>\n",
" <td>2014-01-06 04:38:19</td>\n",
" <td>2019-07-23 12:00:09</td>\n",
" <td></td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>100921</td>\n",
" <td>[cuckold, bbc]</td>\n",
" <td>Interracial</td>\n",
" <td>515031</td>\n",
" <td>473</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134421</th>\n",
" <td>2014-05-04 03:33:10</td>\n",
" <td>2019-07-13 12:50:01</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>134421</td>\n",
" <td>[anal]</td>\n",
" <td>anal</td>\n",
" <td>640878</td>\n",
" <td>39</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137921</th>\n",
" <td>2014-05-10 15:41:45</td>\n",
" <td>2019-07-20 02:23:13</td>\n",
" <td></td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>137921</td>\n",
" <td>[ass, pussy]</td>\n",
" <td>more new ass and pussy</td>\n",
" <td>658425</td>\n",
" <td>243</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>141921</th>\n",
" <td>2013-01-04 23:07:48</td>\n",
" <td>2019-07-17 15:31:28</td>\n",
" <td>hand job</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>141921</td>\n",
" <td>[hand, job]</td>\n",
" <td>hand job</td>\n",
" <td>682226</td>\n",
" <td>37</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>150421</th>\n",
" <td>2014-06-24 22:10:53</td>\n",
" <td>2019-07-23 03:46:02</td>\n",
" <td>Hot beautiful women smoking. Its very sexy.</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>150421</td>\n",
" <td>[teen, fucking, milf, mature, sex, women, smok...</td>\n",
" <td>smoking girl</td>\n",
" <td>715689</td>\n",
" <td>349</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date_added date_updated \\\n",
"playlistid \n",
"100921 2014-01-06 04:38:19 2019-07-23 12:00:09 \n",
"134421 2014-05-04 03:33:10 2019-07-13 12:50:01 \n",
"137921 2014-05-10 15:41:45 2019-07-20 02:23:13 \n",
"141921 2013-01-04 23:07:48 2019-07-17 15:31:28 \n",
"150421 2014-06-24 22:10:53 2019-07-23 03:46:02 \n",
"\n",
" description favorite_count \\\n",
"playlistid \n",
"100921 3 \n",
"134421 0 \n",
"137921 1 \n",
"141921 hand job 0 \n",
"150421 Hot beautiful women smoking. Its very sexy. 6 \n",
"\n",
" ncomments playlistid \\\n",
"playlistid \n",
"100921 0 100921 \n",
"134421 0 134421 \n",
"137921 0 137921 \n",
"141921 0 141921 \n",
"150421 0 150421 \n",
"\n",
" tags \\\n",
"playlistid \n",
"100921 [cuckold, bbc] \n",
"134421 [anal] \n",
"137921 [ass, pussy] \n",
"141921 [hand, job] \n",
"150421 [teen, fucking, milf, mature, sex, women, smok... \n",
"\n",
" title user_id video_count votesdown votesup \n",
"playlistid \n",
"100921 Interracial 515031 473 4 7 \n",
"134421 anal 640878 39 0 0 \n",
"137921 more new ass and pussy 658425 243 0 0 \n",
"141921 hand job 682226 37 8 8 \n",
"150421 smoking girl 715689 349 1 6 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"Shape:\",dfp.shape)\n",
"dfp.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dataframe of _m_atches\n",
"* Pairs of (`playlistid`,`videoid`) to cross-reference `dfv` and `dfp`\n",
"* This is what we will use to build a matrix later"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Shape', (40175689, 2))\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>playlistid</th>\n",
" <th>videoid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8091</td>\n",
" <td>9728051</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8091</td>\n",
" <td>9988031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8091</td>\n",
" <td>8211041</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8091</td>\n",
" <td>2089773</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8091</td>\n",
" <td>4223525</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" playlistid videoid\n",
"0 8091 9728051\n",
"1 8091 9988031\n",
"2 8091 8211041\n",
"3 8091 2089773\n",
"4 8091 4223525"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"Shape\",dfm.shape)\n",
"dfm.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter and preprocess\n",
"* Filter the data to remove videos or playlists that could be noisy"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"dfm = dfm.sort_values([\"videoid\"])\n",
"# drop playlists that have less than 5 videos\n",
"df = dfm[dfm[\"playlistid\"].isin(dfp[dfp[\"video_count\"]>=5][\"playlistid\"])]\n",
"# and consider only videos that appear in more than 1 (remaining) playlist\n",
"counts = df.groupby(\"videoid\")[\"playlistid\"].count()\n",
"df = df[df[\"videoid\"].isin(counts[counts>1].index)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Re-label the `videoid`s and `playlistid`s so that they start from 0 and can thus be used as array indices\n",
"* Make a sparse matrix with (unique) videos as rows and (unique) playlists as columns, and the cell value is binary \n",
"(0 if the video is not in the playlist, 1 if it is)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape of sparse matrix: (1829874, 323041)\n",
"Non-zero entries in matrix: 39398444 (0.00667%)\n",
"CPU times: user 13.9 s, sys: 1.64 s, total: 15.5 s\n",
"Wall time: 15.5 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"vle = preprocessing.LabelEncoder()\n",
"ple = preprocessing.LabelEncoder()\n",
"\n",
"vids = vle.fit_transform(df[\"videoid\"])\n",
"pids = ple.fit_transform(df[\"playlistid\"])\n",
"\n",
"num_videos = df[\"videoid\"].nunique()\n",
"num_playlists = df[\"playlistid\"].nunique()\n",
"\n",
"coo = coo_matrix((np.ones(len(vids)),(vids,pids)),shape=(num_videos,num_playlists))\n",
"\n",
"print(\"Shape of sparse matrix: {}\".format(coo.shape))\n",
"print(\"Non-zero entries in matrix: {} ({:.3g}%)\".format(coo.data.shape[0],100.0*coo.data.shape[0]/np.prod(coo.shape)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we want to do an approximate singular value decomposition of the very sparse `nvideos` by `nplaylists` matrix\n",
"into one that is \n",
" * `nvideos` by 60 (`embed_vids`)\n",
" * `nplaylists` by 60 (`embed_pids`)\n",
" \n",
"This takes about 5 mins on a laptop."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1829874, 60)\n",
"(323041, 60)\n",
"CPU times: user 7min 52s, sys: 46.5 s, total: 8min 39s\n",
"Wall time: 6min 27s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"svd = decomposition.TruncatedSVD(n_components=60, n_iter=6, random_state=42,)\n",
"# svd = decomposition.TruncatedSVD(n_components=3, n_iter=1, random_state=42)\n",
"embed_vids = svd.fit_transform(coo)\n",
"embed_pids = svd.fit_transform(coo.T)\n",
"\n",
"print(embed_vids.shape)\n",
"print(embed_pids.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Make recommendations\n",
"Now that we have reduced the dimensionality, we can \n",
"* pick a video or playlist at random or specify it based on `viewkey` and `playlistid` (both seen in URLs)\n",
"* transform the zero-based label back (undoing a previous step)\n",
"* get the 60-dimension embedding vector and compute its distance to all other videos/playlists\n",
"* return the `num` closest ones"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def get_rec_videos(embed_vids,viewkey=None,num=5):\n",
" if viewkey is None:\n",
" i0 = np.random.randint(0,len(embed_vids))\n",
" else:\n",
" i0 = vle.transform(dfv[dfv.viewkey==viewkey].videoid)[0]\n",
" v0 = embed_vids[i0] # embedding vector of random/specified video\n",
" dists = (((embed_vids-v0)**2.).sum(axis=-1)) # (squared) distances to all other videos\n",
" iclosest = dists.argsort()[:num+1] # closest indices, including the random/specified one\n",
" return dfv.loc[vle.classes_[iclosest]]\n",
"\n",
"def get_rec_playlists(embed_pids,playlistid=None,num=5):\n",
" if playlistid is None:\n",
" i0 = np.random.randint(0,len(embed_pids))\n",
" else:\n",
" i0 = ple.transform([playlistid])[0]\n",
" v0 = embed_pids[i0]\n",
" dists = (((embed_pids-v0)**2.).sum(axis=-1))**0.5\n",
" iclosest = dists.argsort()[:num+1]\n",
" return dfp.loc[ple.classes_[iclosest]]\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>added</th>\n",
" <th>duration</th>\n",
" <th>ishd</th>\n",
" <th>ispremium</th>\n",
" <th>isprivate</th>\n",
" <th>percentlikes</th>\n",
" <th>playlistid</th>\n",
" <th>price</th>\n",
" <th>title</th>\n",
" <th>videoid</th>\n",
" <th>viewkey</th>\n",
" <th>views</th>\n",
" <th>nplaylists</th>\n",
" </tr>\n",
" <tr>\n",
" <th>videoid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>113366961</th>\n",
" <td>2017-07-22 19:18:57.720</td>\n",
" <td>216</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>79</td>\n",
" <td>37653051</td>\n",
" <td>0.0</td>\n",
" <td>PMV - Spell On You</td>\n",
" <td>113366961</td>\n",
" <td>ph58f2b01c55897</td>\n",
" <td>7200</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125024951</th>\n",
" <td>2017-07-23 08:18:37.817</td>\n",
" <td>168</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>88</td>\n",
" <td>35891051</td>\n",
" <td>0.0</td>\n",
" <td>Metal PMV feat. Madison, Christy, and Ashlynn ...</td>\n",
" <td>125024951</td>\n",
" <td>ph596eef0b48504</td>\n",
" <td>17300</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99766152</th>\n",
" <td>2017-07-27 19:26:45.130</td>\n",
" <td>204</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>74</td>\n",
" <td>103937762</td>\n",
" <td>0.0</td>\n",
" <td>SAFE&amp;SOUND PMV</td>\n",
" <td>99766152</td>\n",
" <td>ph585acccf82eb1</td>\n",
" <td>25400</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143091502</th>\n",
" <td>2018-07-28 00:39:18.822</td>\n",
" <td>236</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>87</td>\n",
" <td>48877371</td>\n",
" <td>0.0</td>\n",
" <td>music compilation</td>\n",
" <td>143091502</td>\n",
" <td>ph5a1d8cff93e5a</td>\n",
" <td>8300</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>167857102</th>\n",
" <td>2018-10-27 02:14:52.365</td>\n",
" <td>183</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>83</td>\n",
" <td>98590762</td>\n",
" <td>0.0</td>\n",
" <td>Sexy KPOP PMV - Brave Girls High Heels</td>\n",
" <td>167857102</td>\n",
" <td>ph5b09fffc25503</td>\n",
" <td>36300</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130522571</th>\n",
" <td>2018-07-27 19:26:45.101</td>\n",
" <td>218</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>90</td>\n",
" <td>49792951</td>\n",
" <td>0.0</td>\n",
" <td>TURN DOWN FOR WHAT PMV (CREATED WITH www.video...</td>\n",
" <td>130522571</td>\n",
" <td>ph59a5fceac60af</td>\n",
" <td>4200</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" added duration ishd ispremium isprivate \\\n",
"videoid \n",
"113366961 2017-07-22 19:18:57.720 216 True False False \n",
"125024951 2017-07-23 08:18:37.817 168 True False False \n",
"99766152 2017-07-27 19:26:45.130 204 True False False \n",
"143091502 2018-07-28 00:39:18.822 236 True False False \n",
"167857102 2018-10-27 02:14:52.365 183 True False False \n",
"130522571 2018-07-27 19:26:45.101 218 True False False \n",
"\n",
" percentlikes playlistid price \\\n",
"videoid \n",
"113366961 79 37653051 0.0 \n",
"125024951 88 35891051 0.0 \n",
"99766152 74 103937762 0.0 \n",
"143091502 87 48877371 0.0 \n",
"167857102 83 98590762 0.0 \n",
"130522571 90 49792951 0.0 \n",
"\n",
" title videoid \\\n",
"videoid \n",
"113366961 PMV - Spell On You 113366961 \n",
"125024951 Metal PMV feat. Madison, Christy, and Ashlynn ... 125024951 \n",
"99766152 SAFE&SOUND PMV 99766152 \n",
"143091502 music compilation 143091502 \n",
"167857102 Sexy KPOP PMV - Brave Girls High Heels 167857102 \n",
"130522571 TURN DOWN FOR WHAT PMV (CREATED WITH www.video... 130522571 \n",
"\n",
" viewkey views nplaylists \n",
"videoid \n",
"113366961 ph58f2b01c55897 7200 6 \n",
"125024951 ph596eef0b48504 17300 10 \n",
"99766152 ph585acccf82eb1 25400 9 \n",
"143091502 ph5a1d8cff93e5a 8300 6 \n",
"167857102 ph5b09fffc25503 36300 10 \n",
"130522571 ph59a5fceac60af 4200 8 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_rec_videos(embed_vids,viewkey=\"ph58f2b01c55897\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date_added</th>\n",
" <th>date_updated</th>\n",
" <th>description</th>\n",
" <th>favorite_count</th>\n",
" <th>ncomments</th>\n",
" <th>playlistid</th>\n",
" <th>tags</th>\n",
" <th>title</th>\n",
" <th>user_id</th>\n",
" <th>video_count</th>\n",
" <th>votesdown</th>\n",
" <th>votesup</th>\n",
" </tr>\n",
" <tr>\n",
" <th>playlistid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>108885191</th>\n",
" <td>2019-04-28 19:29:40</td>\n",
" <td>2019-07-23 22:21:01</td>\n",
" <td>pmvs</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>108885191</td>\n",
" <td>[pmv]</td>\n",
" <td>pmv</td>\n",
" <td>387194942</td>\n",
" <td>288</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106541021</th>\n",
" <td>2019-04-04 14:05:10</td>\n",
" <td>2019-07-18 15:58:49</td>\n",
" <td>Porn Music Videos to watch</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>106541021</td>\n",
" <td>[music videos]</td>\n",
" <td>PMV</td>\n",
" <td>614447851</td>\n",
" <td>158</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91912761</th>\n",
" <td>2018-11-04 00:16:33</td>\n",
" <td>2019-07-22 02:19:31</td>\n",
" <td>pmv</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>91912761</td>\n",
" <td>[pmv]</td>\n",
" <td>pmv</td>\n",
" <td>548843641</td>\n",
" <td>76</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>90703191</th>\n",
" <td>2018-10-20 23:43:22</td>\n",
" <td>2019-07-20 17:02:49</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>90703191</td>\n",
" <td>[pmv]</td>\n",
" <td>pmv</td>\n",
" <td>638369811</td>\n",
" <td>74</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31425051</th>\n",
" <td>2016-07-16 02:36:42</td>\n",
" <td>2019-07-24 17:55:37</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>31425051</td>\n",
" <td>[music videos]</td>\n",
" <td>Sexy Music Videos</td>\n",
" <td>130533051</td>\n",
" <td>63</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>105874052</th>\n",
" <td>2019-03-28 10:34:47</td>\n",
" <td>2019-07-27 12:23:10</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>105874052</td>\n",
" <td>[pmv]</td>\n",
" <td>pmv</td>\n",
" <td>292703972</td>\n",
" <td>55</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date_added date_updated \\\n",
"playlistid \n",
"108885191 2019-04-28 19:29:40 2019-07-23 22:21:01 \n",
"106541021 2019-04-04 14:05:10 2019-07-18 15:58:49 \n",
"91912761 2018-11-04 00:16:33 2019-07-22 02:19:31 \n",
"90703191 2018-10-20 23:43:22 2019-07-20 17:02:49 \n",
"31425051 2016-07-16 02:36:42 2019-07-24 17:55:37 \n",
"105874052 2019-03-28 10:34:47 2019-07-27 12:23:10 \n",
"\n",
" description favorite_count ncomments playlistid \\\n",
"playlistid \n",
"108885191 pmvs 10 0 108885191 \n",
"106541021 Porn Music Videos to watch 0 0 106541021 \n",
"91912761 pmv 0 0 91912761 \n",
"90703191 0 0 90703191 \n",
"31425051 0 0 31425051 \n",
"105874052 0 0 105874052 \n",
"\n",
" tags title user_id video_count \\\n",
"playlistid \n",
"108885191 [pmv] pmv 387194942 288 \n",
"106541021 [music videos] PMV 614447851 158 \n",
"91912761 [pmv] pmv 548843641 76 \n",
"90703191 [pmv] pmv 638369811 74 \n",
"31425051 [music videos] Sexy Music Videos 130533051 63 \n",
"105874052 [pmv] pmv 292703972 55 \n",
"\n",
" votesdown votesup \n",
"playlistid \n",
"108885191 1 5 \n",
"106541021 0 0 \n",
"91912761 0 0 \n",
"90703191 0 0 \n",
"31425051 0 0 \n",
"105874052 0 0 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_rec_playlists(embed_pids,playlistid=108885191,num=5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment