Last active
April 15, 2022 19:09
-
-
Save Sparrow0hawk/bf3bf62abc4a623cf1c5f5cb20d806a6 to your computer and use it in GitHub Desktop.
Extracting topic scores for documents using LDA Gensim (Stack Overflow answer)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "Untitled28.ipynb", | |
| "provenance": [], | |
| "authorship_tag": "ABX9TyMbQk1gQNIBS7J1ybWMNhHa", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/Sparrow0hawk/bf3bf62abc4a623cf1c5f5cb20d806a6/untitled28.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "id": "rD5gmpMum-Fe" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from gensim.test.utils import common_texts, common_corpus, common_dictionary\n", | |
| "from gensim.models import LdaModel\n", | |
| "\n", | |
| "# train a quick lda model using the common _corpus, _dictionary and _texts from gensim\n", | |
| "optimal_model = LdaModel(common_corpus, id2word=common_dictionary, num_topics=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import pandas as pd\n", | |
| "\n", | |
| "# Dominant topic(s) for each document\n", | |
| "def format_topics_sentences(ldamodel=optimal_model, \n", | |
| " corpus=common_corpus, \n", | |
| " texts=common_texts, \n", | |
| " n=1):\n", | |
| " \"\"\"\n", | |
| " A function for extracting a number of dominant topics for a given document\n", | |
| " using an existing LDA model\n", | |
| " \"\"\"\n", | |
| " # Init output\n", | |
| " sent_topics_df = pd.DataFrame()\n", | |
| "\n", | |
| "\n", | |
| " # Get main topic in each document\n", | |
| " for i, row in enumerate(ldamodel[corpus]):\n", | |
| " row = sorted(row, key=lambda x: (x[1]), reverse=True)\n", | |
| " # Get the Dominant topic, Perc Contribution and Keywords for each document\n", | |
| " for j, (topic_num, prop_topic) in enumerate(row):\n", | |
| " # we use range here to iterate over the n parameter\n", | |
| " if j in range(n): # => dominant topic\n", | |
| " wp = ldamodel.show_topic(topic_num)\n", | |
| " topic_keywords = \", \".join([word for word, prop in wp])\n", | |
| " sent_topics_df = sent_topics_df.append(\n", | |
| " # and also use the i value here to get the document label\n", | |
| " pd.Series([int(i), int(topic_num), round(prop_topic, 4), topic_keywords]),\n", | |
| " ignore_index=True,\n", | |
| " )\n", | |
| " else:\n", | |
| " break\n", | |
| " sent_topics_df.columns = [\"Document\", \"Dominant_Topic\", \"Perc_Contribution\", \"Topic_Keywords\"]\n", | |
| "\n", | |
| " # Add original text to the end of the output\n", | |
| " text_col = [texts[int(i)] for i in sent_topics_df.Document.tolist()]\n", | |
| " contents = pd.Series(text_col, name='original_texts')\n", | |
| " sent_topics_df = pd.concat([sent_topics_df, contents], axis=1)\n", | |
| " return sent_topics_df" | |
| ], | |
| "metadata": { | |
| "id": "ylYlNPQLm_Fb" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "format_topics_sentences(ldamodel=optimal_model, corpus=common_corpus, texts=common_texts, n=2)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 614 | |
| }, | |
| "id": "OgK69on8nDsT", | |
| "outputId": "40b988aa-f19b-41cc-923a-96fb034be298" | |
| }, | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " Document Dominant_Topic Perc_Contribution \\\n", | |
| "0 0.0 7.0 0.7750 \n", | |
| "1 0.0 5.0 0.0250 \n", | |
| "2 1.0 4.0 0.8714 \n", | |
| "3 1.0 3.0 0.0143 \n", | |
| "4 2.0 5.0 0.8200 \n", | |
| "5 2.0 0.0 0.0200 \n", | |
| "6 3.0 0.0 0.8200 \n", | |
| "7 3.0 5.0 0.0200 \n", | |
| "8 4.0 3.0 0.7750 \n", | |
| "9 4.0 4.0 0.0250 \n", | |
| "10 5.0 8.0 0.5500 \n", | |
| "11 5.0 4.0 0.0500 \n", | |
| "12 6.0 8.0 0.7000 \n", | |
| "13 6.0 2.0 0.0333 \n", | |
| "14 7.0 8.0 0.7750 \n", | |
| "15 7.0 2.0 0.0250 \n", | |
| "16 8.0 2.0 0.7750 \n", | |
| "17 8.0 8.0 0.0250 \n", | |
| "\n", | |
| " Topic_Keywords \\\n", | |
| "0 human, interface, computer, trees, graph, syst... \n", | |
| "1 system, user, interface, eps, trees, graph, co... \n", | |
| "2 user, survey, time, computer, system, response... \n", | |
| "3 user, response, time, trees, graph, system, in... \n", | |
| "4 system, user, interface, eps, trees, graph, co... \n", | |
| "5 system, eps, human, trees, graph, user, comput... \n", | |
| "6 system, eps, human, trees, graph, user, comput... \n", | |
| "7 system, user, interface, eps, trees, graph, co... \n", | |
| "8 user, response, time, trees, graph, system, in... \n", | |
| "9 user, survey, time, computer, system, response... \n", | |
| "10 graph, trees, minors, system, interface, time,... \n", | |
| "11 user, survey, time, computer, system, response... \n", | |
| "12 graph, trees, minors, system, interface, time,... \n", | |
| "13 minors, survey, graph, trees, system, user, hu... \n", | |
| "14 graph, trees, minors, system, interface, time,... \n", | |
| "15 minors, survey, graph, trees, system, user, hu... \n", | |
| "16 minors, survey, graph, trees, system, user, hu... \n", | |
| "17 graph, trees, minors, system, interface, time,... \n", | |
| "\n", | |
| " original_texts \n", | |
| "0 [human, interface, computer] \n", | |
| "1 [human, interface, computer] \n", | |
| "2 [survey, user, computer, system, response, time] \n", | |
| "3 [survey, user, computer, system, response, time] \n", | |
| "4 [eps, user, interface, system] \n", | |
| "5 [eps, user, interface, system] \n", | |
| "6 [system, human, system, eps] \n", | |
| "7 [system, human, system, eps] \n", | |
| "8 [user, response, time] \n", | |
| "9 [user, response, time] \n", | |
| "10 [trees] \n", | |
| "11 [trees] \n", | |
| "12 [graph, trees] \n", | |
| "13 [graph, trees] \n", | |
| "14 [graph, minors, trees] \n", | |
| "15 [graph, minors, trees] \n", | |
| "16 [graph, minors, survey] \n", | |
| "17 [graph, minors, survey] " | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-269dacc6-7800-4c13-a48d-88ad0314e5c2\">\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Document</th>\n", | |
| " <th>Dominant_Topic</th>\n", | |
| " <th>Perc_Contribution</th>\n", | |
| " <th>Topic_Keywords</th>\n", | |
| " <th>original_texts</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.0</td>\n", | |
| " <td>7.0</td>\n", | |
| " <td>0.7750</td>\n", | |
| " <td>human, interface, computer, trees, graph, syst...</td>\n", | |
| " <td>[human, interface, computer]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.0</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>0.0250</td>\n", | |
| " <td>system, user, interface, eps, trees, graph, co...</td>\n", | |
| " <td>[human, interface, computer]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>0.8714</td>\n", | |
| " <td>user, survey, time, computer, system, response...</td>\n", | |
| " <td>[survey, user, computer, system, response, time]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1.0</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>0.0143</td>\n", | |
| " <td>user, response, time, trees, graph, system, in...</td>\n", | |
| " <td>[survey, user, computer, system, response, time]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2.0</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>0.8200</td>\n", | |
| " <td>system, user, interface, eps, trees, graph, co...</td>\n", | |
| " <td>[eps, user, interface, system]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>2.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0200</td>\n", | |
| " <td>system, eps, human, trees, graph, user, comput...</td>\n", | |
| " <td>[eps, user, interface, system]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>3.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.8200</td>\n", | |
| " <td>system, eps, human, trees, graph, user, comput...</td>\n", | |
| " <td>[system, human, system, eps]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>3.0</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>0.0200</td>\n", | |
| " <td>system, user, interface, eps, trees, graph, co...</td>\n", | |
| " <td>[system, human, system, eps]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>4.0</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>0.7750</td>\n", | |
| " <td>user, response, time, trees, graph, system, in...</td>\n", | |
| " <td>[user, response, time]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>4.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>0.0250</td>\n", | |
| " <td>user, survey, time, computer, system, response...</td>\n", | |
| " <td>[user, response, time]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>5.0</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>0.5500</td>\n", | |
| " <td>graph, trees, minors, system, interface, time,...</td>\n", | |
| " <td>[trees]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>5.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>0.0500</td>\n", | |
| " <td>user, survey, time, computer, system, response...</td>\n", | |
| " <td>[trees]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>6.0</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>0.7000</td>\n", | |
| " <td>graph, trees, minors, system, interface, time,...</td>\n", | |
| " <td>[graph, trees]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>6.0</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>0.0333</td>\n", | |
| " <td>minors, survey, graph, trees, system, user, hu...</td>\n", | |
| " <td>[graph, trees]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>7.0</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>0.7750</td>\n", | |
| " <td>graph, trees, minors, system, interface, time,...</td>\n", | |
| " <td>[graph, minors, trees]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>7.0</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>0.0250</td>\n", | |
| " <td>minors, survey, graph, trees, system, user, hu...</td>\n", | |
| " <td>[graph, minors, trees]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>8.0</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>0.7750</td>\n", | |
| " <td>minors, survey, graph, trees, system, user, hu...</td>\n", | |
| " <td>[graph, minors, survey]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>8.0</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>0.0250</td>\n", | |
| " <td>graph, trees, minors, system, interface, time,...</td>\n", | |
| " <td>[graph, minors, survey]</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-269dacc6-7800-4c13-a48d-88ad0314e5c2')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| " \n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
| " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " \n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " flex-wrap:wrap;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-269dacc6-7800-4c13-a48d-88ad0314e5c2 button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-269dacc6-7800-4c13-a48d-88ad0314e5c2');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| " </div>\n", | |
| " " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 3 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "for i in optimal_model[common_corpus]:\n", | |
| " print(i)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "b_-wOVlInHLB", | |
| "outputId": "4b03ce2d-ec95-4a79-b016-5bf212e3f2b0" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "[(0, 0.025002241), (1, 0.025000002), (2, 0.025), (3, 0.025), (4, 0.02500137), (5, 0.025002241), (6, 0.025000002), (7, 0.77499413), (8, 0.025), (9, 0.025000002)]\n", | |
| "[(0, 0.014288797), (1, 0.014285716), (2, 0.014287288), (3, 0.014290491), (4, 0.8714151), (5, 0.014288196), (6, 0.014285716), (7, 0.014287288), (8, 0.014285714), (9, 0.014285716)]\n", | |
| "[(0, 0.020005856), (1, 0.020000003), (2, 0.020000001), (3, 0.020002093), (4, 0.020002015), (5, 0.81998795), (6, 0.020000003), (7, 0.020002093), (8, 0.020000001), (9, 0.020000003)]\n", | |
| "[(0, 0.8199941), (1, 0.020000001), (2, 0.02), (3, 0.02), (4, 0.020000812), (5, 0.02000298), (6, 0.020000001), (7, 0.020002091), (8, 0.02), (9, 0.020000001)]\n", | |
| "[(0, 0.025), (1, 0.025000002), (2, 0.025), (3, 0.77499324), (4, 0.025004486), (5, 0.025002241), (6, 0.025000002), (7, 0.025), (8, 0.025), (9, 0.025000002)]\n", | |
| "[(0, 0.05), (1, 0.050000004), (2, 0.05), (3, 0.05), (4, 0.050006837), (5, 0.05), (6, 0.050000004), (7, 0.05), (8, 0.54999316), (9, 0.050000004)]\n", | |
| "[(0, 0.033333335), (1, 0.033333335), (2, 0.033338174), (3, 0.033333335), (4, 0.03333516), (5, 0.033333335), (6, 0.033333335), (7, 0.033333335), (8, 0.6999933), (9, 0.033333335)]\n", | |
| "[(0, 0.025), (1, 0.025000002), (2, 0.025009582), (3, 0.025), (4, 0.02500085), (5, 0.025), (6, 0.025000002), (7, 0.025), (8, 0.77498955), (9, 0.025000002)]\n", | |
| "[(0, 0.025), (1, 0.025000002), (2, 0.7749882), (3, 0.025), (4, 0.025001371), (5, 0.025), (6, 0.025000002), (7, 0.025), (8, 0.025010476), (9, 0.025000002)]\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "list(optimal_model[common_corpus])" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "Pcwk9DDGnLSW", | |
| "outputId": "7ffe6328-98e2-468a-bccc-39e55350f780" | |
| }, | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "[[(0, 0.025002243),\n", | |
| " (1, 0.025000002),\n", | |
| " (2, 0.025),\n", | |
| " (3, 0.025),\n", | |
| " (4, 0.02500137),\n", | |
| " (5, 0.025002243),\n", | |
| " (6, 0.025000002),\n", | |
| " (7, 0.7749942),\n", | |
| " (8, 0.025),\n", | |
| " (9, 0.025000002)],\n", | |
| " [(0, 0.014288797),\n", | |
| " (1, 0.014285716),\n", | |
| " (2, 0.014287288),\n", | |
| " (3, 0.01429052),\n", | |
| " (4, 0.8714151),\n", | |
| " (5, 0.014288196),\n", | |
| " (6, 0.014285716),\n", | |
| " (7, 0.014287288),\n", | |
| " (8, 0.014285714),\n", | |
| " (9, 0.014285716)],\n", | |
| " [(0, 0.02000577),\n", | |
| " (1, 0.020000001),\n", | |
| " (2, 0.02),\n", | |
| " (3, 0.020002091),\n", | |
| " (4, 0.020002013),\n", | |
| " (5, 0.8199881),\n", | |
| " (6, 0.020000001),\n", | |
| " (7, 0.020002091),\n", | |
| " (8, 0.02),\n", | |
| " (9, 0.020000001)],\n", | |
| " [(0, 0.8199941),\n", | |
| " (1, 0.02),\n", | |
| " (2, 0.019999998),\n", | |
| " (3, 0.019999998),\n", | |
| " (4, 0.02000081),\n", | |
| " (5, 0.020002991),\n", | |
| " (6, 0.02),\n", | |
| " (7, 0.02000209),\n", | |
| " (8, 0.019999998),\n", | |
| " (9, 0.02)],\n", | |
| " [(0, 0.025000002),\n", | |
| " (1, 0.025000004),\n", | |
| " (2, 0.025000002),\n", | |
| " (3, 0.77499366),\n", | |
| " (4, 0.02500412),\n", | |
| " (5, 0.025002243),\n", | |
| " (6, 0.025000004),\n", | |
| " (7, 0.025000002),\n", | |
| " (8, 0.025000002),\n", | |
| " (9, 0.025000004)],\n", | |
| " [(0, 0.049999993),\n", | |
| " (1, 0.049999997),\n", | |
| " (2, 0.049999993),\n", | |
| " (3, 0.049999993),\n", | |
| " (4, 0.05000683),\n", | |
| " (5, 0.049999993),\n", | |
| " (6, 0.049999997),\n", | |
| " (7, 0.049999993),\n", | |
| " (8, 0.54999316),\n", | |
| " (9, 0.049999997)],\n", | |
| " [(0, 0.033333335),\n", | |
| " (1, 0.033333335),\n", | |
| " (2, 0.033337615),\n", | |
| " (3, 0.033333335),\n", | |
| " (4, 0.033335157),\n", | |
| " (5, 0.033333335),\n", | |
| " (6, 0.033333335),\n", | |
| " (7, 0.033333335),\n", | |
| " (8, 0.69999385),\n", | |
| " (9, 0.033333335)],\n", | |
| " [(0, 0.025),\n", | |
| " (1, 0.025000002),\n", | |
| " (2, 0.02500647),\n", | |
| " (3, 0.025),\n", | |
| " (4, 0.025000848),\n", | |
| " (5, 0.025),\n", | |
| " (6, 0.025000002),\n", | |
| " (7, 0.025),\n", | |
| " (8, 0.77499264),\n", | |
| " (9, 0.025000002)],\n", | |
| " [(0, 0.025000002),\n", | |
| " (1, 0.025000004),\n", | |
| " (2, 0.77499217),\n", | |
| " (3, 0.025000002),\n", | |
| " (4, 0.025001371),\n", | |
| " (5, 0.025000002),\n", | |
| " (6, 0.025000004),\n", | |
| " (7, 0.025000002),\n", | |
| " (8, 0.025006471),\n", | |
| " (9, 0.025000004)]]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 7 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "" | |
| ], | |
| "metadata": { | |
| "id": "jxG1iDelndlS" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://stackoverflow.com/questions/70295773/extract-topic-scores-for-documents-lda-gensim-python/70303115?noredirect=1#comment127022020_70303115