cstrelioff · August 13, 2018 17:09
diff --git a/lda_textmine_ex.py b/lda_textmine_ex.py
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 # vim:fenc=utf-8
 #
 # Copyright © 2015 Christopher C. Strelioff <chris.strelioff@gmail.com>
 #
 # Distributed under terms of the MIT license.

 """
 An example of getting titles and vocab for lda using the textmining package.

 -- adapted from: http://www.christianpeccei.com/textmining/

 """
 from __future__ import print_function

 import numpy as np
 import textmining

 # Three tiny example "documents" to push through the pipeline
 doc1 = 'John and Bob are brothers.'
 doc2 = 'John went to the store. The store was closed.'
 doc3 = 'Bob went to the store too.'

 # One human-readable title per document, listed in the same order as
 # doc1, doc2, doc3
 titles = (
     "sentence 1 -- brothers",
     "sentence 2 -- john to store",
     "sentence 3 -- bob to store",
 )

 # Build the term-document matrix, feeding the documents in title order
 tdm = textmining.TermDocumentMatrix()
 for doc in (doc1, doc2, doc3):
     tdm.add_doc(doc)

 # Materialize every row the matrix produces
 # NOTE(review): cutoff=1 presumably keeps words that occur in at least one
 # document -- confirm against the textmining documentation
 temp = list(tdm.rows(cutoff=1))

 # The first row carries the vocabulary; every row after it holds the
 # counts for one document
 header, counts = temp[0], temp[1:]
 vocab = tuple(header)
 X = np.array(counts)

 ##
 ## Report the three objects, as in the blog post plus a little extra
 ##
 ## post: http://bit.ly/1bxob2E
 ##

 # document-term matrix: type and shape first, then the contents
 for line in ("type(X): {}".format(type(X)),
              "shape: {}".format(X.shape)):
     print(line)
 print("X:\n\n", X, "\n")

 # vocabulary: type and length first, then the contents
 for line in ("type(vocab): {}".format(type(vocab)),
              "len(vocab): {}".format(len(vocab))):
     print(line)
 print("vocab:\n\n", vocab, "\n")

 # titles: type and length first, then the contents
 for line in ("type(titles): {}".format(type(titles)),
              "len(titles): {}".format(len(titles))):
     print(line)
 print("titles:\n\n", titles, "\n")
	#! /usr/bin/env python
	# -*- coding: utf-8 -*-
	# vim:fenc=utf-8
	#
	# Copyright © 2015 Christopher C. Strelioff <chris.strelioff@gmail.com>
	#
	# Distributed under terms of the MIT license.

	"""
	An example of getting titles and vocab for lda using the textmining package.

	-- adapted from: http://www.christianpeccei.com/textmining/

	"""
	from __future__ import print_function

	import numpy as np
	import textmining

	# Three tiny example "documents" to push through the pipeline
	doc1 = 'John and Bob are brothers.'
	doc2 = 'John went to the store. The store was closed.'
	doc3 = 'Bob went to the store too.'

	# One human-readable title per document, listed in the same order as
	# doc1, doc2, doc3
	titles = (
	    "sentence 1 -- brothers",
	    "sentence 2 -- john to store",
	    "sentence 3 -- bob to store",
	)

	# Build the term-document matrix, feeding the documents in title order
	tdm = textmining.TermDocumentMatrix()
	for doc in (doc1, doc2, doc3):
	    tdm.add_doc(doc)

	# Materialize every row the matrix produces
	# NOTE(review): cutoff=1 presumably keeps words that occur in at least one
	# document -- confirm against the textmining documentation
	temp = list(tdm.rows(cutoff=1))

	# The first row carries the vocabulary; every row after it holds the
	# counts for one document
	header, counts = temp[0], temp[1:]
	vocab = tuple(header)
	X = np.array(counts)

	##
	## Report the three objects, as in the blog post plus a little extra
	##
	## post: http://bit.ly/1bxob2E
	##

	# document-term matrix: type and shape first, then the contents
	for line in ("type(X): {}".format(type(X)),
	             "shape: {}".format(X.shape)):
	    print(line)
	print("X:\n\n", X, "\n")

	# vocabulary: type and length first, then the contents
	for line in ("type(vocab): {}".format(type(vocab)),
	             "len(vocab): {}".format(len(vocab))):
	    print(line)
	print("vocab:\n\n", vocab, "\n")

	# titles: type and length first, then the contents
	for line in ("type(titles): {}".format(type(titles)),
	             "len(titles): {}".format(len(titles))):
	    print(line)
	print("titles:\n\n", titles, "\n")
No results found