ndaifallah · May 11, 2018 11:47
diff --git a/tag_encoder.py b/tag_encoder.py
 # Multi-hot encoder for delimiter-separated tag strings; written for datasets that were not cleaned up beforehand.
 # Author: Nasreddine DAIFALLAH
 import pandas as pn


 class TagLabelEncoder:
     """Multi-hot encoder for strings holding several separated tags.

     Each input string is split on a separator, every piece is
     whitespace-stripped (and optionally lower-cased), and the distinct
     pieces seen during ``fit`` become the vocabulary. ``transform`` then
     turns each string into a 0/1 row with one column per vocabulary tag.

     An empty piece (for example from an empty string or a trailing
     separator) is kept as the tag ``''``.

     Args:
         strip: Separator the tag strings are split on. Despite its name
             it is passed to ``str.split``, not ``str.strip``.
         tolower: When true, tags are lower-cased before being compared,
             so ``"A"`` and ``"a"`` are the same tag.
     """

     def __init__(self, strip=',', tolower=True):
         self.strip = strip
         self.tolower = tolower
         # Present from construction so the attribute exists before fit().
         self.vocabulary = []

     def _split_tags(self, text):
         """Return the normalised tags contained in one tag string."""
         tags = [piece.strip() for piece in text.split(self.strip)]
         if self.tolower:
             tags = [tag.lower() for tag in tags]
         return tags

     def fit(self, arr=None):
         """Learn the vocabulary from an iterable of tag strings.

         Tags are normalised *before* de-duplication, so tags that differ
         only by case collapse into a single vocabulary entry when
         ``tolower`` is set. The vocabulary keeps first-seen order.

         Args:
             arr: Iterable of tag strings; ``None`` means no input.
         """
         if arr is None:
             arr = []
         seen = {}
         for text in arr:
             for tag in self._split_tags(text):
                 seen[tag] = True
         # A dict is used as an ordered set; store a plain list either way.
         self.vocabulary = list(seen)

     def transform(self, arr=None, to_dataframe=False):
         """Encode tag strings as 0/1 rows over the fitted vocabulary.

         Args:
             arr: Iterable of tag strings; ``None`` means no input.
             to_dataframe: When true, delegate to ``to_dataframe`` and
                 return its result instead of a list of lists.

         Returns:
             A list with one row per input string; each row is a list of
             ints with a 1 in the column of every tag the string contains.

         Raises:
             KeyError: If a string contains a tag missing from the
                 vocabulary.
         """
         if to_dataframe:
             return self.to_dataframe(arr)
         if arr is None:
             arr = []
         column_of = {tag: col for col, tag in enumerate(self.vocabulary)}
         width = len(column_of)
         result = []
         for text in arr:
             row = [0] * width
             for tag in self._split_tags(text):
                 row[column_of[tag]] = 1
             result.append(row)
         return result

     def to_dataframe(self, matrix=None):
         """Encode tag strings and return the result as a DataFrame.

         Args:
             matrix: Iterable of tag strings (the same input ``transform``
                 takes); ``None`` means no input.

         Returns:
             A pandas DataFrame with one column per vocabulary tag and one
             row per input string.
         """
         rows = self.transform(matrix)
         # Build column-wise so the columns exist even when there are no rows.
         columns = {
             tag: [row[col] for row in rows]
             for col, tag in enumerate(self.vocabulary)
         }
         return pn.DataFrame.from_dict(columns)
	# Multi-hot encoder for delimiter-separated tag strings; written for datasets that were not cleaned up beforehand.
	# Author: Nasreddine DAIFALLAH
	import pandas as pn


	class TagLabelEncoder:
	    """Multi-hot encoder for strings holding several separated tags.

	    Each input string is split on a separator, every piece is
	    whitespace-stripped (and optionally lower-cased), and the distinct
	    pieces seen during ``fit`` become the vocabulary. ``transform`` then
	    turns each string into a 0/1 row with one column per vocabulary tag.

	    An empty piece (for example from an empty string or a trailing
	    separator) is kept as the tag ``''``.

	    Args:
	        strip: Separator the tag strings are split on. Despite its name
	            it is passed to ``str.split``, not ``str.strip``.
	        tolower: When true, tags are lower-cased before being compared,
	            so ``"A"`` and ``"a"`` are the same tag.
	    """

	    def __init__(self, strip=',', tolower=True):
	        self.strip = strip
	        self.tolower = tolower
	        # Present from construction so the attribute exists before fit().
	        self.vocabulary = []

	    def _split_tags(self, text):
	        """Return the normalised tags contained in one tag string."""
	        tags = [piece.strip() for piece in text.split(self.strip)]
	        if self.tolower:
	            tags = [tag.lower() for tag in tags]
	        return tags

	    def fit(self, arr=None):
	        """Learn the vocabulary from an iterable of tag strings.

	        Tags are normalised *before* de-duplication, so tags that differ
	        only by case collapse into a single vocabulary entry when
	        ``tolower`` is set. The vocabulary keeps first-seen order.

	        Args:
	            arr: Iterable of tag strings; ``None`` means no input.
	        """
	        if arr is None:
	            arr = []
	        seen = {}
	        for text in arr:
	            for tag in self._split_tags(text):
	                seen[tag] = True
	        # A dict is used as an ordered set; store a plain list either way.
	        self.vocabulary = list(seen)

	    def transform(self, arr=None, to_dataframe=False):
	        """Encode tag strings as 0/1 rows over the fitted vocabulary.

	        Args:
	            arr: Iterable of tag strings; ``None`` means no input.
	            to_dataframe: When true, delegate to ``to_dataframe`` and
	                return its result instead of a list of lists.

	        Returns:
	            A list with one row per input string; each row is a list of
	            ints with a 1 in the column of every tag the string contains.

	        Raises:
	            KeyError: If a string contains a tag missing from the
	                vocabulary.
	        """
	        if to_dataframe:
	            return self.to_dataframe(arr)
	        if arr is None:
	            arr = []
	        column_of = {tag: col for col, tag in enumerate(self.vocabulary)}
	        width = len(column_of)
	        result = []
	        for text in arr:
	            row = [0] * width
	            for tag in self._split_tags(text):
	                row[column_of[tag]] = 1
	            result.append(row)
	        return result

	    def to_dataframe(self, matrix=None):
	        """Encode tag strings and return the result as a DataFrame.

	        Args:
	            matrix: Iterable of tag strings (the same input ``transform``
	                takes); ``None`` means no input.

	        Returns:
	            A pandas DataFrame with one column per vocabulary tag and one
	            row per input string.
	        """
	        rows = self.transform(matrix)
	        # Build column-wise so the columns exist even when there are no rows.
	        columns = {
	            tag: [row[col] for row in rows]
	            for col, tag in enumerate(self.vocabulary)
	        }
	        return pn.DataFrame.from_dict(columns)
No results found