"""Train and evaluate SVM, softmax, and kNN classifiers on synthetic 2-D data.

The data comes from the project-local ``data_generator`` module.  The two
linear models (multiclass SVM hinge loss, softmax cross-entropy loss) are fit
by minimizing their loss over a flat parameter vector ``Wb`` with
``scipy.optimize.minimize``; the kNN classifier is implemented from scratch
in ``MyKnnClassifier``.
"""

import os
import sys
import time
from collections import Counter

import numpy as np
import scipy.special as sp
import scipy.stats as ss
from scipy import stats
from scipy.optimize import minimize


def _unpack_params(Wb, num_class, feat_dim):
    """Split the flat parameter vector ``Wb`` into ``(W, b)``.

    ``Wb`` stores the ``num_class x feat_dim`` weight matrix (row-major)
    followed by the ``num_class`` bias terms.

    Returns:
        W: (num_class, feat_dim) weight matrix.
        b: (num_class, 1) bias column vector.
    """
    Wb = np.reshape(Wb, (-1, 1))
    b = Wb[-num_class:]
    W = np.reshape(Wb[:num_class * feat_dim], (num_class, feat_dim))
    return W, b


# Problem 1.
# cross entropy loss
def cross_entropy_softmax_loss(Wb, x, y, num_class, n, feat_dim):
    """Average softmax cross-entropy loss plus L2 weight regularization.

    Args:
        Wb: flat parameter vector (weights then biases); see _unpack_params.
        x: (n, feat_dim) array of input samples.
        y: (n,) integer class labels in [0, num_class).
        num_class: number of classes.
        n: number of samples in x.
        feat_dim: feature dimension.

    Returns:
        Scalar loss value (data term averaged over samples + 0.5*reg*||W||^2).
    """
    W, b = _unpack_params(Wb, num_class, feat_dim)
    x = np.reshape(np.asarray(x, dtype=float).T, (-1, n))  # (feat_dim, n)
    y = np.asarray(y).astype(int).ravel()
    s = W @ x + b                                          # (num_class, n) scores
    # log-softmax computed via logsumexp for numerical stability
    log_probs = s - sp.logsumexp(s, axis=0, keepdims=True)
    loss = -np.mean(log_probs[y, np.arange(n)])
    reg = 1.0  # L2 regularization strength; biases are not regularized
    loss += 0.5 * reg * np.sum(W * W)
    return loss


# Problem 2.
# svm loss calculation
def svm_loss(Wb, x, y, num_class, n, feat_dim):
    """Average multiclass hinge (SVM) loss plus L2 weight regularization.

    Uses margin delta = 1; the true class itself contributes no hinge term.
    Arguments and return are as in cross_entropy_softmax_loss.
    """
    W, b = _unpack_params(Wb, num_class, feat_dim)
    x = np.reshape(np.asarray(x, dtype=float).T, (-1, n))  # (feat_dim, n)
    y = np.asarray(y).astype(int).ravel()
    s = W @ x + b                                          # (num_class, n) scores
    correct = s[y, np.arange(n)]                           # true-class scores, (n,)
    margins = np.maximum(0.0, s - correct + 1.0)           # hinge with delta = 1
    margins[y, np.arange(n)] = 0.0                         # zero out the true class
    reg = 1.0  # L2 regularization strength; biases are not regularized
    loss = np.sum(margins) / n
    loss += 0.5 * reg * np.sum(W * W)
    return loss


# Problem 3.
# kNN classification
def knn_test(X_train, y_train, X_test, y_test, n_train_sample, n_test_sample, k):
    """Fit MyKnnClassifier on the training split and return test accuracy."""
    knn = MyKnnClassifier(n_neighbors=k)  # build the k-NN classifier
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = np.mean(y_pred == y_test)
    print("prediction accuracy: {:.2f}".format(accuracy))
    return accuracy


def majority_vote2(votes):
    """Return the most common value in ``votes`` (scipy-based alternative)."""
    mode, count = ss.mstats.mode(votes)
    return mode


class MyKnnClassifier:
    """Minimal k-nearest-neighbor classifier using Euclidean distance."""

    def __init__(self, n_neighbors=3):
        # number of neighbors consulted for each prediction
        self.k = n_neighbors

    def fit(self, X_train, y_label):
        # lazy learner: just memorize the training points and their labels
        self.points = X_train
        self.labels = y_label

    def predict(self, X_test):
        """Predict a label for every row of X_test; returns a list of labels."""
        predicts = []
        for test_pt in X_test:
            distances = self.distance(self.points, test_pt)
            winner = self.majority_vote(distances)
            predicts.append(winner)
        return predicts

    def distance(self, X, y):
        """Euclidean distance from every row of X to the single point y."""
        return np.sqrt(np.sum((X - y) ** 2, axis=1))

    def majority_vote(self, distances):
        """Label occurring most often among the k nearest training points."""
        indices_by_distance = np.argsort(distances)
        k_nearest_neighbor = [self.labels[i] for i in indices_by_distance[:self.k]]
        vote_counts = Counter(k_nearest_neighbor)
        winner, winner_count = vote_counts.most_common(1)[0]
        return winner


# now lets test the model for linear models, that is, SVM and softmax
def linear_classifier_test(Wb, x_te, y_te, num_class, n_test):
    """Return the fraction of test samples whose argmax score matches y_te."""
    dlen = len(x_te[0])  # feature dimension inferred from the first sample
    W, b = _unpack_params(Wb, num_class, dlen)
    correct = 0
    for i in range(n_test):
        # find the linear scores, then the maximum-score class
        s = W @ x_te[i].reshape((-1, 1)) + b
        if np.argmax(s) == y_te[i]:
            correct += 1
    return correct / n_test


def main():
    """Generate data, train the selected classifier, and report accuracy."""
    # Deferred imports: data_generator is project-local (and matplotlib is only
    # needed for its plotting), so the functions above stay importable without
    # either module installed.
    import matplotlib.pyplot as plt  # noqa: F401 -- retained from original script
    import data_generator as dg

    # number of classes: this can be either 3 or 4
    num_class = 4
    # sigma controls the degree of data scattering. Larger sigma gives larger scatter
    # default is 1.0. Accuracy becomes lower with larger sigma
    sigma = 1.0
    print('number of classes: ', num_class, ' sigma for data scatter:', sigma)

    if num_class == 4:
        n_train = 400
        n_test = 100
        feat_dim = 2
    else:  # then 3
        n_train = 300
        n_test = 60
        feat_dim = 2

    # generate train dataset
    print('generating training data')
    x_train, y_train = dg.generate(number=n_train, seed=None, plot=True,
                                   num_class=num_class, sigma=sigma)
    # generate test dataset
    print('generating test data')
    x_test, y_test = dg.generate(number=n_test, seed=None, plot=False,
                                 num_class=num_class, sigma=sigma)

    # set classifiers to 'svm' to test SVM classifier
    # set classifiers to 'softmax' to test softmax classifier
    # set classifiers to 'knn' to test kNN classifier
    classifiers = 'svm'

    if classifiers == 'svm':
        print('training SVM classifier...')
        # feat_dim weights per class plus one bias per class
        w0 = np.random.normal(0, 1, (feat_dim * num_class + num_class))
        result = minimize(svm_loss, w0,
                          args=(x_train, y_train, num_class, n_train, feat_dim))
        print('testing SVM classifier...')
        Wb = result.x
        print('accuracy of SVM loss: ',
              linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
    elif classifiers == 'softmax':
        print('training softmax classifier...')
        w0 = np.random.normal(0, 1, (feat_dim * num_class + num_class))
        result = minimize(cross_entropy_softmax_loss, w0,
                          args=(x_train, y_train, num_class, n_train, feat_dim))
        print('testing softmax classifier...')
        Wb = result.x
        print('accuracy of softmax loss: ',
              linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
    else:  # knn
        # k value for kNN classifier. k can be either 1 or 3.
        k = 3
        print('testing kNN classifier...')
        print('accuracy of kNN loss: ',
              knn_test(x_train, y_train, x_test, y_test, n_train, n_test, k) * 100,
              '% for k value of ', k)


if __name__ == "__main__":
    main()