# hw2
import sys
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import scipy.special as sp
import time
from scipy.optimize import minimize
from collections import Counter
import data_generator as dg
# Problem 1.
# cross entropy loss
# The original stub was left as `pass`; below is a minimal sketch that assumes
# the same flat parameter packing (W rows first, then b) used by svm_loss.
def cross_entropy_softmax_loss(Wb, x, y, num_class, n, feat_dim):
    # unpack the flat parameter vector into W (num_class x feat_dim) and b
    Wb = np.reshape(Wb, (-1, 1))
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * feat_dim)], (num_class, feat_dim))
    x = np.reshape(x.T, (-1, n))   # (feat_dim, n)
    s = W @ x + b                  # class scores, (num_class, n)
    s = s - np.max(s, axis=0)      # shift scores for numerical stability
    log_probs = s - np.log(np.sum(np.exp(s), axis=0))
    # average negative log-likelihood of the true class of each sample
    return -np.mean(log_probs[y, np.arange(n)])
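# Quick sanity check for the sketch above (added for illustration, not part of
# the assignment harness): with all parameters zero, every class gets equal
# probability, so the cross-entropy loss should be exactly log(num_class).
def _softmax_loss_sanity_check():
    nc, fd, ns = 3, 2, 5
    x_demo = np.random.normal(0, 1, (ns, fd))
    y_demo = np.random.randint(0, nc, ns)
    w_zero = np.zeros(nc * fd + nc)
    loss = cross_entropy_softmax_loss(w_zero, x_demo, y_demo, nc, ns, fd)
    assert np.isclose(loss, np.log(nc))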
# Problem 2.
# svm loss calculation
def svm_loss(Wb, x, y, num_class, n, feat_dim):
    # unpack the flat parameter vector into W (num_class x feat_dim) and b
    Wb = np.reshape(Wb, (-1, 1))
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * feat_dim)], (num_class, feat_dim))
    x = np.reshape(x.T, (-1, n))          # (feat_dim, n)
    s = W @ x + b                         # class scores, (num_class, n)
    # multiclass hinge loss: for each sample i, sum over j != y_i of
    # max(0, s_j - s_{y_i} + 1)
    correct_scores = s[y, np.arange(n)]   # score of the true class, (n,)
    margins = np.maximum(0, s - correct_scores + 1)
    margins[y, np.arange(n)] = 0          # the true class contributes no margin
    loss = np.mean(np.sum(margins, axis=0))
    # L2 regularization on the weights only
    reg = 1.0
    loss += 0.5 * reg * np.sum(W * W)
    return loss
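# Worked example (added for illustration, with made-up numbers): two classes,
# one sample with scores s = [3, 1] and true class 0. The only margin term is
# for class 1: max(0, 1 - 3 + 1) = 0, so the hinge part of the loss is 0 and
# only the L2 term 0.5 * reg * sum(W*W) = 0.5 * 1.0 * (9 + 1) = 5.0 remains.
def _svm_loss_example():
    w_demo = np.array([3.0, 0.0, 1.0, 0.0, 0.0, 0.0])  # W = [[3,0],[1,0]], b = 0
    x_demo = np.array([[1.0, 0.0]])                    # one 2-D sample
    y_demo = np.array([0])                             # true class 0
    loss = svm_loss(w_demo, x_demo, y_demo, num_class=2, n=1, feat_dim=2)
    assert np.isclose(loss, 5.0)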
# Problem 3.
# kNN classification
def knn_test(X_train, y_train, X_test, y_test, n_train_sample, n_test_sample, k):
    knn = MyKnnClassifier(n_neighbors=k)  # create a k-NN classifier object
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    print("prediction accuracy: {:.2f}".format(np.mean(y_pred == y_test)))
    return np.mean(y_pred == y_test)
# Alternative majority vote using scipy's mode (kept for reference; the class
# below uses its own Counter-based majority_vote instead). Uses the `stats`
# module already imported at the top of the file.
def majority_vote2(votes):
    mode, count = stats.mstats.mode(votes)
    return mode
class MyKnnClassifier:
    def __init__(self, n_neighbors=3):  # constructor
        self.k = n_neighbors
    def fit(self, X_train, y_label):  # "training" just stores the data
        self.points = X_train
        self.labels = y_label
    def predict(self, X_test):  # predict a label for every test point
        predicts = []  # list collecting the predicted labels
        for test_pt in X_test:  # iterate over the points in the test set
            distances = self.distance(self.points, test_pt)
            winner = self.majority_vote(distances)
            predicts.append(winner)
        return predicts
    def distance(self, X, y):
        # Euclidean distance from every training point in X to the point y
        return np.sqrt(np.sum((X - y) ** 2, axis=1))
    def majority_vote(self, distances):
        # take the labels of the k nearest neighbors and return the most common one
        indices_by_distance = np.argsort(distances)
        k_nearest_neighbor = []
        for i in indices_by_distance[0:self.k]:
            k_nearest_neighbor.append(self.labels[i])
        vote_counts = Counter(k_nearest_neighbor)
        winner, winner_count = vote_counts.most_common(1)[0]
        return winner
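# Minimal usage sketch (added for illustration, with made-up points): a test
# point at the origin is closest to the two class-0 training points, so with
# k=3 the vote among labels [0, 0, 1] predicts class 0.
def _knn_usage_example():
    X_demo = np.array([[0.0, 1.0], [1.0, 0.0], [5.0, 5.0], [6.0, 6.0]])
    y_demo = np.array([0, 0, 1, 1])
    knn = MyKnnClassifier(n_neighbors=3)
    knn.fit(X_demo, y_demo)
    assert knn.predict(np.array([[0.0, 0.0]])) == [0]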
# now let's test the model for linear models, that is, SVM and softmax
def linear_classifier_test(Wb, x_te, y_te, num_class, n_test):
    Wb = np.reshape(Wb, (-1, 1))
    dlen = len(x_te[0])
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * dlen)], (num_class, dlen))
    accuracy = 0
    for i in range(n_test):
        # find the linear scores
        s = W @ x_te[i].reshape((-1, 1)) + b
        # find the maximum score index
        res = np.argmax(s)
        accuracy = accuracy + (res == y_te[i]).astype('uint8')
    return accuracy / n_test
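# Small check of the argmax decision rule (added sketch, made-up numbers):
# with W = identity and b = 0, a point on the x-axis scores highest for class 0
# and a point on the y-axis for class 1, so both are classified correctly.
def _linear_test_example():
    w_demo = np.array([1.0, 0.0, 0.0, 1.0, 0.0, 0.0])  # W = I, b = 0
    x_demo = np.array([[2.0, 0.0], [0.0, 2.0]])
    y_demo = np.array([0, 1])
    assert linear_classifier_test(w_demo, x_demo, y_demo, num_class=2, n_test=2) == 1.0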
# number of classes: this can be either 3 or 4
num_class = 4
# sigma controls the degree of data scattering; larger sigma gives larger scatter.
# The default is 1.0. Accuracy becomes lower with larger sigma.
sigma = 1.0
print('number of classes: ', num_class, ' sigma for data scatter: ', sigma)
if num_class == 4:
    n_train = 400
    n_test = 100
    feat_dim = 2
else:  # then 3
    n_train = 300
    n_test = 60
    feat_dim = 2
# generate train dataset
print('generating training data')
x_train, y_train = dg.generate(number=n_train, seed=None, plot=True, num_class=num_class, sigma=sigma)
# generate test dataset
print('generating test data')
x_test, y_test = dg.generate(number=n_test, seed=None, plot=False, num_class=num_class, sigma=sigma)
# set classifiers to 'svm' to test the SVM classifier
# set classifiers to 'softmax' to test the softmax classifier
# set classifiers to 'knn' to test the kNN classifier
classifiers = 'svm'
if classifiers == 'svm':
    print('training SVM classifier...')
    w0 = np.random.normal(0, 1, (feat_dim * num_class + num_class))  # flat [W; b]
    result = minimize(svm_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim))
    print('testing SVM classifier...')
    Wb = result.x
    print('accuracy of SVM loss: ', linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
elif classifiers == 'softmax':
    print('training softmax classifier...')
    w0 = np.random.normal(0, 1, (feat_dim * num_class + num_class))  # flat [W; b]
    result = minimize(cross_entropy_softmax_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim))
    print('testing softmax classifier...')
    Wb = result.x
    print('accuracy of softmax loss: ', linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
else:  # knn
    # k value for the kNN classifier. k can be either 1 or 3.
    k = 3
    print('testing kNN classifier...')
    print('accuracy of kNN loss: ', knn_test(x_train, y_train, x_test, y_test, n_train, n_test, k) * 100,
          '% for k value of ', k)