Skip to content

Instantly share code, notes, and snippets.

@seongilp
Created April 15, 2021 12:51
Show Gist options
  • Select an option

  • Save seongilp/6eb36329cfdfb8f15b1d872b83f84b4d to your computer and use it in GitHub Desktop.

Select an option

Save seongilp/6eb36329cfdfb8f15b1d872b83f84b4d to your computer and use it in GitHub Desktop.
hw2
import sys
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import scipy.special as sp
import time
from scipy.optimize import minimize
from collections import Counter
import data_generator as dg
# Problem 1.
# cross entropy loss
def cross_entropy_softmax_loss(Wb, x, y, num_class, n, feat_dim):
    """Mean softmax cross-entropy loss of a linear classifier.

    Parameters
    ----------
    Wb : flat parameter vector; the first num_class*feat_dim entries are the
        weight matrix W (row-major), the last num_class entries are the bias b
        (same packing convention as svm_loss / linear_classifier_test).
    x : (n, feat_dim) array of input samples.
    y : length-n sequence of integer class labels in [0, num_class).
    n : number of samples.
    feat_dim : feature dimension.

    Returns
    -------
    float : average negative log-likelihood of the correct classes.
    """
    Wb = np.reshape(Wb, (-1, 1))
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * feat_dim)], (num_class, feat_dim))
    # columns become samples: shape (feat_dim, n)
    x = np.reshape(x.T, (-1, n))
    y = np.asarray(y, dtype=int)
    # class scores, shape (num_class, n)
    s = W @ x + b
    # softmax is shift-invariant; subtract the per-sample max for stability
    s = s - np.max(s, axis=0, keepdims=True)
    log_probs = s - np.log(np.sum(np.exp(s), axis=0, keepdims=True))
    # mean cross-entropy over the n samples
    loss = -np.sum(log_probs[y, np.arange(n)]) / n
    return loss
# Problem 2.
# svm loss calculation
def svm_loss(Wb, x, y, num_class, n, feat_dim):
    """Multiclass SVM (hinge) loss with L2 weight regularization.

    Fixes the original implementation, which (a) printed the full parameter
    vector on every optimizer evaluation, (b) clobbered the reshaped weight
    matrix with ``W = Wb`` and then scored against the flat vector, and
    (c) regularized the bias terms along with the weights.

    Parameters
    ----------
    Wb : flat parameter vector; first num_class*feat_dim entries are the
        weight matrix W (row-major), last num_class entries are the bias b.
    x : (n, feat_dim) array of input samples.
    y : length-n sequence of integer class labels in [0, num_class).
    n : number of samples.
    feat_dim : feature dimension.

    Returns
    -------
    float : mean hinge loss (margin delta = 1) + 0.5 * reg * ||W||^2.
    """
    Wb = np.reshape(Wb, (-1, 1))
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * feat_dim)], (num_class, feat_dim))
    # columns become samples: shape (feat_dim, n)
    x = np.reshape(x.T, (-1, n))
    y = np.asarray(y, dtype=int)
    # class scores, shape (num_class, n)
    s = W @ x + b
    # score of the true class for each sample, shape (n,)
    correct = s[y, np.arange(n)]
    # hinge margins; broadcasting subtracts each sample's correct score
    margins = np.maximum(0.0, s - correct + 1.0)
    # the true class must not contribute to its own margin
    margins[y, np.arange(n)] = 0.0
    reg = 1.0  # regularization strength (weights only, not biases)
    loss = np.sum(margins) / n + 0.5 * reg * np.sum(W * W)
    return loss
# Problem 3.
# kNN classification
def knn_test(X_train, y_train, X_test, y_test, n_train_sample, n_test_sample, k):
    """Fit a k-NN classifier on the training split, print and return test accuracy."""
    classifier = MyKnnClassifier(n_neighbors=k)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
    accuracy = np.mean(predicted == y_test)
    print("prediction accuracy: {:.2f}".format(accuracy))
    return accuracy
import scipy.stats as ss
def majority_vote2(votes):
    """Return the most frequent value in *votes* (scipy masked-stats mode)."""
    return ss.mstats.mode(votes)[0]
class MyKnnClassifier:
    """Minimal k-nearest-neighbor classifier: Euclidean distance, majority vote."""

    def __init__(self, n_neighbors=3):
        # number of neighbors consulted for each prediction
        self.k = n_neighbors

    def fit(self, X_train, y_label):
        # lazy learner: training just memorizes the data
        self.points = X_train
        self.labels = y_label

    def predict(self, X_test):
        """Return a list holding the predicted label for each row of X_test."""
        return [
            self.majority_vote(self.distance(self.points, query))
            for query in X_test
        ]

    def distance(self, X, y):
        # Euclidean distance from every row of X to the single point y
        return np.sqrt(np.sum((X - y) ** 2, axis=1))

    def majority_vote(self, distances):
        """Pick the most common label among the k nearest training points."""
        nearest_labels = [self.labels[i] for i in np.argsort(distances)[: self.k]]
        winner, _count = Counter(nearest_labels).most_common(1)[0]
        return winner
# now lets test the model for linear models, that is, SVM and softmax
def linear_classifier_test(Wb, x_te, y_te, num_class, n_test):
    """Fraction of the first n_test samples whose arg-max linear score matches the label.

    Wb packs a (num_class, dlen) weight matrix followed by num_class biases,
    matching the convention used by the loss functions.
    """
    Wb = np.reshape(Wb, (-1, 1))
    dlen = len(x_te[0])
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * dlen)], (num_class, dlen))
    hits = 0
    for i in range(n_test):
        # linear scores for sample i, then pick the highest-scoring class
        scores = W @ x_te[i].reshape((-1, 1)) + b
        hits += int(np.argmax(scores) == y_te[i])
    return hits / n_test
# ---------------- experiment driver ----------------
# number of classes: this can be either 3 or 4
num_class = 4
# sigma controls the degree of data scattering; larger sigma scatters the data
# more and lowers the achievable accuracy (default 1.0)
sigma = 1.0
print('number of classes: ', num_class, ' sigma for data scatter:', sigma)

# dataset sizes depend on the class count (feature dimension is 2 either way)
if num_class == 4:
    n_train, n_test, feat_dim = 400, 100, 2
else:  # then 3 classes
    n_train, n_test, feat_dim = 300, 60, 2

# generate train dataset
print('generating training data')
x_train, y_train = dg.generate(number=n_train, seed=None, plot=True, num_class=num_class, sigma=sigma)
# generate test dataset
print('generating test data')
x_test, y_test = dg.generate(number=n_test, seed=None, plot=False, num_class=num_class, sigma=sigma)

# set classifiers to 'svm', 'softmax' or 'knn' to pick the model under test
classifiers = 'svm'

if classifiers == 'svm':
    print('training SVM classifier...')
    # random initial parameters: feat_dim(=2)*num_class weights + num_class biases
    w0 = np.random.normal(0, 1, (2 * num_class + num_class))
    result = minimize(svm_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim))
    print('testing SVM classifier...')
    Wb = result.x
    print('accuracy of SVM loss: ', linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
elif classifiers == 'softmax':
    print('training softmax classifier...')
    w0 = np.random.normal(0, 1, (2 * num_class + num_class))
    result = minimize(cross_entropy_softmax_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim))
    print('testing softmax classifier...')
    Wb = result.x
    print('accuracy of softmax loss: ', linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
else:  # knn
    # k value for the kNN classifier; k can be either 1 or 3
    k = 3
    print('testing kNN classifier...')
    print('accuracy of kNN loss: ', knn_test(x_train, y_train, x_test, y_test, n_train, n_test, k) * 100
          , '% for k value of ', k)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment