# hw2
import sys
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import scipy.special as sp
import time
from scipy.optimize import minimize
from collections import Counter
import data_generator as dg
# Problem 1.
# cross entropy loss
# The original stub was left as `pass`; below is a minimal sketch that assumes
# the same flat parameter packing (W rows first, then b) used by svm_loss.
def cross_entropy_softmax_loss(Wb, x, y, num_class, n, feat_dim):
    # unpack the flat parameter vector into W (num_class x feat_dim) and b
    Wb = np.reshape(Wb, (-1, 1))
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * feat_dim)], (num_class, feat_dim))
    x = np.reshape(x.T, (-1, n))   # (feat_dim, n)
    s = W @ x + b                  # class scores, (num_class, n)
    s = s - np.max(s, axis=0)      # shift scores for numerical stability
    log_probs = s - np.log(np.sum(np.exp(s), axis=0))
    # average negative log-likelihood of the true class of each sample
    return -np.mean(log_probs[y, np.arange(n)])
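# Quick sanity check for the sketch above (added for illustration, not part of
# the assignment harness): with all parameters zero, every class gets equal
# probability, so the cross-entropy loss should be exactly log(num_class).
def _softmax_loss_sanity_check():
    nc, fd, ns = 3, 2, 5
    x_demo = np.random.normal(0, 1, (ns, fd))
    y_demo = np.random.randint(0, nc, ns)
    w_zero = np.zeros(nc * fd + nc)
    loss = cross_entropy_softmax_loss(w_zero, x_demo, y_demo, nc, ns, fd)
    assert np.isclose(loss, np.log(nc))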
# Problem 2.
# svm loss calculation
def svm_loss(Wb, x, y, num_class, n, feat_dim):
    # unpack the flat parameter vector into W (num_class x feat_dim) and b
    Wb = np.reshape(Wb, (-1, 1))
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * feat_dim)], (num_class, feat_dim))
    x = np.reshape(x.T, (-1, n))          # (feat_dim, n)
    s = W @ x + b                         # class scores, (num_class, n)
    # multiclass hinge loss: for each sample i, sum over j != y_i of
    # max(0, s_j - s_{y_i} + 1)
    correct_scores = s[y, np.arange(n)]   # score of the true class, (n,)
    margins = np.maximum(0, s - correct_scores + 1)
    margins[y, np.arange(n)] = 0          # the true class contributes no margin
    loss = np.mean(np.sum(margins, axis=0))
    # L2 regularization on the weights only
    reg = 1.0
    loss += 0.5 * reg * np.sum(W * W)
    return loss
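# Worked example (added for illustration, with made-up numbers): two classes,
# one sample with scores s = [3, 1] and true class 0. The only margin term is
# for class 1: max(0, 1 - 3 + 1) = 0, so the hinge part of the loss is 0 and
# only the L2 term 0.5 * reg * sum(W*W) = 0.5 * 1.0 * (9 + 1) = 5.0 remains.
def _svm_loss_example():
    w_demo = np.array([3.0, 0.0, 1.0, 0.0, 0.0, 0.0])  # W = [[3,0],[1,0]], b = 0
    x_demo = np.array([[1.0, 0.0]])                    # one 2-D sample
    y_demo = np.array([0])                             # true class 0
    loss = svm_loss(w_demo, x_demo, y_demo, num_class=2, n=1, feat_dim=2)
    assert np.isclose(loss, 5.0)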
# Problem 3.
# kNN classification
def knn_test(X_train, y_train, X_test, y_test, n_train_sample, n_test_sample, k):
    knn = MyKnnClassifier(n_neighbors=k)  # create a k-NN classifier object
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    print("prediction accuracy: {:.2f}".format(np.mean(y_pred == y_test)))
    return np.mean(y_pred == y_test)
# Alternative majority vote using scipy's mode (kept for reference; the class
# below uses its own Counter-based majority_vote instead). Uses the `stats`
# module already imported at the top of the file.
def majority_vote2(votes):
    mode, count = stats.mstats.mode(votes)
    return mode
class MyKnnClassifier:
    def __init__(self, n_neighbors=3):  # constructor
        self.k = n_neighbors
    def fit(self, X_train, y_label):  # "training" just stores the data
        self.points = X_train
        self.labels = y_label
    def predict(self, X_test):  # predict a label for every test point
        predicts = []  # list collecting the predicted labels
        for test_pt in X_test:  # iterate over the points in the test set
            distances = self.distance(self.points, test_pt)
            winner = self.majority_vote(distances)
            predicts.append(winner)
        return predicts
    def distance(self, X, y):
        # Euclidean distance from every training point in X to the point y
        return np.sqrt(np.sum((X - y) ** 2, axis=1))
    def majority_vote(self, distances):
        # take the labels of the k nearest neighbors and return the most common one
        indices_by_distance = np.argsort(distances)
        k_nearest_neighbor = []
        for i in indices_by_distance[0:self.k]:
            k_nearest_neighbor.append(self.labels[i])
        vote_counts = Counter(k_nearest_neighbor)
        winner, winner_count = vote_counts.most_common(1)[0]
        return winner
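# Minimal usage sketch (added for illustration, with made-up points): a test
# point at the origin is closest to the two class-0 training points, so with
# k=3 the vote among labels [0, 0, 1] predicts class 0.
def _knn_usage_example():
    X_demo = np.array([[0.0, 1.0], [1.0, 0.0], [5.0, 5.0], [6.0, 6.0]])
    y_demo = np.array([0, 0, 1, 1])
    knn = MyKnnClassifier(n_neighbors=3)
    knn.fit(X_demo, y_demo)
    assert knn.predict(np.array([[0.0, 0.0]])) == [0]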
# now let's test the model for linear models, that is, SVM and softmax
def linear_classifier_test(Wb, x_te, y_te, num_class, n_test):
    Wb = np.reshape(Wb, (-1, 1))
    dlen = len(x_te[0])
    b = Wb[-num_class:]
    W = np.reshape(Wb[range(num_class * dlen)], (num_class, dlen))
    accuracy = 0
    for i in range(n_test):
        # find the linear scores
        s = W @ x_te[i].reshape((-1, 1)) + b
        # find the maximum score index
        res = np.argmax(s)
        accuracy = accuracy + (res == y_te[i]).astype('uint8')
    return accuracy / n_test
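# Small check of the argmax decision rule (added sketch, made-up numbers):
# with W = identity and b = 0, a point on the x-axis scores highest for class 0
# and a point on the y-axis for class 1, so both are classified correctly.
def _linear_test_example():
    w_demo = np.array([1.0, 0.0, 0.0, 1.0, 0.0, 0.0])  # W = I, b = 0
    x_demo = np.array([[2.0, 0.0], [0.0, 2.0]])
    y_demo = np.array([0, 1])
    assert linear_classifier_test(w_demo, x_demo, y_demo, num_class=2, n_test=2) == 1.0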
# number of classes: this can be either 3 or 4
num_class = 4
# sigma controls the degree of data scattering; larger sigma gives larger scatter.
# The default is 1.0. Accuracy becomes lower with larger sigma.
sigma = 1.0
print('number of classes: ', num_class, ' sigma for data scatter: ', sigma)
if num_class == 4:
    n_train = 400
    n_test = 100
    feat_dim = 2
else:  # then 3
    n_train = 300
    n_test = 60
    feat_dim = 2
# generate train dataset
print('generating training data')
x_train, y_train = dg.generate(number=n_train, seed=None, plot=True, num_class=num_class, sigma=sigma)
# generate test dataset
print('generating test data')
x_test, y_test = dg.generate(number=n_test, seed=None, plot=False, num_class=num_class, sigma=sigma)
# set classifiers to 'svm' to test the SVM classifier
# set classifiers to 'softmax' to test the softmax classifier
# set classifiers to 'knn' to test the kNN classifier
classifiers = 'svm'
if classifiers == 'svm':
    print('training SVM classifier...')
    w0 = np.random.normal(0, 1, (feat_dim * num_class + num_class))  # flat [W; b]
    result = minimize(svm_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim))
    print('testing SVM classifier...')
    Wb = result.x
    print('accuracy of SVM loss: ', linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
elif classifiers == 'softmax':
    print('training softmax classifier...')
    w0 = np.random.normal(0, 1, (feat_dim * num_class + num_class))  # flat [W; b]
    result = minimize(cross_entropy_softmax_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim))
    print('testing softmax classifier...')
    Wb = result.x
    print('accuracy of softmax loss: ', linear_classifier_test(Wb, x_test, y_test, num_class, n_test) * 100, '%')
else:  # knn
    # k value for the kNN classifier. k can be either 1 or 3.
    k = 3
    print('testing kNN classifier...')
    print('accuracy of kNN loss: ', knn_test(x_train, y_train, x_test, y_test, n_train, n_test, k) * 100,
          '% for k value of ', k)