近邻分类器
二分类
from sklearn import svm
import numpy as np
import random
import sklearn as svm
# Module-level containers shared by the functions below through `global`.
w = []                        # referenced only in commented-out prints in this section
train_x, train_y = [], []     # training samples / labels, filled by Init()
rest_x, rest_y = [], []       # samples kept by Train()
average5, average8 = [], []   # not referenced by the functions visible in this section
test_x, test_y = [], []       # test samples / labels, filled by Init()
TR = []                       # random 0/1 flags appended by Train()
def Init():
    """Load the MNIST digits 5 and 8 into the module-level data sets.

    Reads the four IDX files from their fixed locations, keeps only samples
    labelled 5 or 8 and stores them as:

    * ``train_x`` / ``test_x``: integer arrays, one 784-pixel row per image
    * ``train_y`` / ``test_y``: lists holding ``1`` for digit 5, ``-1`` for digit 8

    Each byte is used directly as its 0..255 intensity; re-parsing the byte's
    decimal text as hexadecimal would map e.g. 200 to 0x200 == 512.  The label
    lists are refilled in place, so calling ``Init`` again does not accumulate
    duplicates.

    :raises OSError: if one of the data files cannot be opened
    :raises ValueError: if a file holds fewer samples than expected
    """
    global train_x, train_y, test_x, test_y

    def load(image_path, label_path, count):
        # Read one image/label file pair and keep the rows labelled 5 or 8.
        with open(image_path, 'rb') as f:
            raw_images = f.read()
        with open(label_path, 'rb') as f:
            raw_labels = f.read()
        # Offsets 8 and 16 skip the label-file and image-file headers.
        labels = np.frombuffer(raw_labels, dtype=np.uint8, count=count, offset=8)
        images = np.frombuffer(raw_images, dtype=np.uint8, count=count * 784, offset=16)
        keep = (labels == 5) | (labels == 8)
        # Signed integers, so later pixel differences cannot wrap around as uint8.
        pixels = images.reshape(count, 784)[keep].astype(int)
        targets = [1 if value == 5 else -1 for value in labels[keep].tolist()]
        return pixels, targets

    train_pixels, train_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte',
        60000,
    )
    test_pixels, test_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte',
        10000,
    )

    # Assign only after both file pairs loaded, so a failure leaves the state untouched.
    train_x = train_pixels
    train_y[:] = train_targets
    test_x = test_pixels
    test_y[:] = test_targets
def Train():
    """Edit the training set with a 1-nearest-neighbour rule.

    The first ``n`` training samples (at most 500) each get a random 0/1 flag
    appended to the module-level ``TR``: flag 0 marks a reference sample,
    flag 1 a candidate.  Every candidate is classified by its nearest
    reference sample (squared Euclidean distance over all pixels) and is
    appended to ``rest_x``/``rest_y`` only when that neighbour carries the
    same label.  Prints the flag list and the number of candidates.

    The nearest neighbour is tracked by its real index; recording the
    constant 1 for the first reference sample compared against the wrong
    sample whenever that first reference was the closest one.
    """
    global train_x, train_y, rest_x, rest_y
    n = min(500, len(train_x))
    for _ in range(n):
        TR.append(random.randint(0, 1))
    print(TR)

    # float64 avoids unsigned-integer wrap-around when subtracting pixel rows.
    samples = np.asarray(train_x[:n], dtype=float)
    reference = [j for j in range(n) if TR[j] == 0]
    reference_points = samples[np.array(reference, dtype=int)]

    tt = 0
    for i in range(n):
        if TR[i] != 1:
            continue
        tt += 1
        if reference:
            distances = np.sum(np.square(reference_points - samples[i]), axis=1)
            # argmin returns the first minimum, i.e. the earliest sample on ties.
            nearest = reference[int(np.argmin(distances))]
        else:
            # No reference sample was drawn: fall back to sample 0 as before.
            nearest = 0
        if train_y[nearest] == train_y[i]:
            rest_x.append(train_x[i])
            rest_y.append(train_y[i])
    print(tt)
def sqr(n):
    """Return ``n`` multiplied by itself."""
    product = n * n
    return product
def Test():
global rest_x,rest_y,test_x,test_y
n=len(rest_x)
m=len(test_x)
s=0
# print(len(w))
# print(w)
# print(len(test_y))
# print(test_y)
print(n)
print(m)
for i in range(0,m):
#c=0
mn=0
id=0
t=0
for j in range(0,n):
d=0;
for k in range(0,784):
d+=sqr(test_x[i][k]-rest_x[j][k])
if t==0:
mn=d
id=1
t=1
else:
if d
多分类
import numpy as np
import random
# Module-level containers shared by Init/Train/Test through `global`.
train_x, train_y = [], []   # training samples / labels, filled by Init()
rest_x, rest_y = [], []     # samples kept by Train()
test_x, test_y = [], []     # test samples / labels, filled by Init()
TR = []                     # random 0/1 flags appended by Train()
def Init():
    """Load all ten MNIST digit classes into the module-level data sets.

    Reads the four IDX files from their fixed locations and stores them as:

    * ``train_x`` / ``test_x``: integer arrays, one 784-pixel row per image
    * ``train_y`` / ``test_y``: lists with the digit label of every row

    Each byte is used directly as its 0..255 intensity; re-parsing the byte's
    decimal text as hexadecimal would map e.g. 200 to 0x200 == 512.  The label
    lists are refilled in place, so calling ``Init`` again does not accumulate
    duplicates.

    :raises OSError: if one of the data files cannot be opened
    :raises ValueError: if a file holds fewer samples than expected
    """
    global train_x, train_y, test_x, test_y

    def load(image_path, label_path, count):
        # Read one image/label file pair.
        with open(image_path, 'rb') as f:
            raw_images = f.read()
        with open(label_path, 'rb') as f:
            raw_labels = f.read()
        # Offsets 8 and 16 skip the label-file and image-file headers.
        labels = np.frombuffer(raw_labels, dtype=np.uint8, count=count, offset=8)
        images = np.frombuffer(raw_images, dtype=np.uint8, count=count * 784, offset=16)
        # Signed integers, so later pixel differences cannot wrap around as uint8.
        pixels = images.reshape(count, 784).astype(int)
        return pixels, labels.tolist()

    train_pixels, train_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte',
        60000,
    )
    test_pixels, test_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte',
        10000,
    )

    # Assign only after both file pairs loaded, so a failure leaves the state untouched.
    train_x = train_pixels
    train_y[:] = train_targets
    test_x = test_pixels
    test_y[:] = test_targets
def Train():
    """Edit the training set with a rank-weighted 10-nearest-neighbour vote.

    The first ``n`` training samples (at most 1000) each get a random 0/1
    flag appended to the module-level ``TR``: flag 0 marks a reference sample,
    flag 1 a candidate.  Every candidate is classified by its 10 nearest
    reference samples (squared Euclidean distance); the r-th nearest (r from
    0) votes with weight ``10 - r`` for its own label.  Candidates whose
    predicted label equals their true label are appended to
    ``rest_x``/``rest_y``.  Prints the flag list and the candidate count.

    Votes are accumulated per class label.  Accumulating them per sample
    index gives every neighbour its own bucket, so the closest one always
    wins and the vote collapses to plain 1-NN.  A candidate is dropped when
    no reference sample exists to vote on it.
    """
    global train_x, train_y, rest_x, rest_y
    neighbours = 10
    n = min(1000, len(train_x))
    for _ in range(n):
        TR.append(random.randint(0, 1))
    print(TR)

    # float64 avoids unsigned-integer wrap-around when subtracting pixel rows.
    samples = np.asarray(train_x[:n], dtype=float)
    reference = [j for j in range(n) if TR[j] == 0]
    reference_points = samples[np.array(reference, dtype=int)]

    tt = 0
    for i in range(n):
        if TR[i] != 1:
            continue
        tt += 1
        predicted = None
        if reference:
            distances = np.sum(np.square(reference_points - samples[i]), axis=1)
            # Stable sort: equally distant neighbours keep their original order.
            nearest = np.argsort(distances, kind='stable')[:neighbours]
            votes = {}
            best = 0
            for rank, position in enumerate(nearest):
                label = train_y[reference[position]]
                votes[label] = votes.get(label, 0) + (neighbours - rank)
                if votes[label] > best:
                    best = votes[label]
                    predicted = label
        if predicted is not None and predicted == train_y[i]:
            rest_x.append(train_x[i])
            rest_y.append(train_y[i])
    print(tt)
def Test():
    """Classify test samples against the edited set and print the accuracy.

    Uses the first ``m`` rows of ``test_x`` (at most 500).  Each one is
    classified by its 10 nearest samples in ``rest_x`` (squared Euclidean
    distance); the r-th nearest (r from 0) votes with weight ``10 - r`` for
    its label in ``rest_y``.  Prints the size of the edited set, ``m`` and the
    percentage of test samples whose predicted label equals ``test_y``.

    Votes are accumulated per class label.  Accumulating them per sample
    index gives every neighbour its own bucket, so the closest one always
    wins and the vote collapses to plain 1-NN.  With an empty edited set no
    sample counts as correct; with no test samples the accuracy prints as 0.
    """
    global rest_x, rest_y, test_x, test_y
    neighbours = 10
    n = len(rest_x)
    m = min(500, len(test_x))
    s = 0
    print(n)
    print(m)

    # float64 avoids unsigned-integer wrap-around when subtracting pixel rows.
    reference_points = np.asarray(rest_x, dtype=float) if n else None
    for i in range(m):
        predicted = None
        if n:
            query = np.asarray(test_x[i], dtype=float)
            distances = np.sum(np.square(reference_points - query), axis=1)
            # Stable sort: equally distant neighbours keep their original order.
            nearest = np.argsort(distances, kind='stable')[:neighbours]
            votes = {}
            best = 0
            for rank, position in enumerate(nearest):
                label = rest_y[position]
                votes[label] = votes.get(label, 0) + (neighbours - rank)
                if votes[label] > best:
                    best = votes[label]
                    predicted = label
        if predicted is not None and predicted == test_y[i]:
            s += 1
    accuracy = s * 100 / m if m else 0.0
    print("The accuracy is %.2f" % accuracy)
# Script entry: load the data, build the edited reference set, then print the test accuracy.
# The order matters: Train() reads what Init() loaded, Test() reads what Train() kept.
Init()
Train()
Test()
聚类分析
二分类
from sklearn import svm
from sklearn.cluster import KMeans
import numpy as np
# Module-level containers shared by Init/Train/Test through `global`.
train_x, train_y = [], []   # training samples / labels, filled by Init()
test_x, test_y = [], []     # test samples / labels, filled by Init()
predict = []                # replaced by Train() with the cluster index of every test sample
def Init():
    """Load the MNIST digits 5 and 8 into the module-level data sets.

    Reads the four IDX files from their fixed locations, keeps only samples
    labelled 5 or 8 and stores them as:

    * ``train_x`` / ``test_x``: integer arrays, one 784-pixel row per image
    * ``train_y`` / ``test_y``: lists holding ``1`` for digit 5, ``-1`` for digit 8

    Each byte is used directly as its 0..255 intensity; re-parsing the byte's
    decimal text as hexadecimal would map e.g. 200 to 0x200 == 512.  The label
    lists are refilled in place, so calling ``Init`` again does not accumulate
    duplicates.

    :raises OSError: if one of the data files cannot be opened
    :raises ValueError: if a file holds fewer samples than expected
    """
    global train_x, train_y, test_x, test_y

    def load(image_path, label_path, count):
        # Read one image/label file pair and keep the rows labelled 5 or 8.
        with open(image_path, 'rb') as f:
            raw_images = f.read()
        with open(label_path, 'rb') as f:
            raw_labels = f.read()
        # Offsets 8 and 16 skip the label-file and image-file headers.
        labels = np.frombuffer(raw_labels, dtype=np.uint8, count=count, offset=8)
        images = np.frombuffer(raw_images, dtype=np.uint8, count=count * 784, offset=16)
        keep = (labels == 5) | (labels == 8)
        # Signed integers, so later pixel differences cannot wrap around as uint8.
        pixels = images.reshape(count, 784)[keep].astype(int)
        targets = [1 if value == 5 else -1 for value in labels[keep].tolist()]
        return pixels, targets

    train_pixels, train_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte',
        60000,
    )
    test_pixels, test_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte',
        10000,
    )

    # Assign only after both file pairs loaded, so a failure leaves the state untouched.
    train_x = train_pixels
    train_y[:] = train_targets
    test_x = test_pixels
    test_y[:] = test_targets
def Train():
    """Fit a 2-cluster KMeans model on ``train_x`` and cluster ``test_x``.

    The cluster index assigned to every row of ``test_x`` is stored in the
    module-level ``predict``.
    """
    global predict
    model = KMeans(n_clusters=2)
    model.fit(train_x)
    assignments = model.predict(test_x)
    predict = assignments
def calc_JC(y_true, y_pred):
    """Return the Jaccard coefficient between a reference partition and a clustering.

    Over all unordered sample pairs, ``a`` counts pairs grouped together in
    both labelings, ``b`` pairs grouped together only in ``y_pred`` and ``c``
    pairs grouped together only in ``y_true``; the result is
    ``a / (a + b + c)``.

    The pair counts come from group sizes (a group of size s holds
    s*(s-1)/2 pairs), so the cost is linear in the number of samples instead
    of quadratic.

    :param y_true: reference cluster labels (hashable), one per sample
    :param y_pred: cluster labels from the model, aligned with ``y_true``
    :return: Jaccard coefficient; 0.0 when no pair shares a cluster in either
        labeling (the ratio would be 0/0)
    :raises IndexError: if ``y_pred`` is shorter than ``y_true``
    """
    from collections import Counter

    if len(y_pred) < len(y_true):
        raise IndexError("y_pred has fewer labels than y_true")

    def pair_count(groups):
        # Number of unordered pairs that fit inside each group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    # zip stops at len(y_true), matching the range of the pairwise definition.
    pairs = list(zip(y_true, y_pred))
    a = pair_count(Counter(pairs))
    same_true = pair_count(Counter(t for t, _ in pairs))   # a + c
    same_pred = pair_count(Counter(p for _, p in pairs))   # a + b
    total = same_true + same_pred - a                      # a + b + c
    if total == 0:
        return 0.0
    jc = a / total
    return jc
def calc_FM(y_true, y_pred):
    """Return the Fowlkes-Mallows index between a reference partition and a clustering.

    Over all unordered sample pairs, ``a`` counts pairs grouped together in
    both labelings, ``b`` pairs grouped together only in ``y_pred`` and ``c``
    pairs grouped together only in ``y_true``; the result is
    ``sqrt(a/(a+b) * a/(a+c))``.

    The pair counts come from group sizes (a group of size s holds
    s*(s-1)/2 pairs), so the cost is linear in the number of samples instead
    of quadratic.

    :param y_true: reference cluster labels (hashable), one per sample
    :param y_pred: cluster labels from the model, aligned with ``y_true``
    :return: Fowlkes-Mallows index; 0.0 when either labeling groups no pair
        together (a factor would be 0/0)
    :raises IndexError: if ``y_pred`` is shorter than ``y_true``
    """
    from collections import Counter

    if len(y_pred) < len(y_true):
        raise IndexError("y_pred has fewer labels than y_true")

    def pair_count(groups):
        # Number of unordered pairs that fit inside each group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    # zip stops at len(y_true), matching the range of the pairwise definition.
    pairs = list(zip(y_true, y_pred))
    a = pair_count(Counter(pairs))
    same_true = pair_count(Counter(t for t, _ in pairs))   # a + c
    same_pred = pair_count(Counter(p for _, p in pairs))   # a + b
    if same_pred == 0 or same_true == 0:
        return 0.0
    fm = np.sqrt(a / same_pred * a / same_true)
    return fm
#******** End *******#
def calc_RI(y_true, y_pred):
    '''
    Compute and return the Rand index.

    Over all unordered sample pairs, ``a`` counts pairs grouped together in
    both labelings and ``d`` pairs separated in both; the result is
    ``2 * (a + d) / (m * (m - 1))`` for ``m`` samples.

    The pair counts come from group sizes (a group of size s holds
    s*(s-1)/2 pairs), so the cost is linear in the number of samples instead
    of quadratic.

    :param y_true: clusters given by the reference model (hashable labels)
    :param y_pred: clusters given by the clustering model, aligned with y_true
    :return: Rand index; 1.0 when there are fewer than two samples (no pair
        exists that the two labelings could disagree on)
    :raises IndexError: if y_pred is shorter than y_true
    '''
    from collections import Counter

    m = len(y_true)
    if len(y_pred) < m:
        raise IndexError("y_pred has fewer labels than y_true")
    if m < 2:
        return 1.0

    def pair_count(groups):
        # Number of unordered pairs that fit inside each group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    # zip stops at len(y_true), matching the range of the pairwise definition.
    pairs = list(zip(y_true, y_pred))
    a = pair_count(Counter(pairs))
    same_true = pair_count(Counter(t for t, _ in pairs))
    same_pred = pair_count(Counter(p for _, p in pairs))
    # Pairs separated in both labelings: all pairs minus those joined in at least one.
    d = m * (m - 1) // 2 - same_true - same_pred + a
    rand = (2 * (a + d)) / (m * (m - 1))
    return rand
def Test():
    """Print the JC, FM and RI agreement scores of ``predict`` against ``test_y``."""
    truth, clusters = test_y, predict
    jaccard = calc_JC(truth, clusters)
    print('JC:', jaccard)
    fowlkes_mallows = calc_FM(truth, clusters)
    print('FM:', fowlkes_mallows)
    rand_index = calc_RI(truth, clusters)
    print('RI:', rand_index)
# Script entry: load the data, cluster it with KMeans, then print the agreement scores.
# The order matters: Train() reads what Init() loaded, Test() reads what Train() predicted.
Init()
Train()
Test()
多分类
from sklearn import svm
from sklearn.cluster import KMeans
import numpy as np
# Module-level containers shared by Init/Train/Test through `global`.
train_x, train_y = [], []   # training samples / labels, filled by Init()
test_x, test_y = [], []     # test samples / labels, filled by Init()
predict = []                # replaced by Train() with the cluster index of every test sample
def Init():
    """Load all ten MNIST digit classes into the module-level data sets.

    Reads the four IDX files from their fixed locations and stores them as:

    * ``train_x`` / ``test_x``: integer arrays, one 784-pixel row per image
    * ``train_y`` / ``test_y``: lists with the digit label of every row

    Each byte is used directly as its 0..255 intensity; re-parsing the byte's
    decimal text as hexadecimal would map e.g. 200 to 0x200 == 512.  The label
    lists are refilled in place, so calling ``Init`` again does not accumulate
    duplicates.

    :raises OSError: if one of the data files cannot be opened
    :raises ValueError: if a file holds fewer samples than expected
    """
    global train_x, train_y, test_x, test_y

    def load(image_path, label_path, count):
        # Read one image/label file pair.
        with open(image_path, 'rb') as f:
            raw_images = f.read()
        with open(label_path, 'rb') as f:
            raw_labels = f.read()
        # Offsets 8 and 16 skip the label-file and image-file headers.
        labels = np.frombuffer(raw_labels, dtype=np.uint8, count=count, offset=8)
        images = np.frombuffer(raw_images, dtype=np.uint8, count=count * 784, offset=16)
        # Signed integers, so later pixel differences cannot wrap around as uint8.
        pixels = images.reshape(count, 784).astype(int)
        return pixels, labels.tolist()

    train_pixels, train_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte',
        60000,
    )
    test_pixels, test_targets = load(
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte',
        r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte',
        10000,
    )

    # Assign only after both file pairs loaded, so a failure leaves the state untouched.
    train_x = train_pixels
    train_y[:] = train_targets
    test_x = test_pixels
    test_y[:] = test_targets
def Train():
    """Fit a 10-cluster KMeans model on ``train_x`` and cluster ``test_x``.

    The cluster index assigned to every row of ``test_x`` is stored in the
    module-level ``predict``.
    """
    global predict
    model = KMeans(n_clusters=10)
    model.fit(train_x)
    assignments = model.predict(test_x)
    predict = assignments
def calc_JC(y_true, y_pred):
    """Return the Jaccard coefficient between a reference partition and a clustering.

    Over all unordered sample pairs, ``a`` counts pairs grouped together in
    both labelings, ``b`` pairs grouped together only in ``y_pred`` and ``c``
    pairs grouped together only in ``y_true``; the result is
    ``a / (a + b + c)``.

    The pair counts come from group sizes (a group of size s holds
    s*(s-1)/2 pairs), so the cost is linear in the number of samples instead
    of quadratic.

    :param y_true: reference cluster labels (hashable), one per sample
    :param y_pred: cluster labels from the model, aligned with ``y_true``
    :return: Jaccard coefficient; 0.0 when no pair shares a cluster in either
        labeling (the ratio would be 0/0)
    :raises IndexError: if ``y_pred`` is shorter than ``y_true``
    """
    from collections import Counter

    if len(y_pred) < len(y_true):
        raise IndexError("y_pred has fewer labels than y_true")

    def pair_count(groups):
        # Number of unordered pairs that fit inside each group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    # zip stops at len(y_true), matching the range of the pairwise definition.
    pairs = list(zip(y_true, y_pred))
    a = pair_count(Counter(pairs))
    same_true = pair_count(Counter(t for t, _ in pairs))   # a + c
    same_pred = pair_count(Counter(p for _, p in pairs))   # a + b
    total = same_true + same_pred - a                      # a + b + c
    if total == 0:
        return 0.0
    jc = a / total
    return jc
def calc_FM(y_true, y_pred):
    """Return the Fowlkes-Mallows index between a reference partition and a clustering.

    Over all unordered sample pairs, ``a`` counts pairs grouped together in
    both labelings, ``b`` pairs grouped together only in ``y_pred`` and ``c``
    pairs grouped together only in ``y_true``; the result is
    ``sqrt(a/(a+b) * a/(a+c))``.

    The pair counts come from group sizes (a group of size s holds
    s*(s-1)/2 pairs), so the cost is linear in the number of samples instead
    of quadratic.

    :param y_true: reference cluster labels (hashable), one per sample
    :param y_pred: cluster labels from the model, aligned with ``y_true``
    :return: Fowlkes-Mallows index; 0.0 when either labeling groups no pair
        together (a factor would be 0/0)
    :raises IndexError: if ``y_pred`` is shorter than ``y_true``
    """
    from collections import Counter

    if len(y_pred) < len(y_true):
        raise IndexError("y_pred has fewer labels than y_true")

    def pair_count(groups):
        # Number of unordered pairs that fit inside each group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    # zip stops at len(y_true), matching the range of the pairwise definition.
    pairs = list(zip(y_true, y_pred))
    a = pair_count(Counter(pairs))
    same_true = pair_count(Counter(t for t, _ in pairs))   # a + c
    same_pred = pair_count(Counter(p for _, p in pairs))   # a + b
    if same_pred == 0 or same_true == 0:
        return 0.0
    fm = np.sqrt(a / same_pred * a / same_true)
    return fm
#******** End *******#
def calc_RI(y_true, y_pred):
    '''
    Compute and return the Rand index.

    Over all unordered sample pairs, ``a`` counts pairs grouped together in
    both labelings and ``d`` pairs separated in both; the result is
    ``2 * (a + d) / (m * (m - 1))`` for ``m`` samples.

    The pair counts come from group sizes (a group of size s holds
    s*(s-1)/2 pairs), so the cost is linear in the number of samples instead
    of quadratic.

    :param y_true: clusters given by the reference model (hashable labels)
    :param y_pred: clusters given by the clustering model, aligned with y_true
    :return: Rand index; 1.0 when there are fewer than two samples (no pair
        exists that the two labelings could disagree on)
    :raises IndexError: if y_pred is shorter than y_true
    '''
    from collections import Counter

    m = len(y_true)
    if len(y_pred) < m:
        raise IndexError("y_pred has fewer labels than y_true")
    if m < 2:
        return 1.0

    def pair_count(groups):
        # Number of unordered pairs that fit inside each group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    # zip stops at len(y_true), matching the range of the pairwise definition.
    pairs = list(zip(y_true, y_pred))
    a = pair_count(Counter(pairs))
    same_true = pair_count(Counter(t for t, _ in pairs))
    same_pred = pair_count(Counter(p for _, p in pairs))
    # Pairs separated in both labelings: all pairs minus those joined in at least one.
    d = m * (m - 1) // 2 - same_true - same_pred + a
    rand = (2 * (a + d)) / (m * (m - 1))
    return rand
def Test():
    """Print the JC, FM and RI agreement scores, then the raw labels.

    Scores compare the module-level ``predict`` against ``test_y``; both
    sequences are printed afterwards.
    """
    truth, clusters = test_y, predict
    jaccard = calc_JC(truth, clusters)
    print('JC:', jaccard)
    fowlkes_mallows = calc_FM(truth, clusters)
    print('FM:', fowlkes_mallows)
    rand_index = calc_RI(truth, clusters)
    print('RI:', rand_index)
    for sequence in (truth, clusters):
        print(sequence)
# Script entry: load the data, cluster it with KMeans, then print the agreement scores.
# The order matters: Train() reads what Init() loaded, Test() reads what Train() predicted.
Init()
Train()
Test()