近邻分类器与聚类分析


近邻分类器

二分类

from sklearn import svm
import numpy as np
import random
import sklearn as svm
# Module-level state shared by Init(), Train() and Test().
w=[]  # NOTE(review): no use of this name is visible in this script
train_x=[]  # training samples, filled by Init()
train_y=[]  # training labels, filled by Init()
rest_x=[]  # samples that Train() decides to keep
rest_y=[]  # labels of the samples kept in rest_x
average5=[]  # NOTE(review): no use of this name is visible in this script
average8=[]  # NOTE(review): no use of this name is visible in this script
test_x=[]  # test samples, filled by Init()
test_y=[]  # test labels, filled by Init()
TR=[]  # random 0/1 marks appended by Train()
def Init():
    """Load the MNIST digits 5 and 8 into the module-level data globals.

    Reads the four IDX files from the hard-coded paths below, keeps only the
    samples labelled 5 or 8 and stores:

    * ``train_x`` / ``test_x`` -- integer ndarrays of shape (samples, 784)
      holding the pixel bytes of the first 60000 / 10000 images;
    * ``train_y`` / ``test_y`` -- lists holding ``1`` for digit 5 and ``-1``
      for digit 8.

    The four globals are rebuilt from scratch on every call.

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if a file is shorter than the requested sample count.
    """
    global train_x,train_y,test_x,test_y

    def read_bytes(path):
        # Return the complete binary content of ``path``.
        with open(path, 'rb') as f:
            return f.read()

    def select(image_bytes, label_bytes, count):
        # Labels start after an 8-byte header, pixels after a 16-byte header;
        # every image is 784 consecutive pixel bytes.
        labels = np.frombuffer(label_bytes, dtype=np.uint8, count=count, offset=8)
        pixels = np.frombuffer(image_bytes, dtype=np.uint8,
                               count=count * 784, offset=16).reshape(count, 784)
        keep = (labels == 5) | (labels == 8)
        # Use every byte as the integer it already is.  Re-parsing its decimal
        # text as hexadecimal turned e.g. 255 into 0x255 == 597.  Widening
        # from uint8 keeps later differences and squares from wrapping around.
        samples = pixels[keep].astype(int)
        targets = [-1 if digit == 8 else 1 for digit in labels[keep].tolist()]
        return samples, targets

    file = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte')
    lab = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte')
    file2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte')
    lab2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte')

    train_x, train_y = select(file, lab, 60000)
    test_x, test_y = select(file2, lab2, 10000)

def Train():
    """Edit the training set with a 1-nearest-neighbour rule.

    The first ``n`` training samples (``n`` is 500, or fewer when less data
    is loaded) are randomly marked in ``TR`` as candidates (1) or references
    (0).  A candidate is appended to ``rest_x`` / ``rest_y`` only when its
    nearest reference, by squared Euclidean distance, carries the same label.
    Ties go to the reference with the lowest index.  When no sample was
    marked as a reference, nothing is kept.

    Prints the marks and the number of candidates.
    """
    n = min(500, len(train_x))
    # Redraw the marks in place so a repeated call never reuses stale ones.
    TR[:] = [random.randint(0, 1) for _ in range(n)]
    print(TR)

    data = np.asarray(train_x[:n])
    refs = [j for j in range(n) if TR[j] == 0]
    ref_data = data[refs] if refs else None

    tt = 0
    for i in range(n):
        if TR[i] != 1:
            continue
        tt += 1
        if not refs:
            continue
        diff = ref_data - data[i]
        # argmin returns the first minimum.  The index is mapped back to the
        # real sample number, including when the very first reference wins
        # (that case used to be recorded as sample 1).
        nearest = refs[int(np.argmin(np.sum(diff * diff, axis=1)))]
        if train_y[nearest] == train_y[i]:
            rest_x.append(train_x[i])
            rest_y.append(train_y[i])
    print(tt)

def sqr(n):
    """Return ``n`` multiplied by itself."""
    squared = n * n
    return squared

def Test():
    global rest_x,rest_y,test_x,test_y
    n=len(rest_x)
    m=len(test_x)
    s=0
   # print(len(w))
   # print(w)
   # print(len(test_y))
   # print(test_y)
    print(n)
    print(m)
    for i in range(0,m):
        #c=0
        mn=0
        id=0
        t=0
        for j in range(0,n):
            d=0;
            for k in range(0,784):
                d+=sqr(test_x[i][k]-rest_x[j][k])
            if t==0:
                mn=d
                id=1
                t=1
            else:
                if d

多分类

import numpy as np
import random
# Module-level state shared by Init(), Train() and Test().
train_x=[]  # training samples, filled by Init()
train_y=[]  # training labels, filled by Init()
rest_x=[]  # samples that Train() decides to keep
rest_y=[]  # labels of the samples kept in rest_x
test_x=[]  # test samples, filled by Init()
test_y=[]  # test labels, filled by Init()
TR=[]  # random 0/1 marks appended by Train()
def Init():
    """Load every MNIST digit into the module-level data globals.

    Reads the four IDX files from the hard-coded paths below and stores:

    * ``train_x`` / ``test_x`` -- integer ndarrays of shape (samples, 784)
      holding the pixel bytes of the first 60000 / 10000 images;
    * ``train_y`` / ``test_y`` -- lists holding the label byte of each sample.

    The four globals are rebuilt from scratch on every call.

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if a file is shorter than the requested sample count.
    """
    global train_x,train_y,test_x,test_y

    def read_bytes(path):
        # Return the complete binary content of ``path``.
        with open(path, 'rb') as f:
            return f.read()

    def decode(image_bytes, label_bytes, count):
        # Labels start after an 8-byte header, pixels after a 16-byte header;
        # every image is 784 consecutive pixel bytes.
        labels = np.frombuffer(label_bytes, dtype=np.uint8, count=count, offset=8)
        pixels = np.frombuffer(image_bytes, dtype=np.uint8,
                               count=count * 784, offset=16).reshape(count, 784)
        # Use every byte as the integer it already is.  Re-parsing its decimal
        # text as hexadecimal turned e.g. 255 into 0x255 == 597.  Widening
        # from uint8 keeps later differences and squares from wrapping around.
        return pixels.astype(int), labels.tolist()

    file = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte')
    lab = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte')
    file2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte')
    lab2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte')

    train_x, train_y = decode(file, lab, 60000)
    test_x, test_y = decode(file2, lab2, 10000)

def Train():
    """Edit the training set with a rank-weighted k-nearest-neighbour vote.

    The first ``n`` training samples (``n`` is 1000, or fewer when less data
    is loaded) are randomly marked in ``TR`` as candidates (1) or references
    (0).  For every candidate the up-to-10 closest references (squared
    Euclidean distance, ties by lowest index) vote for their own class label
    with weights 10, 9, ..., 1 by closeness.  The candidate is appended to
    ``rest_x`` / ``rest_y`` only when the winning label equals its own label.
    When no sample was marked as a reference, nothing is kept.

    Prints the marks and the number of candidates.
    """
    n = min(1000, len(train_x))
    # Redraw the marks in place so a repeated call never reuses stale ones.
    TR[:] = [random.randint(0, 1) for _ in range(n)]
    print(TR)

    data = np.asarray(train_x[:n])
    refs = [j for j in range(n) if TR[j] == 0]
    ref_data = data[refs] if refs else None

    tt = 0
    for i in range(n):
        if TR[i] != 1:
            continue
        tt += 1
        if not refs:
            continue
        dist = np.sum(np.square(ref_data - data[i]), axis=1)
        # A stable sort keeps equally distant references in index order.
        closest = np.argsort(dist, kind='stable')[:10]
        # Votes are accumulated per class label.  Keying them by sample index
        # gave every neighbour its own bucket, so the closest one always won
        # and the vote collapsed to plain 1-NN.
        votes = {}
        best_score = 0
        best_label = None
        for rank, pos in enumerate(closest):
            label = train_y[refs[pos]]
            votes[label] = votes.get(label, 0) + (10 - rank)
            if votes[label] > best_score:
                best_score = votes[label]
                best_label = label
        if best_label == train_y[i]:
            rest_x.append(train_x[i])
            rest_y.append(train_y[i])
    print(tt)

def Test():
    """Classify test samples against the edited set and print the accuracy.

    At most the first 500 samples of ``test_x`` are evaluated.  For each one
    the up-to-10 closest samples of ``rest_x`` (squared Euclidean distance,
    ties by lowest index) vote for their own class label with weights
    10, 9, ..., 1 by closeness.  A prediction counts as correct when the
    winning label equals the entry in ``test_y``.

    Prints the size of the edited set, the number of evaluated samples and
    the accuracy in percent (0.00 when there is nothing to evaluate).
    """
    n = len(rest_x)
    m = min(500, len(test_x))
    s = 0
    print(n)
    print(m)

    refs = np.asarray(rest_x) if n else None
    for i in range(m):
        if refs is None:
            break
        dist = np.sum(np.square(refs - test_x[i]), axis=1)
        # A stable sort keeps equally distant references in index order.
        closest = np.argsort(dist, kind='stable')[:10]
        # Votes are accumulated per class label.  Keying them by sample index
        # gave every neighbour its own bucket, so the closest one always won
        # and the vote collapsed to plain 1-NN.
        votes = {}
        best_score = 0
        best_label = None
        for rank, pos in enumerate(closest):
            label = rest_y[pos]
            votes[label] = votes.get(label, 0) + (10 - rank)
            if votes[label] > best_score:
                best_score = votes[label]
                best_label = label
        if best_label == test_y[i]:
            s += 1

    accuracy = s * 100 / m if m else 0.0
    print("The accuracy is %.2f"%accuracy)


# Run the pipeline: Init() loads the data, Train() fills rest_x/rest_y with
# the samples it keeps, Test() prints the accuracy.
Init()
Train()
Test()

聚类分析

二分类

from sklearn import svm
from sklearn.cluster import KMeans
import numpy as np
# Module-level state shared by Init(), Train() and Test().
train_x=[]  # training samples, filled by Init()
train_y=[]  # training labels, filled by Init()
test_x=[]  # test samples, filled by Init()
test_y=[]  # test labels, filled by Init()
predict=[]  # cluster assignment of test_x, set by Train()
def Init():
    """Load the MNIST digits 5 and 8 into the module-level data globals.

    Reads the four IDX files from the hard-coded paths below, keeps only the
    samples labelled 5 or 8 and stores:

    * ``train_x`` / ``test_x`` -- integer ndarrays of shape (samples, 784)
      holding the pixel bytes of the first 60000 / 10000 images;
    * ``train_y`` / ``test_y`` -- lists holding ``1`` for digit 5 and ``-1``
      for digit 8.

    The four globals are rebuilt from scratch on every call.

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if a file is shorter than the requested sample count.
    """
    global train_x,train_y,test_x,test_y

    def read_bytes(path):
        # Return the complete binary content of ``path``.
        with open(path, 'rb') as f:
            return f.read()

    def select(image_bytes, label_bytes, count):
        # Labels start after an 8-byte header, pixels after a 16-byte header;
        # every image is 784 consecutive pixel bytes.
        labels = np.frombuffer(label_bytes, dtype=np.uint8, count=count, offset=8)
        pixels = np.frombuffer(image_bytes, dtype=np.uint8,
                               count=count * 784, offset=16).reshape(count, 784)
        keep = (labels == 5) | (labels == 8)
        # Use every byte as the integer it already is.  Re-parsing its decimal
        # text as hexadecimal turned e.g. 255 into 0x255 == 597.  Widening
        # from uint8 keeps later arithmetic from wrapping around.
        samples = pixels[keep].astype(int)
        targets = [-1 if digit == 8 else 1 for digit in labels[keep].tolist()]
        return samples, targets

    file = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte')
    lab = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte')
    file2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte')
    lab2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte')

    train_x, train_y = select(file, lab, 60000)
    test_x, test_y = select(file2, lab2, 10000)

def Train():
    """Fit a two-cluster k-means model on ``train_x`` and assign ``test_x``.

    The cluster index predicted for every test sample is stored in the
    module-level ``predict``.
    """
    global predict
    model = KMeans(n_clusters=2)
    model.fit(train_x)
    assignments = model.predict(test_x)
    predict = assignments

def calc_JC(y_true, y_pred):
    """Return the Jaccard coefficient between two clusterings.

    With ``a`` the number of sample pairs grouped together in both
    clusterings, ``b`` the pairs grouped together only in ``y_pred`` and
    ``c`` the pairs grouped together only in ``y_true``, the result is
    ``a / (a + b + c)``.

    The pair counts are derived from label frequencies, so the cost is linear
    in the number of samples instead of quadratic.

    :param y_true: reference cluster label of every sample
    :param y_pred: predicted cluster label of every sample (at least as long
        as ``y_true``; extra entries are ignored)
    :return: the Jaccard coefficient, or 0.0 when no pair is grouped together
        in either clustering (the ratio is undefined there)
    """
    from collections import Counter

    m = len(y_true)
    true_labels = [y_true[i] for i in range(m)]
    pred_labels = [y_pred[i] for i in range(m)]

    def pair_count(groups):
        # Number of unordered pairs that fall inside the same group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    a = pair_count(Counter(zip(true_labels, pred_labels)))
    b = pair_count(Counter(pred_labels)) - a
    c = pair_count(Counter(true_labels)) - a
    if a + b + c == 0:
        return 0.0
    jc = a/(a+b+c)
    return jc



def calc_FM(y_true, y_pred):
    """Return the Fowlkes-Mallows index between two clusterings.

    With ``a`` the number of sample pairs grouped together in both
    clusterings, ``b`` the pairs grouped together only in ``y_pred`` and
    ``c`` the pairs grouped together only in ``y_true``, the result is
    ``sqrt(a/(a+b) * a/(a+c))``.

    The pair counts are derived from label frequencies, so the cost is linear
    in the number of samples instead of quadratic.

    :param y_true: reference cluster label of every sample
    :param y_pred: predicted cluster label of every sample (at least as long
        as ``y_true``; extra entries are ignored)
    :return: the index, or 0.0 when no pair is grouped together in both
        clusterings (which also covers the otherwise undefined 0/0 case)
    """
    from collections import Counter

    m = len(y_true)
    true_labels = [y_true[i] for i in range(m)]
    pred_labels = [y_pred[i] for i in range(m)]

    def pair_count(groups):
        # Number of unordered pairs that fall inside the same group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    a = pair_count(Counter(zip(true_labels, pred_labels)))
    b = pair_count(Counter(pred_labels)) - a
    c = pair_count(Counter(true_labels)) - a
    if a == 0:
        return 0.0
    fm = np.sqrt(a/(a+b)*a/(a+c))
    return fm

def calc_RI(y_true, y_pred):
    '''
    Compute and return the Rand index.

    With ``a`` the number of sample pairs grouped together in both
    clusterings and ``d`` the pairs separated in both, the result is
    ``2*(a+d) / (m*(m-1))`` for ``m`` samples.

    The pair counts are derived from label frequencies, so the cost is linear
    in the number of samples instead of quadratic.

    :param y_true: clusters given by the reference model, one label per sample
    :param y_pred: clusters given by the clustering model (at least as long
        as ``y_true``; extra entries are ignored)
    :return: the Rand index, or 1.0 when there are fewer than two samples
        (no pair exists, so the clusterings cannot disagree)
    '''
    from collections import Counter

    m = len(y_true)
    if m < 2:
        return 1.0
    true_labels = [y_true[i] for i in range(m)]
    pred_labels = [y_pred[i] for i in range(m)]

    def pair_count(groups):
        # Number of unordered pairs that fall inside the same group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    a = pair_count(Counter(zip(true_labels, pred_labels)))
    same_pred = pair_count(Counter(pred_labels))
    same_true = pair_count(Counter(true_labels))
    # Every pair is in exactly one of four classes; d is what remains after
    # removing the pairs grouped together in at least one clustering.
    d = m * (m - 1) // 2 - same_pred - same_true + a
    rand = (2*(a+d))/(m*(m-1))
    return rand

def Test():
    """Print the JC, FM and RI scores of ``predict`` against ``test_y``."""
    reports = (
        ('JC:', calc_JC),
        ('FM:', calc_FM),
        ('RI:', calc_RI),
    )
    for tag, metric in reports:
        print(tag, metric(test_y, predict))



# Run the pipeline: Init() loads the data, Train() fits k-means and stores
# the test-set cluster assignment, Test() prints the JC, FM and RI scores.
Init()
Train()
Test()

多分类

from sklearn import svm
from sklearn.cluster import KMeans
import numpy as np
# Module-level state shared by Init(), Train() and Test().
train_x=[]  # training samples, filled by Init()
train_y=[]  # training labels, filled by Init()
test_x=[]  # test samples, filled by Init()
test_y=[]  # test labels, filled by Init()
predict=[]  # cluster assignment of test_x, set by Train()
def Init():
    """Load every MNIST digit into the module-level data globals.

    Reads the four IDX files from the hard-coded paths below and stores:

    * ``train_x`` / ``test_x`` -- integer ndarrays of shape (samples, 784)
      holding the pixel bytes of the first 60000 / 10000 images;
    * ``train_y`` / ``test_y`` -- lists holding the label byte of each sample.

    The four globals are rebuilt from scratch on every call.

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if a file is shorter than the requested sample count.
    """
    global train_x,train_y,test_x,test_y

    def read_bytes(path):
        # Return the complete binary content of ``path``.
        with open(path, 'rb') as f:
            return f.read()

    def decode(image_bytes, label_bytes, count):
        # Labels start after an 8-byte header, pixels after a 16-byte header;
        # every image is 784 consecutive pixel bytes.
        labels = np.frombuffer(label_bytes, dtype=np.uint8, count=count, offset=8)
        pixels = np.frombuffer(image_bytes, dtype=np.uint8,
                               count=count * 784, offset=16).reshape(count, 784)
        # Use every byte as the integer it already is.  Re-parsing its decimal
        # text as hexadecimal turned e.g. 255 into 0x255 == 597.  Widening
        # from uint8 keeps later arithmetic from wrapping around.
        return pixels.astype(int), labels.tolist()

    file = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-images.idx3-ubyte')
    lab = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\train-labels.idx1-ubyte')
    file2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-images.idx3-ubyte')
    lab2 = read_bytes(r'C:\Users\Administrator\Desktop\模式识别\2021141470111 苏臻 实践作业2\t10k-labels.idx1-ubyte')

    train_x, train_y = decode(file, lab, 60000)
    test_x, test_y = decode(file2, lab2, 10000)

def Train():
    """Fit a ten-cluster k-means model on ``train_x`` and assign ``test_x``.

    The cluster index predicted for every test sample is stored in the
    module-level ``predict``.
    """
    global predict
    model = KMeans(n_clusters=10)
    model.fit(train_x)
    assignments = model.predict(test_x)
    predict = assignments

def calc_JC(y_true, y_pred):
    """Return the Jaccard coefficient between two clusterings.

    With ``a`` the number of sample pairs grouped together in both
    clusterings, ``b`` the pairs grouped together only in ``y_pred`` and
    ``c`` the pairs grouped together only in ``y_true``, the result is
    ``a / (a + b + c)``.

    The pair counts are derived from label frequencies, so the cost is linear
    in the number of samples instead of quadratic.

    :param y_true: reference cluster label of every sample
    :param y_pred: predicted cluster label of every sample (at least as long
        as ``y_true``; extra entries are ignored)
    :return: the Jaccard coefficient, or 0.0 when no pair is grouped together
        in either clustering (the ratio is undefined there)
    """
    from collections import Counter

    m = len(y_true)
    true_labels = [y_true[i] for i in range(m)]
    pred_labels = [y_pred[i] for i in range(m)]

    def pair_count(groups):
        # Number of unordered pairs that fall inside the same group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    a = pair_count(Counter(zip(true_labels, pred_labels)))
    b = pair_count(Counter(pred_labels)) - a
    c = pair_count(Counter(true_labels)) - a
    if a + b + c == 0:
        return 0.0
    jc = a/(a+b+c)
    return jc



def calc_FM(y_true, y_pred):
    """Return the Fowlkes-Mallows index between two clusterings.

    With ``a`` the number of sample pairs grouped together in both
    clusterings, ``b`` the pairs grouped together only in ``y_pred`` and
    ``c`` the pairs grouped together only in ``y_true``, the result is
    ``sqrt(a/(a+b) * a/(a+c))``.

    The pair counts are derived from label frequencies, so the cost is linear
    in the number of samples instead of quadratic.

    :param y_true: reference cluster label of every sample
    :param y_pred: predicted cluster label of every sample (at least as long
        as ``y_true``; extra entries are ignored)
    :return: the index, or 0.0 when no pair is grouped together in both
        clusterings (which also covers the otherwise undefined 0/0 case)
    """
    from collections import Counter

    m = len(y_true)
    true_labels = [y_true[i] for i in range(m)]
    pred_labels = [y_pred[i] for i in range(m)]

    def pair_count(groups):
        # Number of unordered pairs that fall inside the same group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    a = pair_count(Counter(zip(true_labels, pred_labels)))
    b = pair_count(Counter(pred_labels)) - a
    c = pair_count(Counter(true_labels)) - a
    if a == 0:
        return 0.0
    fm = np.sqrt(a/(a+b)*a/(a+c))
    return fm

def calc_RI(y_true, y_pred):
    '''
    Compute and return the Rand index.

    With ``a`` the number of sample pairs grouped together in both
    clusterings and ``d`` the pairs separated in both, the result is
    ``2*(a+d) / (m*(m-1))`` for ``m`` samples.

    The pair counts are derived from label frequencies, so the cost is linear
    in the number of samples instead of quadratic.

    :param y_true: clusters given by the reference model, one label per sample
    :param y_pred: clusters given by the clustering model (at least as long
        as ``y_true``; extra entries are ignored)
    :return: the Rand index, or 1.0 when there are fewer than two samples
        (no pair exists, so the clusterings cannot disagree)
    '''
    from collections import Counter

    m = len(y_true)
    if m < 2:
        return 1.0
    true_labels = [y_true[i] for i in range(m)]
    pred_labels = [y_pred[i] for i in range(m)]

    def pair_count(groups):
        # Number of unordered pairs that fall inside the same group.
        return sum(size * (size - 1) // 2 for size in groups.values())

    a = pair_count(Counter(zip(true_labels, pred_labels)))
    same_pred = pair_count(Counter(pred_labels))
    same_true = pair_count(Counter(true_labels))
    # Every pair is in exactly one of four classes; d is what remains after
    # removing the pairs grouped together in at least one clustering.
    d = m * (m - 1) // 2 - same_pred - same_true + a
    rand = (2*(a+d))/(m*(m-1))
    return rand

def Test():
    """Print the JC, FM and RI scores, then the true labels and predictions."""
    reports = (
        ('JC:', calc_JC),
        ('FM:', calc_FM),
        ('RI:', calc_RI),
    )
    for tag, metric in reports:
        print(tag, metric(test_y, predict))

    for values in (test_y, predict):
        print(values)


# Run the pipeline: Init() loads the data, Train() fits k-means and stores
# the test-set cluster assignment, Test() prints the scores and the labels.
Init()
Train()
Test()

CS