Neaya~

笔记、记录、总结

KNN

K近邻分类

KNN

  • n_neighbors:参与投票的近邻样本个数(即 k 值),默认值为 5

  • 近邻法(KNN)

  • kNN预测iris

    用python实现knn

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    """
    # @Time : 2020/8/3
    # @Author : Jimou Chen
    """

    from sklearn import datasets # 导入数据集
    from sklearn.model_selection import train_test_split # 用于切分数据
    from sklearn.metrics import classification_report, confusion_matrix # 验证准确性
    import operator
    import numpy as np


    # 定义kNN函数,采用欧氏距离计算,返回预测的分类结果
    def kNN(x_test, x_data, y_data, k):
        """Classify one sample by majority vote among its k nearest neighbours.

        Distances are Euclidean and are measured between ``x_test`` and every
        row of ``x_data``.

        Args:
            x_test: Feature values of the sample to classify; must broadcast
                against the rows of ``x_data``.
            x_data: Training features, one sample per row (2-D array-like).
            y_data: Training labels; ``y_data[i]`` is the label of row ``i``.
            k: Number of neighbours that vote. A value larger than the number
                of training samples is clamped to that number.

        Returns:
            The label that occurs most often among the k nearest samples. When
            several labels share the top count, the one met first while
            walking outward from the nearest neighbour is returned.

        Raises:
            ValueError: If ``k`` is smaller than 1 or ``x_data`` has no rows.
        """
        x_data = np.asarray(x_data)
        n_samples = x_data.shape[0]
        if k < 1:
            raise ValueError("k must be at least 1")
        if n_samples == 0:
            raise ValueError("x_data must contain at least one sample")
        # Asking for more neighbours than exist would index past the end.
        k = min(k, n_samples)

        # Broadcasting subtracts x_test from every row, so no tiling is needed.
        distance = (((x_data - x_test) ** 2).sum(axis=1)) ** 0.5
        nearest = distance.argsort()[:k]

        # Tally the labels of the k nearest samples, nearest first.
        class_count = {}
        for idx in nearest:
            label = y_data[idx]
            class_count[label] = class_count.get(label, 0) + 1

        # max() keeps the first key among equal counts, i.e. the label that
        # was inserted earliest (closest neighbour).
        return max(class_count, key=class_count.get)


    # Load the iris data set
    iris = datasets.load_iris()
    # Hold out 20% of the samples for testing; the other 80% form the training set.
    # No random_state is passed, so the split (and the scores) vary between runs.
    x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)

    # Classify every held-out sample with the hand-written kNN, using k = 5
    prediction = [kNN(sample, x_train, y_train, 5) for sample in x_test]

    # Compare the predictions with the true labels to see how well it did
    print(classification_report(y_test, prediction))
    print(confusion_matrix(y_test, prediction))
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
                  precision    recall  f1-score   support

               0       1.00      1.00      1.00        11
               1       0.90      1.00      0.95         9
               2       1.00      0.90      0.95        10

        accuracy                           0.97        30
       macro avg       0.97      0.97      0.96        30
    weighted avg       0.97      0.97      0.97        30

    [[11  0  0]
     [ 0  9  0]
     [ 0  1  9]]

    Process finished with exit code 0
  • 为了方便,可以把用python实现的kNN算法封装起来

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    """
    # @Time : 2020/8/3
    # @Author : Jimou Chen
    """
    import operator
    import numpy as np


    # 定义knn函数,采用欧氏距离计算,返回预测的分类结果
    def kNN(x_test, x_data, y_data, k):
        """Return the majority label among the k training samples nearest to x_test.

        Nearness is Euclidean distance between ``x_test`` and each row of
        ``x_data``.

        Args:
            x_test: Feature values of the sample to classify; must broadcast
                against the rows of ``x_data``.
            x_data: Training features, one sample per row (2-D array-like).
            y_data: Training labels, indexed in step with the rows of ``x_data``.
            k: How many neighbours vote; clamped to the number of training
                samples when larger.

        Returns:
            The most frequent label among the k nearest samples. On a tie the
            label encountered first, starting from the nearest neighbour, wins.

        Raises:
            ValueError: If ``k`` is smaller than 1 or ``x_data`` has no rows.
        """
        x_data = np.asarray(x_data)
        if k < 1:
            raise ValueError("k must be at least 1")
        if x_data.shape[0] == 0:
            raise ValueError("x_data must contain at least one sample")

        # Broadcasting replaces the explicit np.tile copy of x_test.
        sorted_distance = ((((x_data - x_test) ** 2).sum(axis=1)) ** 0.5).argsort()

        # Count labels of the nearest samples; the slice never runs past the
        # end even when k exceeds the number of samples.
        class_count = {}
        for idx in sorted_distance[:min(k, x_data.shape[0])]:
            label = y_data[idx]
            class_count[label] = class_count.get(label, 0) + 1

        # First key with the highest count wins, matching insertion order.
        return max(class_count, key=class_count.get)

用sklearn调用kNN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""
# @Time : 2020/8/8
# @Author : Jimou Chen
"""
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load the iris data set and separate features from labels
iris = load_iris()
x_data, y_data = iris.data, iris.target

# Hold out 20% of the samples as the test set
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2)

# Build and train the classifier; n_neighbors is the k of kNN
kNN_model = KNeighborsClassifier(n_neighbors=5)
kNN_model.fit(x_train, y_train)

# Predict the held-out samples and show them next to the true labels
predictions = kNN_model.predict(x_test)
print('origin: \n', y_test)
print('predict result:\n', predictions)
print(classification_report(y_test, predictions))

# The model's own scoring method reports the accuracy on the test set
print(kNN_model.score(x_test, y_test))
Welcome to reward