1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
| import numpy as np import random
class Kmeans:
    """Step-by-step k-means clustering over a 2-D NumPy sample matrix.

    The instance loads its samples from a text file on construction and
    exposes each phase of the algorithm (seeding, assignment, grouping,
    centroid update) as a separate method so a caller can drive the loop.
    """

    def __init__(self, filename="kmeans.txt", k=3):
        """Load the samples and remember the number of clusters.

        Args:
            filename: Path of the text file handed to ``load_dataSet``.
            k: Number of clusters stored on ``self.k``.
        """
        self.dataSet = self.load_dataSet(filename)
        self.k = k

    def load_dataSet(self, filename="kmeans.txt"):
        """Read two-column numeric samples from a text file.

        Each non-blank line must hold at least two whitespace-separated
        numbers; only the first two are used.  The loaded matrix is printed.

        Args:
            filename: Path of the file to read.

        Returns:
            A float64 array of shape ``(number_of_lines, 2)``.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If one of the first two fields is not a number.
            IndexError: If a non-blank line has fewer than two fields.
        """
        rows = []
        # The context manager guarantees the handle is closed even when a
        # malformed line raises half-way through the file.
        with open(filename) as file:
            for line in file:
                # split() without an argument tolerates tabs and repeated
                # spaces, and yields [] for blank lines (e.g. a trailing
                # newline at the end of the file).
                fields = line.split()
                if not fields:
                    continue
                rows.append([float(fields[0]), float(fields[1])])
        # reshape keeps the result two-dimensional even for an empty file.
        dataSet = np.array(rows, dtype=np.float64).reshape(-1, 2)
        print("初始化数据:", dataSet)
        return dataSet

    def exam_equal(self, center, dataSet):
        """Replace duplicated centers with samples not yet used as a center.

        ``center`` is modified in place.  A center that equals an earlier one
        is replaced by a randomly chosen sample that differs from every
        earlier center.  When no such sample exists (the data has fewer
        distinct points than there are centers) the duplicate is left as is.

        Args:
            center: Array of candidate centers, one per row.
            dataSet: Array of samples to draw replacements from.

        Returns:
            The same ``center`` array that was passed in.
        """
        for j in range(1, len(center)):
            duplicate_of = next(
                (i for i in range(j) if np.array_equal(center[i], center[j])),
                None,
            )
            if duplicate_of is None:
                continue
            print("元素", center[duplicate_of], "和元素", center[j], "相等")
            # Only draw from samples that differ from all centers fixed so
            # far; a blind redraw could pick the very same point again.
            candidates = [
                sample
                for sample in dataSet
                if not any(np.array_equal(sample, center[i]) for i in range(j))
            ]
            if not candidates:
                continue
            center[j] = random.choice(candidates)
        return center

    def init_center(self, dataSet, k):
        """Pick ``k`` initial centers from the samples.

        Args:
            dataSet: 2-D array with one sample per row.
            k: Number of centers to pick.

        Returns:
            A float64 array of shape ``(k, number_of_features)``.

        Raises:
            ValueError: If ``dataSet`` contains no samples.
        """
        number_of_samples = dataSet.shape[0]
        column_feature = dataSet.shape[1]
        print(f"输入数据格式为{number_of_samples}x{column_feature}")
        if number_of_samples == 0:
            raise ValueError("cannot initialise centers from an empty data set")
        if k <= number_of_samples:
            # Sampling without replacement never selects the same row twice.
            indices = random.sample(range(number_of_samples), k)
        else:
            # More centers than samples: repeats are unavoidable.
            indices = [random.randrange(number_of_samples) for _ in range(k)]
        center = dataSet[np.asarray(indices, dtype=np.intp)].astype(np.float64)
        # Distinct rows can still hold identical coordinates.
        center = self.exam_equal(center, dataSet)
        print("初始化中心矩阵:", center)
        return center

    def distance(self, x1, x2):
        """Return the Euclidean distance between ``x1`` and ``x2``."""
        squared_sum = np.sum(np.square(x1 - x2))
        return np.sqrt(squared_sum)

    def cluster(self, dataSet, center):
        """Assign every sample to its nearest center.

        Args:
            dataSet: Array with one sample per row.
            center: Sequence of centers.

        Returns:
            An int32 array holding, for each sample, the index of the closest
            center.  ``np.argmin`` resolves ties in favour of the lowest index.
        """
        sample_corresp_class = np.zeros(dataSet.shape[0], dtype=np.int32)
        for idx, sample in enumerate(dataSet):
            distances = [self.distance(sample, point) for point in center]
            sample_corresp_class[idx] = np.argmin(distances)
        return sample_corresp_class

    def class_center(self, sample_corresp_class, center, dataSet):
        """Group the samples by the center they were assigned to.

        Args:
            sample_corresp_class: Center index of each sample.
            center: Sequence of centers; only its length is used.
            dataSet: Array with one sample per row.

        Returns:
            A list with one list of samples per center.  The grouping is also
            printed.
        """
        center_class = [[] for _ in range(len(center))]
        for sample_idx, label in enumerate(sample_corresp_class):
            center_class[label].append(dataSet[sample_idx])
        print("归类数组:", center_class)
        return center_class

    def recalcu_center(self, center_class):
        """Compute the centroid of every cluster.

        A cluster without members is re-seeded with a randomly chosen sample
        from the other clusters.  Taking ``np.mean`` of an empty list would
        yield NaN, which would make every later distance to that center NaN.

        Args:
            center_class: One sequence of samples per cluster.

        Returns:
            A float64 array of shape ``(number_of_clusters, number_of_features)``.
            The result is also printed.

        Raises:
            ValueError: If no cluster contains any sample.
        """
        all_samples = [
            sample for members in center_class for sample in members
        ]
        if not all_samples:
            raise ValueError("cannot recompute centers: every cluster is empty")
        # Read the feature count from an actual sample instead of assuming
        # that the first cluster is populated.
        center = np.zeros(
            (len(center_class), len(all_samples[0])), dtype=np.float64
        )
        for idx, members in enumerate(center_class):
            if len(members) > 0:
                center[idx] = np.mean(members, axis=0)
            else:
                center[idx] = random.choice(all_samples)
        print("不同集群质心坐标:", center)
        return center
if __name__ == '__main__':
    # Script entry point: seed the centers once, then run a fixed number of
    # assign / group / update rounds on the samples loaded by Kmeans().
    model = Kmeans()
    centroids = model.init_center(model.dataSet, model.k)
    rounds_left = 5
    while rounds_left > 0:
        labels = model.cluster(model.dataSet, centroids)
        groups = model.class_center(labels, centroids, model.dataSet)
        centroids = model.recalcu_center(groups)
        rounds_left -= 1
|