"""
Cluster a synthetic 2-D data set with scikit-learn's KMeans and plot each step.

This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/44839
"""

import os

# Set ahead of the scikit-learn import below. KMeans is known to have a memory
# leak on Windows with MKL when there are fewer chunks than available threads;
# setting this environment variable avoids it.
os.environ["OMP_NUM_THREADS"] = "1"

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt


def _scatter_2d(points, **scatter_options):
    """Scatter-plot column 0 of `points` against column 1.

    Any keyword arguments are forwarded unchanged to `plt.scatter`.
    """
    plt.scatter(points[:, 0], points[:, 1], **scatter_options)


# Generate the example data (four classes) and report the array shapes.
X, y = make_blobs(n_samples=300, centers=4, random_state=42)
for generated in (X, y):
    print(generated.shape)

# Show the raw data.
_scatter_2d(X)
plt.show()

# Show the data again, colored by the labels it was generated with.
_scatter_2d(X, c=y, cmap='viridis')
plt.show()

# Run KMeans clustering (deliberately into three clusters here).
kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
labels = kmeans.labels_  # cluster label assigned to each sample
print(labels.shape)

# Plot the clustering result, then the cluster centers on top of it.
_scatter_2d(X, c=labels, cmap='viridis')
_scatter_2d(kmeans.cluster_centers_, s=300, c='red', marker='X')
plt.title('KMeans Result')
plt.show()