Create KMeans_example.py

2025-01-13 08:10:04 +08:00
parent 8a83969b07
commit 3cde4d784f
1 changed files with 30 additions and 0 deletions
--- a/2025.01.13_KMeans/KMeans_example.py
+++ b/2025.01.13_KMeans/KMeans_example.py
@@ -0,0 +1,30 @@
+"""
+This code is supported by the website: https://www.guanjihuan.com
+The newest version of this code is on the web page: https://www.guanjihuan.com/archives/44839
+"""
+
+import os
+os.environ["OMP_NUM_THREADS"] = "1" # KMeans is known to have a memory leak on Windows with MKL when there are fewer chunks than available threads; setting this environment variable (here, before sklearn is imported) avoids it
+
+from sklearn.cluster import KMeans
+from sklearn.datasets import make_blobs
+import matplotlib.pyplot as plt
+
+X, y = make_blobs(n_samples=300, centers=4, random_state=42) # generate sample data (four classes)
+print(X.shape)
+print(y.shape)
+plt.scatter(X[:, 0], X[:, 1]) # show the raw data
+plt.show()
+
+plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis') # color the points by their original labels
+plt.show()
+
+kmeans = KMeans(n_clusters=3, random_state=42) # run KMeans clustering (three clusters here, although the data was generated with four centers)
+kmeans.fit(X)
+labels = kmeans.labels_  # get the cluster label assigned to each sample
+print(labels.shape)
+
+plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis') # plot the clustering result
+plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='red', marker='X') # plot the cluster centers
+plt.title('KMeans Result')
+plt.show()