import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import seaborn as sns
sns.set_theme()
plt.rcParams["font.sans-serif"] = [
"Microsoft YaHei"
] # 设置字体。如果不设置,中文会乱码。这里采用微软雅黑'Microsoft YaHei',如果显示不正常,也可以使用黑体'SimHei'或者宋体'SimSun'等
plt.rcParams["axes.unicode_minus"] = False # 该语句解决图像中的“-”负号的乱码问题
# 绘图使用'svg'后端:svg是矢量格式,可以任意缩放均保持清晰,各种屏幕的显示良好。
%config InlineBackend.figure_formats = ['svg']
# Generate a two-cluster toy data set.
X, _ = make_blobs(n_samples=100, centers=2, cluster_std=0.75, random_state=0)

# Create a 2x2 grid of axes and flatten it so panels can be iterated in order.
fig, axs = plt.subplots(2, 2, figsize=(8, 8))
axs = axs.ravel()

# Visualise the K-means result for different iteration budgets.
iterations = [1, 2, 3, 4]
for ax, iter_num in zip(axs, iterations):
    # Run K-means with the same seed and a single random initialisation each
    # time; only the iteration cap (max_iter) differs between panels.
    kmeans = KMeans(
        n_clusters=2, init="random", n_init=1, max_iter=iter_num, random_state=0
    )
    kmeans.fit(X)
    y_kmeans = kmeans.predict(X)
    centers = kmeans.cluster_centers_

    # Draw the data points, coloured by their assigned cluster.
    ax.scatter(
        X[:, 0], X[:, 1], c=y_kmeans, cmap="viridis", marker="o", edgecolor="k"
    )
    # Draw the cluster centres.
    ax.scatter(centers[:, 0], centers[:, 1], c="red", s=200, alpha=0.75, marker="X")
    ax.set_title(f"{iter_num} Iterations")

    # Compute and draw the decision boundary: the perpendicular bisector of the
    # segment joining the two centres.
    midpoint = centers.mean(axis=0)
    dx, dy = centers[1] - centers[0]

    # Capture the limits produced by the scatter plots so that a steep boundary
    # line cannot stretch the axes and squash the data.
    x_lim = ax.get_xlim()
    y_lim = ax.get_ylim()

    if dy != 0:
        # Perpendicular slope is -dx/dy; computing it directly (instead of
        # -1 / (dy/dx)) avoids dividing by zero when the centres share an x.
        perpendicular_slope = -dx / dy
        x_vals = np.array(x_lim)
        y_vals = midpoint[1] + perpendicular_slope * (x_vals - midpoint[0])
        ax.plot(x_vals, y_vals, "k--")  # black dashed line marks the boundary
    elif dx != 0:
        # Centres share the same y coordinate: the boundary is a vertical line,
        # which has no finite slope.
        ax.axvline(midpoint[0], color="k", linestyle="--")
    # Otherwise the two centres coincide and no boundary is defined; draw nothing.

    ax.set_xlim(x_lim)
    ax.set_ylim(y_lim)

# Show the figure.
plt.tight_layout()
plt.show()