"""Visualize the iterations of K-means clustering on synthetic 2D data.

Generates three Gaussian blobs, standardizes them, plots the raw data,
then runs a simple K-means implementation that plots the cluster
assignments and centroid positions at every iteration.
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler


def kmeans_clustering(X, n_clusters, max_iters=10):
    """Run K-means on ``X``, plotting the cluster state at each iteration.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 2)
        2D data points (standardized in the demo below).
    n_clusters : int
        Number of clusters to form.
    max_iters : int, optional
        Maximum number of assignment/update iterations (default 10).

    Returns
    -------
    centroids : ndarray of shape (n_clusters, 2)
        Final centroid positions.
    labels : ndarray of shape (n_samples,)
        Cluster index assigned to each sample.
    """
    # Use a local Generator instead of np.random.seed(42): reseeding the
    # global RNG inside a function is a hidden side effect on callers.
    rng = np.random.default_rng(42)
    # Initialize centroids as a random sample of distinct data points.
    centroids = X[rng.choice(X.shape[0], n_clusters, replace=False)]

    labels = np.zeros(X.shape[0], dtype=int)
    for i in range(max_iters):
        # Assign each point to its nearest centroid (Euclidean distance).
        distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
        labels = np.argmin(distances, axis=1)

        _plot_iteration(X, centroids, labels, n_clusters, i)

        # Recompute centroids. If a cluster lost all its points, keep its
        # old centroid — .mean() on an empty selection yields NaN (with a
        # RuntimeWarning) and would corrupt every later iteration.
        new_centroids = np.array([
            X[labels == j].mean(axis=0) if np.any(labels == j) else centroids[j]
            for j in range(n_clusters)
        ])

        # Converged when centroids stop moving; use a float-tolerant
        # comparison rather than exact equality on floats.
        if np.allclose(centroids, new_centroids):
            break
        centroids = new_centroids

    return centroids, labels


def _plot_iteration(X, centroids, labels, n_clusters, i):
    """Scatter-plot the current cluster assignments and centroids."""
    plt.figure(figsize=(8, 6))
    for j in range(n_clusters):
        members = X[labels == j]
        plt.scatter(members[:, 0], members[:, 1], label=f'Cluster {j+1}')
        plt.scatter(centroids[j, 0], centroids[j, 1], s=200, c='black',
                    marker='X')
        # Annotate each centroid with its coordinates.
        plt.text(centroids[j, 0], centroids[j, 1],
                 f'({centroids[j, 0]:.2f}, {centroids[j, 1]:.2f})',
                 fontsize=10, color='red', ha='center', va='center')
    plt.title(f'Iteration {i+1}')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.grid(True)
    plt.show()


def main():
    """Generate and standardize the demo data, then run K-means on it."""
    # Generate synthetic 2D data: three Gaussian blobs.
    np.random.seed(42)
    X, _ = make_blobs(n_samples=150, centers=3, n_features=2, cluster_std=2.5)

    # Standardize the data so both features share the same scale.
    X = StandardScaler().fit_transform(X)

    # Show the unclustered data first for comparison.
    plt.figure(figsize=(8, 6))
    plt.scatter(X[:, 0], X[:, 1], c='gray', edgecolor='k', s=50)
    plt.title('Initial Data Distribution')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.grid(True)
    plt.show()

    # Perform K-means clustering and visualize each iteration.
    kmeans_clustering(X, n_clusters=3, max_iters=10)


if __name__ == "__main__":
    main()