% To demonstrate the k-means algorithm, which partitions data into K
% clusters, we first need some data to cluster.
% For this purpose we generate a random 200-by-2 matrix of 2-D points.
% For reproducibility, we reset the random number generator to its default state.
% Start from a clean Command Window and an empty workspace.
clc;
% Plain 'clear' removes the workspace variables; 'clear all' would also
% purge global variables and cached functions/MEX-files, which is
% unnecessary here and slows down subsequent code.
clear;
% Reset the random number generator so every run produces the same data.
rng('default');
% Two groups of 100 points each, stacked into a 200-by-2 matrix:
%   rows   1-100: standard-normal samples scaled by 0.75, shifted by +1
%   rows 101-200: standard-normal samples scaled by 0.5,  shifted by -1
X = [randn(100,2)*0.75+ones(100,2);
    randn(100,2)*0.5-ones(100,2)];

% Top panel: the raw data before clustering.
subplot(3,1,1);
plot(X(:,1),X(:,2),'*');
title('Randomly Generated Data');
% Cluster X with k-means and draw the result in the middle panel.
% Options passed to kmeans via its 'Options' argument
opts = statset('Display','final');
% Number of clusters to partition the data into
clusters = 2;
% Use squared Euclidean distance
distance = 'sqeuclidean';
% Repeat the whole clustering 50 times, each from new initial centroid
% positions (this is the number of restarts, not the number of iterations
% within a single run)
replicates = 50;
[idx,C] = kmeans(X,clusters,'Distance',distance,'Replicates',replicates,'Options',opts);
subplot(3,1,2);
% One color per cluster; the palette is cycled if clusters exceeds its length
colors = 'rb';
for k = 1:clusters
    inCluster = (idx == k);
    marker = [colors(mod(k-1,numel(colors))+1) '.'];
    plot(X(inCluster,1),X(inCluster,2),marker,'MarkerSize',5);
    % Turn hold on only after the first plot so that the first plot
    % replaces whatever the axes contained before
    hold on;
end
% Centroids are the rows of C
plot(C(:,1),C(:,2),'kx','MarkerSize',8,'LineWidth',3)
% Legend entries follow the plotting order: clusters first, then centroids
labels = arrayfun(@(j) sprintf('Cluster %d',j),1:clusters,'UniformOutput',false);
legend([labels,{'Centroids'}],'Location','NW')
title(sprintf('%d clusters and %d centroids',clusters,clusters));
hold off

% Let's now try with more clusters!
% Reuses X, distance, replicates and opts defined earlier in the script.
clusters = 4;
[idx,C] = kmeans(X,clusters,'Distance',distance,'Replicates',replicates,'Options',opts);
subplot(3,1,3);
% One color per cluster; the palette is cycled if clusters exceeds its length
colors = 'rbgc';
for k = 1:clusters
    inCluster = (idx == k);
    marker = [colors(mod(k-1,numel(colors))+1) '.'];
    plot(X(inCluster,1),X(inCluster,2),marker,'MarkerSize',5);
    % Turn hold on only AFTER the first plot: holding before it would keep
    % any lines left in these axes by a previous run instead of replacing them
    hold on;
end
% Centroids are the rows of C
plot(C(:,1),C(:,2),'kx','MarkerSize',8,'LineWidth',3);
% Legend entries follow the plotting order: clusters first, then centroids
labels = arrayfun(@(j) sprintf('Cluster %d',j),1:clusters,'UniformOutput',false);
legend([labels,{'Centroids'}],'Location','NW')
title(sprintf('%d clusters and %d centroids',clusters,clusters));
hold off