using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ReadFromFile
{
public class k_means1
{
static void Main(string[] args)
{
Console.WriteLine("\nBegin k-means clustering demo\n");
StreamReader reader = new StreamReader(@"F:\DOT NET PROJECTS\2012 projects\scalable learning of collective behaviour\app.txt");
string content = null;
string[] line = null;
char[] ch = { ',' };
int a = 1;
double[][] rawdata = new double[20][];
while ((content = reader.ReadLine()) != null)
{
if (a == 1 || a == 2)
Console.WriteLine("Does nothing");
else
{
line = content.Split(ch);
try
{
for (int b = 0; b < 20; b++)
{
rawdata[b] = new double[2];
rawdata[b][0] = Convert.ToDouble(line[1]);
rawdata[b][1] = Convert.ToDouble(line[2]);
}
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
}
a++;
}
Console.WriteLine("Raw unclustered data:\n");
Console.WriteLine(" Height Weight");
Console.WriteLine("-------------------");
ShowData(rawdata, 1, true, true);
int numClusters = 3;
Console.WriteLine("\nSetting numClusters to " + numClusters);
int[] clustering = Cluster(rawdata, numClusters);
Console.WriteLine("\nK-means clustering complete\n");
Console.WriteLine("Final clustering in internal form:\n");
ShowVector(clustering, true);
Console.WriteLine("Raw data by cluster:\n");
ShowClustered(rawdata, clustering, numClusters, 1);
Console.WriteLine("\nEnd k-means clustering demo\n");
Console.ReadLine();
}
public static int[] Cluster(double[][] rawData, int numClusters)
{
double[][] data = Normalized(rawData);
bool changed = true;
bool success = true;
int[] clustering = InitClustering(data.Length, numClusters, 0);
double[][] means = Allocate(numClusters, data[0].Length);
int maxCount = data.Length * 10;
int ct = 0;
while (changed == true && success == true && ct < maxCount)
{
++ct;
success = UpdateMeans(data, clustering, means);
changed = UpdateClustering(data, clustering, means);
}
return clustering;
}
private static double[][] Normalized(double[][] rawData)
{
double[][] result = new double[rawData.Length][];
for (int i = 0; i < rawData.Length; ++i)
{
result[i] = new double[rawData[i].Length];
Array.Copy(rawData[i], result[i], rawData[i].Length);
}
for (int j = 0; j < result[0].Length; ++j)
{
double colSum = 0.0;
for (int i = 0; i < result.Length; ++i)
colSum += result[i][j];
double mean = colSum / result.Length;
double sum = 0.0;
for (int i = 0; i < result.Length; ++i)
sum += (result[i][j] - mean) * (result[i][j] - mean);
double sd = sum / result.Length;
for (int i = 0; i < result.Length; ++i)
result[i][j] = (result[i][j] - mean) / sd;
}
return result;
}
private static int[] InitClustering(int numTuples, int numClusters, int randomSeed)
{
Random random = new Random(randomSeed);
int[] clustering = new int[numTuples];
for (int i = 0; i < numClusters; ++i)
clustering[i] = i;
for (int i = numClusters; i < clustering.Length; ++i)
clustering[i] = random.Next(0, numClusters);
return clustering;
}
private static double[][] Allocate(int numClusters, int numColumns)
{
double[][] result = new double[numClusters][];
for (int k = 0; k < numClusters; ++k)
result[k] = new double[numColumns];
return result;
}
private static bool UpdateMeans(double[][] data, int[] clustering, double[][] means)
{
int numClusters = means.Length;
int[] clusterCounts = new int[numClusters];
for (int i = 0; i < data.Length; ++i)
{
int cluster = clustering[i];
++clusterCounts[cluster];
}
for (int k = 0; k < numClusters; ++k)
if (clusterCounts[k] == 0)
return false;
for (int k = 0; k < means.Length; ++k)
for (int j = 0; j < means[k].Length; ++j)
means[k][j] = 0.0;
for (int i = 0; i < data.Length; ++i)
{
int cluster = clustering[i];
for (int j = 0; j < data[i].Length; ++j)
means[cluster][j] += data[i][j];
}
for (int k = 0; k < means.Length; ++k)
for (int j = 0; j < means[k].Length; ++j)
means[k][j] /= clusterCounts[k];
return true;
}
private static bool UpdateClustering(double[][] data, int[] clustering, double[][] means)
{
int numClusters = means.Length;
bool changed = false;
int[] newClustering = new int[clustering.Length];
Array.Copy(clustering, newClustering, clustering.Length);
double[] distances = new double[numClusters];
for (int i = 0; i < data.Length; ++i)
{
for (int k = 0; k < numClusters; ++k)
distances[k] = Distance(data[i], means[k]);
int newClusterID = MinIndex(distances);
if (newClusterID != newClustering[i])
{
changed = true;
newClustering[i] = newClusterID;
}
}
if (changed == false)
return false;
int[] clusterCounts = new int[numClusters];
for (int i = 0; i < data.Length; ++i)
{
int cluster = newClustering[i];
++clusterCounts[cluster];
}
for (int k = 0; k < numClusters; ++k)
if (clusterCounts[k] == 0)
return false;
Array.Copy(newClustering, clustering, newClustering.Length);
return true;
}
private static double Distance(double[] tuple, double[] mean)
{
double sumSquaredDiffs = 0.0;
for (int j = 0; j < tuple.Length; ++j)
sumSquaredDiffs += Math.Pow((tuple[j] - mean[j]), 2);
return Math.Sqrt(sumSquaredDiffs);
}
private static int MinIndex(double[] distances)
{
int indexOfMin = 0;
double smallDist = distances[0];
for (int k = 0; k < distances.Length; ++k)
{
if (distances[k] < smallDist)
{
smallDist = distances[k];
indexOfMin = k;
}
}
return indexOfMin;
}
static void ShowData(double[][] data, int decimals, bool indices, bool newLine)
{
for (int i = 0; i < data.Length; ++i)
{
if (indices) Console.Write(i.ToString().PadLeft(3) + " ");
for (int j = 0; j < 2; ++j)
{
if (data[i][j] >= 0.0) Console.Write(" ");
Console.Write(data[i][j].ToString("F" + decimals) + " ");
}
Console.WriteLine("");
}
if (newLine) Console.WriteLine("");
}
static void ShowVector(int[] vector, bool newLine)
{
for (int i = 0; i < vector.Length; ++i)
Console.Write(vector[i] + " ");
if (newLine) Console.WriteLine("\n");
}
static void ShowClustered(double[][] data, int[] clustering, int numClusters, int decimals)
{
for (int k = 0; k < numClusters; ++k)
{
Console.WriteLine("===================");
for (int i = 0; i < data.Length; ++i)
{
int clusterID = clustering[i];
if (clusterID != k) continue;
Console.Write(i.ToString().PadLeft(3) + " ");
for (int j = 0; j < data[i].Length; ++j)
{
if (data[i][j] >= 0.0) Console.Write(" ");
Console.Write(data[i][j].ToString("F" + decimals) + " ");
}
Console.WriteLine("");
}
Console.WriteLine("===================");
}
}
}
}
this is k means algorithm.
show data method will contain instruction is
if (data[i][j] >= 0.0) Console.Write(" ");
this instruction show error like this "Object reference not set to an instance of an object."
pls help me...........