Click here to Skip to main content
15,904,153 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
C#
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ReadFromFile
{
   public class k_means1
    {
     static void Main(string[] args)
      {
       Console.WriteLine("\nBegin k-means clustering demo\n");
      StreamReader reader = new StreamReader(@"F:\DOT NET PROJECTS\2012 projects\scalable learning of collective behaviour\app.txt");
          string content = null;
          string[] line = null;

          char[] ch = { ',' };

          int a = 1;
          double[][] rawdata = new double[20][];
          while ((content = reader.ReadLine()) != null)
          {
              if (a == 1 || a == 2)
                  Console.WriteLine("Does nothing");
              else
              {
                  line = content.Split(ch);
                  try
                  {
                      for (int b = 0; b < 20; b++)
                      {
                          rawdata[b] = new double[2];
                          rawdata[b][0] = Convert.ToDouble(line[1]);
                          rawdata[b][1] = Convert.ToDouble(line[2]);
                      }

                  }
                  catch (Exception ex)
                  {
                      Console.WriteLine(ex.Message);
                  }
                  //Console.WriteLine("First value: " + line[1]);
                  //Console.WriteLine();
                  //Console.WriteLine("First value: " + line[2]);
              }
              a++;
        
           }
       Console.WriteLine("Raw unclustered data:\n");
       Console.WriteLine("    Height Weight");
       Console.WriteLine("-------------------");
       ShowData(rawdata, 1, true, true);

      int numClusters = 3;
      Console.WriteLine("\nSetting numClusters to " + numClusters);

      int[] clustering = Cluster(rawdata, numClusters); // this is it

      Console.WriteLine("\nK-means clustering complete\n");

      Console.WriteLine("Final clustering in internal form:\n");
      ShowVector(clustering, true);

      Console.WriteLine("Raw data by cluster:\n");
      ShowClustered(rawdata, clustering, numClusters, 1);

      Console.WriteLine("\nEnd k-means clustering demo\n");
      Console.ReadLine();
          }       

    // ============================================================================

    public static int[] Cluster(double[][] rawData, int numClusters)
    {
      // k-means clustering
      // index of return is tuple ID, cell is cluster ID
      // ex: [2 1 0 0 2 2] means tuple 0 is cluster 2, tuple 1 is cluster 1, tuple 2 is cluster 0, tuple 3 is cluster 0, etc.
      // an alternative clustering DS to save space is to use the .NET BitArray class
      double[][] data = Normalized(rawData); // so large values don't dominate

      bool changed = true; // was there a change in at least one cluster assignment?
      bool success = true; // were all means able to be computed? (no zero-count clusters)

      // init clustering[] to get things started
      // an alternative is to initialize means to randomly selected tuples
      // then the processing loop is
      // loop
      //    update clustering
      //    update means
      // end loop
      int[] clustering = InitClustering(data.Length, numClusters, 0); // semi-random initialization
      double[][] means = Allocate(numClusters, data[0].Length); // small convenience

      int maxCount = data.Length * 10; // sanity check
      int ct = 0;
      while (changed == true && success == true && ct < maxCount)
      {
        ++ct; // k-means typically converges very quickly
        success = UpdateMeans(data, clustering, means); // compute new cluster means if possible. no effect if fail
        changed = UpdateClustering(data, clustering, means); // (re)assign tuples to clusters. no effect if fail
      }
      // consider adding means[][] as an out parameter - the final means could be computed
      // the final means are useful in some scenarios (e.g., discretization and RBF centroids)
      // and even though you can compute final means from final clustering, in some cases it
      // makes sense to return the means (at the expense of some method signature uglinesss)
      //
      // another alternative is to return, as an out parameter, some measure of cluster goodness
      // such as the average distance between cluster means, or the average distance between tuples in 
      // a cluster, or a weighted combination of both
      return clustering;
    }

    private static double[][] Normalized(double[][] rawData)
    {
      // normalize raw data by computing (x - mean) / stddev
      // primary alternative is min-max:
      // v' = (v - min) / (max - min)

      // make a copy of input data
      double[][] result = new double[rawData.Length][];
      for (int i = 0; i < rawData.Length; ++i)
      {
        result[i] = new double[rawData[i].Length];
        Array.Copy(rawData[i], result[i], rawData[i].Length);
      }

      for (int j = 0; j < result[0].Length; ++j) // each col
      {
        double colSum = 0.0;
        for (int i = 0; i < result.Length; ++i)
          colSum += result[i][j];
        double mean = colSum / result.Length;
        double sum = 0.0;
        for (int i = 0; i < result.Length; ++i)
          sum += (result[i][j] - mean) * (result[i][j] - mean);
        double sd = sum / result.Length;
        for (int i = 0; i < result.Length; ++i)
          result[i][j] = (result[i][j] - mean) / sd;
      }
      return result;
    }

    private static int[] InitClustering(int numTuples, int numClusters, int randomSeed)
    {
      // init clustering semi-randomly (at least one tuple in each cluster)
      // consider alternatives, especially k-means++ initialization,
      // or instead of randomly assigning each tuple to a cluster, pick
      // numClusters of the tuples as initial centroids/means then use
      // those means to assign each tuple to an initial cluster.
      Random random = new Random(randomSeed);
      int[] clustering = new int[numTuples];
      for (int i = 0; i < numClusters; ++i) // make sure each cluster has at least one tuple
        clustering[i] = i;
      for (int i = numClusters; i < clustering.Length; ++i)
        clustering[i] = random.Next(0, numClusters); // other assignments random
      return clustering;
    }

    private static double[][] Allocate(int numClusters, int numColumns)
    {
      // convenience matrix allocator for Cluster()
      double[][] result = new double[numClusters][];
      for (int k = 0; k < numClusters; ++k)
        result[k] = new double[numColumns];
      return result;
    }

    private static bool UpdateMeans(double[][] data, int[] clustering, double[][] means)
    {
      // returns false if there is a cluster that has no tuples assigned to it
      // parameter means[][] is really a ref parameter

      // check existing cluster counts
      // can omit this check if InitClustering and UpdateClustering
      // both guarantee at least one tuple in each cluster (usually true)
      int numClusters = means.Length;
      int[] clusterCounts = new int[numClusters];
      for (int i = 0; i < data.Length; ++i)
      {
        int cluster = clustering[i];
        ++clusterCounts[cluster];
      }

      for (int k = 0; k < numClusters; ++k)
        if (clusterCounts[k] == 0)
          return false; // bad clustering. no change to means[][]

      // update, zero-out means so it can be used as scratch matrix 
      for (int k = 0; k < means.Length; ++k)
        for (int j = 0; j < means[k].Length; ++j)
          means[k][j] = 0.0;

      for (int i = 0; i < data.Length; ++i)
      {
        int cluster = clustering[i];
        for (int j = 0; j < data[i].Length; ++j)
          means[cluster][j] += data[i][j]; // accumulate sum
      }

      for (int k = 0; k < means.Length; ++k)
        for (int j = 0; j < means[k].Length; ++j)
          means[k][j] /= clusterCounts[k]; // danger of div by 0
      return true;
    }

    private static bool UpdateClustering(double[][] data, int[] clustering, double[][] means)
    {
      // (re)assign each tuple to a cluster (closest mean)
      // returns false if no tuple assignments change OR
      // if the reassignment would result in a clustering where
      // one or more clusters have no tuples.

      int numClusters = means.Length;
      bool changed = false;

      int[] newClustering = new int[clustering.Length]; // proposed result
      Array.Copy(clustering, newClustering, clustering.Length);

      double[] distances = new double[numClusters]; // distances from curr tuple to each mean

      for (int i = 0; i < data.Length; ++i) // walk thru each tuple
      {
        for (int k = 0; k < numClusters; ++k)
          distances[k] = Distance(data[i], means[k]); // compute distances from curr tuple to all k means

        int newClusterID = MinIndex(distances); // find closest mean ID
        if (newClusterID != newClustering[i])
        {
          changed = true;
          newClustering[i] = newClusterID; // update
        }
      }

      if (changed == false)
        return false; // no change so bail and don't update clustering[][]

      // check proposed clustering[] cluster counts
      int[] clusterCounts = new int[numClusters];
      for (int i = 0; i < data.Length; ++i)
      {
        int cluster = newClustering[i];
        ++clusterCounts[cluster];
      }

      for (int k = 0; k < numClusters; ++k)
        if (clusterCounts[k] == 0)
          return false; // bad clustering. no change to clustering[][]

      Array.Copy(newClustering, clustering, newClustering.Length); // update
      return true; // good clustering and at least one change
    }

    private static double Distance(double[] tuple, double[] mean)
    {
      // Euclidean distance between two vectors for UpdateClustering()
      // consider alternatives such as Manhattan distance
      double sumSquaredDiffs = 0.0;
      for (int j = 0; j < tuple.Length; ++j)
        sumSquaredDiffs += Math.Pow((tuple[j] - mean[j]), 2);
      return Math.Sqrt(sumSquaredDiffs);
    }

    private static int MinIndex(double[] distances)
    {
      // index of smallest value in array
      // helper for UpdateClustering()
      int indexOfMin = 0;
      double smallDist = distances[0];
      for (int k = 0; k < distances.Length; ++k)
      {
        if (distances[k] < smallDist)
        {
          smallDist = distances[k];
          indexOfMin = k;
        }
      }
      return indexOfMin;
    }

    // ============================================================================

    // misc display helpers for demo

    static void ShowData(double[][] data, int decimals, bool indices, bool newLine)
    {
      for (int i = 0; i < data.Length; ++i)
      {
        if (indices) Console.Write(i.ToString().PadLeft(3) + " ");
        for (int j = 0; j < 2; ++j)
        {
          if (data[i][j] >= 0.0) Console.Write(" ");
          Console.Write(data[i][j].ToString("F" + decimals) + " ");
        }
        Console.WriteLine("");
      }
      if (newLine) Console.WriteLine("");
    } // ShowData

    static void ShowVector(int[] vector, bool newLine)
    {
      for (int i = 0; i < vector.Length; ++i)
        Console.Write(vector[i] + " ");
      if (newLine) Console.WriteLine("\n");
    }

    static void ShowClustered(double[][] data, int[] clustering, int numClusters, int decimals)
    {
      for (int k = 0; k < numClusters; ++k)
      {
        Console.WriteLine("===================");
        for (int i = 0; i < data.Length; ++i)
        {
          int clusterID = clustering[i];
          if (clusterID != k) continue;
          Console.Write(i.ToString().PadLeft(3) + " ");
          for (int j = 0; j < data[i].Length; ++j)
          {
            if (data[i][j] >= 0.0) Console.Write(" ");
            Console.Write(data[i][j].ToString("F" + decimals) + " ");
          }
          Console.WriteLine("");
        }
        Console.WriteLine("===================");
      } // k
    }
  }


    }



this is k means algorithm.

show data method will contain instruction is
if (data[i][j] >= 0.0) Console.Write(" ");

this instruction show error like this "Object reference not set to an instance of an object."


pls help me...........
Posted

It means that data or data[i] is null. You need to find out it by yourself.

Not to worry. This is one of the very easiest cases to detect and fix. It simply means that some member/variable of some reference type is dereferenced by using and of its instance (non-static) members, which requires this member/variable to be non-null, but in fact it appears to be null. Simply execute it under debugger, it will stop the execution where the exception is thrown. Put a break point on that line, restart the application and come to this point again. Evaluate all references involved in next line and see which one is null while it needs to be not null. After you figure this out, fix the code: either make sure the member/variable is properly initialized to a non-null reference, or check it for null and, in case of null, do something else.

Please see also: want to display next record on button click. but got an error in if condition of next record function "object reference not set to an instance of an object"[^].

Sometimes, you cannot do it under debugger, by one or another reason. One really nasty case is when the problem is only manifested if software is built when debug information is not available. In this case, you have to use the harder way. First, you need to make sure that you never block propagation of exceptions by handling them silently (this is a crime of developers against themselves, yet very usual). The you need to catch absolutely all exceptions on the very top stack frame of each thread. You can do it if you handle the exceptions of the type System.Exception. In the handler, you need to log all the exception information, especially the System.Exception.StackTrace:
http://msdn.microsoft.com/en-us/library/system.exception.aspx[^],
http://msdn.microsoft.com/en-us/library/system.exception.stacktrace.aspx[^].

The stack trace is just a string showing the full path of exception propagation from the throw statement to the handler. By reading it, you can always find ends. For logging, it's the best (in most cases) to use the class System.Diagnostics.EventLog:
http://msdn.microsoft.com/en-us/library/system.diagnostics.eventlog.aspx[^].

Good luck,
—SA
 
Share this answer
 
v2
1.This type of error is generating by your trying to access a null object.

2. The line indicated by you as source of error indicate that in your case the variable data is null, but in that case the line above data.Length should generate the error and not this line. So you did not provide us the correct line of code that generate the error!

3.The solution is to Debug your code, and especially inspect your code and variable before the error is generated.
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900