Hi, i am trying to implement the dbscan code in c#(see below). The code takes 408 seconds for a 10000x20 matrix. it takes 20 seconds in matlab. can you point me to the part where i might be doing some thing wrong? matlab also uses kdtree.
public int[] ComputeDbscan(double[,] xtr, double epsilon, int minPts) {
bool borderpoints = true; int rows = xtr.GetLength(0); int cols = xtr.GetLength(1); bool[] visited = new bool[rows - 1 + 1]; int[] clusterId = new int[rows - 1 + 1]; int[] tags = new int[rows - 1 + 1]; for (int kk = 0; kk <= rows - 1; kk++) tags[kk] = kk;
List<List<int>> clusterlist = new List<List<int>>(); double[] xtrRow = new double[cols - 1 + 1]; int Nweight; int j; int normtype = 2;//euclidean norm bool selfmatch = true; int ny = 0; //useless variable List<int> N = new List<int>(); List<int> N2 = new List<int>(); alglib.kdtree kdt; //defien tree alglib.kdtreerequestbuffer r1; //define buffer alglib.kdtreebuildtagged(xtr, tags,rows,cols, ny,normtype, out kdt); //build tree with tags alglib.kdtreecreaterequestbuffer(kdt, out r1); //create buffer for the tree for (int kk = 0; kk <= cols - 1; kk++) xtrRow[kk] = xtr[0, kk]; //Get the epsilon Factor******************************* double[] nearest = new double[rows - 1 + 1]; if (epsilon == 0.0) { for (int i = 0; i <= rows - 1; i++) { for (int kk = 0; kk <= cols - 1; kk++) xtrRow[kk] = xtr[i, kk]; alglib.kdtreequeryknn(kdt, xtrRow, minPts - 1); double[] dists = new double[minPts-1]; alglib.kdtreequeryresultsdistances(kdt, ref dists); nearest[i] = dists[minPts - 2]; } epsilon = GetEPS(nearest); // this returns the best epsilon by simply looking for knee --not a bottleneck, negligible time }
//***************run the dbscan loop**************************** for (int i = 0; i <= rows - 1; i++) { if (visited[i] == true) continue; for (int kk = 0; kk <= cols - 1; kk++) xtrRow[kk] = xtr[i, kk]; //Get the nearest collection of points and drop it to a list alglib.kdtreequeryrnn(kdt, xtrRow, epsilon, selfmatch); int[] Narray=new int[20]; alglib.kdtreequeryresultstagsi(kdt, out Narray); //********************************************************* Nweight = Narray.Length; if (Nweight < minPts) continue; for (int kk = 0; kk <= Narray.Length - 1; kk++) N.Add(Narray[kk]); List<int> cluster = new List<int>(); cluster.Add(i); visited[i] = true; while (N.Count > 0) { j = N.Last(); N.RemoveAt(N.Count - 1); if (visited[j] == true) continue; visited[j] = true; for (int kk = 0; kk <= cols - 1; kk++) xtrRow[kk] = xtr[j, kk]; //Get the nearest collection of points and drop it to a list alglib.kdtreequeryrnn(kdt, xtrRow, epsilon, selfmatch); int[] N2array = new int[20]; alglib.kdtreequeryresultstagsi(kdt, out N2array); //********************************************************* Nweight = N2array.Length; if (Nweight >= minPts) { for (int kk = 0; kk <= N2array.Length - 1; kk++) N2.Add(N2array[kk]); N = N.Union(N2).ToList(); } if (Nweight >= minPts || borderpoints == true) { cluster.Add(j); } } clusterlist.Add(cluster); N.Clear(); N2.Clear(); cluster = null; } for (int k = 0; k <= clusterlist.Count - 1; k++) { List<int> cluster = new List<int>(); cluster = clusterlist[k]; for (int m = 0; m <= cluster.Count - 1; m++) clusterId[cluster[m]] = k + 1; } return clusterId; }
|