#include "knn_algo.h"
#include <math.h>
#include <float.h>
#include <string.h>
#include <algorithm>
#include "linear_algebra.h"
#define M_METHOD PARTIAL_SORT
using namespace std;
// require: n > k
void kNN_bruteforce(double **X, int n, int D, int k,
int **neighbors, double **knn_dist_sq)
{
int i, j;
// allocate 2d array to store square distances
double **dist_sq = NULL;
new_2d_array(dist_sq, n, n);
// compute pairwise square distances
for (i = 0; i < n; i++)
dist_sq[i][i] = DBL_MAX;
for (i = 0; i < n; i++)
for (j = i+1; j < n; j++)
{
dist_sq[i][j] = comp_dist_sq(X[i], X[j], D);
dist_sq[j][i] = dist_sq[i][j];
}
// for each data point, find k nearest neighbors
// the neighbors are sorted according to distances
for (i = 0; i < n; i++)
smallest_k_elements(dist_sq[i], n, k, neighbors[i], M_METHOD);
for (i = 0; i < n; i++)
for (j = 0; j < k; j++)
knn_dist_sq[i][j] = dist_sq[i][neighbors[i][j]];
// release memory
delete_2d_array(dist_sq, n, n);
}
void kNN_bruteforce2(double **X, int n, int D, int k,
double **dist_sq, int **neighbors, double **knn_dist_sq)
{
int i, j;
// compute pairwise square distances
for (i = 0; i < n; i++)
dist_sq[i][i] = DBL_MAX;
for (i = 0; i < n; i++)
for (j = i+1; j < n; j++)
{
dist_sq[i][j] = comp_dist_sq(X[i], X[j], D);
dist_sq[j][i] = dist_sq[i][j];
}
// for each data point, find k nearest neighbors
// the neighbors are sorted according to distances
for (i = 0; i < n; i++)
smallest_k_elements(dist_sq[i], n, k, neighbors[i], M_METHOD);
for (i = 0; i < n; i++)
for (j = 0; j < k; j++)
knn_dist_sq[i][j] = dist_sq[i][neighbors[i][j]];
}
// implicitly be able to handle num <= k case
void kNN_w_hash_table(double **X, int D, int k, int *label, int num,
int **neighbors, DynamicArray *dist_sq)
{
int i, j, ii, jj;
double dist_sq_tmp;
for (i = 0; i < num; i++)
for (j = i+1; j < num; j++)
{
ii = label[i];
jj = label[j];
if (dist_sq[ii].hasElement(jj) == -1)
{
dist_sq_tmp = comp_dist_sq(X[ii], X[jj], D);
dist_sq[ii].addElement(jj, dist_sq_tmp);
dist_sq[jj].addElement(ii, dist_sq_tmp);
}
}
for (i = 0; i < num; i++)
{
ii = label[i];
dist_sq[ii].compSmallestElements(k, neighbors[ii], NULL, M_METHOD);
}
}
void kNN_dNc_disjoint(double **X, int D, int k, int *label, int num, int p,
int **neighbors, DynamicArray *dist_sq)
{
int i;
// divide step: partition the set X(label)
int *left = NULL, *right = NULL;
int nl, nr;
double *pln_normal = new double [D];
double *pln_dist = new double [num];
double **Y = NULL;
new_2d_array(Y, num, D);
for (i = 0; i < num; i++)
memcpy(Y[i], X[label[i]], D*sizeof(double));
comp_sep_plane(Y, num, D, pln_normal, pln_dist);
partition_disjoint(pln_dist, num, left, &nl, right, &nr);
//printf("kNN_dNc_disjoint: num = %d, nl = %d, nr = %d\n", num, nl, nr);
// recursively compute kNN for the left side
int *label_l = new int [nl];
for (i = 0; i < nl; i++)
label_l[i] = label[left[i]];
if (nl <= p)
kNN_w_hash_table(X, D, k, label_l, nl, neighbors, dist_sq);
else
kNN_dNc_disjoint(X, D, k, label_l, nl, p, neighbors, dist_sq);
// recursively compute kNN for the right side
int *label_r = new int [nr];
for (i = 0; i < nr; i++)
label_r[i] = label[right[i]];
if (nr <= p)
kNN_w_hash_table(X, D, k, label_r, nr, neighbors, dist_sq);
else
kNN_dNc_disjoint(X, D, k, label_r, nr, p, neighbors, dist_sq);
// conquer step: conquer the results from both sides.
// nothing needs to be done.
// release memory
delete [] pln_normal;
delete [] pln_dist;
delete [] left;
delete [] right;
delete [] label_l;
delete [] label_r;
delete_2d_array(Y, num, D);
}
void kNN_dNc_overlap(double **X, int D, int k, int *label, int num,
int p, double r, bool is_refine,
int **neighbors, DynamicArray *dist_sq,
int *num_need_dist, int *num_comp_dist)
{
int i;
// divide step: partition the set X(label)
int *left = NULL, *right = NULL;
int nl, nr;
double *pln_normal = new double [D];
double *pln_dist = new double [num];
double **Y = NULL;
new_2d_array(Y, num, D);
for (i = 0; i < num; i++)
memcpy(Y[i], X[label[i]], D*sizeof(double));
comp_sep_plane(Y, num, D, pln_normal, pln_dist);
partition_overlap(pln_dist, num, r, left, &nl, right, &nr);
//printf("kNN_dNc_overlap: num = %d, nl = %d, nr = %d\n", num, nl, nr);
// recursively compute kNN for the left side
int *label_l = new int [nl];
for (i = 0; i < nl; i++)
label_l[i] = label[left[i]];
if (nl <= p)
kNN_w_hash_table(X, D, k, label_l, nl, neighbors, dist_sq);
else
kNN_dNc_overlap(X, D, k, label_l, nl, p, r, is_refine, neighbors, dist_sq,
num_need_dist, num_comp_dist);
// recursively compute kNN for the right side
int *label_r = new int [nr];
for (i = 0; i < nr; i++)
label_r[i] = label[right[i]];
if (nr <= p)
kNN_w_hash_table(X, D, k, label_r, nr, neighbors, dist_sq);
else
kNN_dNc_overlap(X, D, k, label_r, nr, p, r, is_refine, neighbors, dist_sq,
num_need_dist, num_comp_dist);
// conquer step: conquer the results from both sides.
// implicitly done in kNN_w_hash_table.
// do refinement
if (is_refine)
{
int n_need_dist, n_comp_dist;
refine_knn(X, D, k, label, num, neighbors, dist_sq,
&n_need_dist, &n_comp_dist);
*num_need_dist += n_need_dist;
*num_comp_dist += n_comp_dist;
}
// release memory
delete [] pln_normal;
delete [] pln_dist;
delete [] left;
delete [] right;
delete [] label_l;
delete [] label_r;
delete_2d_array(Y, num, D);
}
void kNN_dNc_glue(double **X, int D, int k, int *label, int num,
int p, double r, bool is_refine,
int **neighbors, DynamicArray *dist_sq,
int *num_need_dist, int *num_comp_dist)
{
int i;
// divide step: partition the set X(label)
int *left = NULL, *right = NULL, *middle = NULL;
int nl, nr, nm;
double *pln_normal = new double [D];
double *pln_dist = new double [num];
double **Y = NULL;
new_2d_array(Y, num, D);
for (i = 0; i < num; i++)
memcpy(Y[i], X[label[i]], D*sizeof(double));
comp_sep_plane(Y, num, D, pln_normal, pln_dist);
partition_glue(pln_dist, num, r, left, &nl, right, &nr, middle, &nm);
//printf("kNN_dNc_glue: num = %d, nl = %d, nr = %d, nm = %d\n", num, nl, nr, nm);
// recursively compute kNN for the left part
int *label_l = new int [nl];
for (i = 0; i < nl; i++)
label_l[i] = label[left[i]];
if (nl <= p)
kNN_w_hash_table(X, D, k, label_l, nl, neighbors, dist_sq);
else
kNN_dNc_glue(X, D, k, label_l, nl, p, r, is_refine, neighbors, dist_sq,
num_need_dist, num_comp_dist);
// recursively compute kNN for the right part
int *label_r = new int [nr];
for (i = 0; i < nr; i++)
label_r[i] = label[right[i]];
if (nr <= p)
kNN_w_hash_table(X, D, k, label_r, nr, neighbors, dist_sq);
else
kNN_dNc_glue(X, D, k, label_r, nr, p, r, is_refine, neighbors, dist_sq,
num_need_dist, num_comp_dist);
// recursively compute kNN for the middle part
int *label_m = new int [nm];
for (i = 0; i < nm; i++)
label_m[i] = label[middle[i]];
if (nm <= p)
kNN_w_hash_table(X, D, k, label_m, nm, neighbors, dist_sq);
else
kNN_dNc_glue(X, D, k, label_m, nm, p, r, is_refine, neighbors, dist_sq,
num_need_dist, num_comp_dist);
// conquer step: conquer the results from all three parts.
// implicitly done in kNN_w_hash_table.
// do refinement
if (is_refine)
{
int n_need_dist, n_comp_dist;
refine_kn