我正在尝试在用Java写给C的kd-ree上移植knn(k最近邻居搜索)。>
Java输出,如预期:
arest to Key: 6.0,5.0,4.0 Key:6.0,5.0,4.0,min distance:0.0 Key:5.0,4.0,3.0,min distance:3.0 Key:7.0,6.0,5.0,min distance:3.0 Key:4.0,3.0,2.0,min distance:12.0 Key:3.0,2.0,1.0,min distance:27.0
Java代码,类(它是一种快速的实现,只是在启动端口之前使算法起作用):
class kd_tree { public int DEFAULT_NUMBER_OF_DIMENSIONS = 1; static int current_number_of_data_points=0; static int current_global_median = 0; // kd_tree left; kd_tree right; float[] data; private int k_dimensions; float distance_to_neighbor; }
Java knn方法:
/*=========================================================================== Function knn, knn algorithm using kd-tree & minimumNeighbor function. Description: ==========================================================*/ public static kd_tree [] knn(kd_tree root, float[] data_point, int depth, int k_dimension, int number_of_nearest_neighbors) { kd_tree [] all_nearests = new kd_tree[current_number_of_data_points]; kd_tree [] n_nearests = new kd_tree[current_number_of_data_points]; if (root!=null) { int nearests_counter = 0; /*now lets traverse the tree inorder & calculate distances from the query point based on Morris Traversal algorithm that does not use any stacks or no recursion*/ kd_tree cur = root; kd_tree pre; while (cur != null) { if (cur.left == null) { /*debugging System.out.println(cur.data[0]); calculate distance*/ cur.distance_to_neighbor = n_dimensional_euclidean(data_point, cur.data); all_nearests[nearests_counter] = cur; nearests_counter++; cur = cur.right; // move to next right node } else { // has a left subtree pre = cur.left; while (pre.right != null && pre.right!=cur) { // find rightmost pre = pre.right; } /* Make current as the right child of its inorder predecessor */ if (pre.right == null) { pre.right = cur; cur = cur.left; } else { pre.right = null; // debugging printf("%d ", current->data); //calculate distance cur.distance_to_neighbor = n_dimensional_euclidean( data_point, cur.data); all_nearests[nearests_counter] = cur; nearests_counter++; cur = cur.right; } } }//end while //base cases //sort from least to greatest insertion_sort_based_on_distance(all_nearests); //return on specified number_of_nearest_neighbors for (int i=0; i<number_of_nearest_neighbors; i++) { n_nearests[i]=all_nearests[i]; } } return n_nearests; }
相关的C代码段:
#include <stdlib.h> #ifndef KDTREE_H #define KDTREE_H /* * Representation of a kd tree */ typedef struct tree_ { struct tree *left; struct tree *right; float * info; float distance_to_neighbor; } tree; //pre-allocated tree nodes array static tree tree_space [KD_TREE_HEAP_SIZE]; //the knn algorithm will require memory space upto tree size static tree * processing_space [KD_TREE_HEAP_SIZE]; tree * knn(tree * root, float data_point[], int depth, const int k_dimensions, int number_of_nearest_neighbors, int total_number_of_elements, tree * n_nearests);
相关knn实现,kdtree.c:
#include "kdtree.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <float.h> #include "sorting_utility.h" static int current_number_of_kd_tree_nodes=0; static int current_number_of_data_points=0; /*=========================================================================== Function knn, knn algorithm using kd-tree. Description: ==========================================================*/ tree * knn (tree * root, float data_point[], int depth, const int k_dimensions, int number_of_nearest_neighbors, tree * n_nearests) { tree all_nearests [current_number_of_kd_tree_nodes]; tree * source_data_end_ptr = NULL; tree * source_data_start_ptr = NULL; tree * destination_data_start_ptr = NULL; tree * destination_data_end_ptr = NULL; if (NULL != root && NULL != n_nearests) { int nearests_counter = 0; //now lets traverse the tree inorder & calculate distances //from the query point tree * cur = root; tree * pre; while (NULL != cur) { if (NULL == cur->left) { cur->distance_to_neighbor = n_dimensional_euclidean (data_point, cur->info); processing_space[nearests_counter] = *cur; nearests_counter++; cur = cur->right; // move to next right node } else { pre = cur->left; while (pre->right != NULL && pre->right != cur) { // find rightmost pre = pre->right; } /* Make current as the right child of its inorder predecessor */ if (pre->right == NULL) { pre->right = cur; cur = cur->left; } else { pre->right = NULL; // //calculate distance cur->distance_to_neighbor = n_dimensional_euclidean (data_point, cur->info); processing_space[nearests_counter] = *cur; nearests_counter++; cur = cur->right; } } }//end while //JUST FOR DEBUGGING START printf ("***For debugging before sort:\n"); for (int i = 0; i < current_number_of_kd_tree_nodes; i++) { printf ("{"); for (int c = 0; c < k_dimensions; c++) { if (NULL != processing_space[i].info) { printf ("%f,", processing_space[i].info[c]); } else { break; } }//end for printf ("} "); printf ("min_distance=%f\n", processing_space[i].distance_to_neighbor); }//end for //JUST FOR DEBUGGING END /*copy relevant range up current_number_of_kd_tree_nodes before sort, *in order to avoid sorting beyond range that does not have any data*/ source_data_start_ptr = &processing_space[0]; destination_data_start_ptr = &all_nearests[0]; destination_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1]; while (destination_data_start_ptr <= destination_data_end_ptr) { *destination_data_start_ptr = *source_data_start_ptr; source_data_start_ptr++; destination_data_start_ptr++; } //sort based on distance from query point quick_sort (all_nearests, 0, current_number_of_kd_tree_nodes - 1); //JUST FOR DEBUGGING START printf ("***For debugging after sort\n"); for (int i = 0; i < current_number_of_kd_tree_nodes; i++) { printf ("{"); for (int c = 0; c < k_dimensions; c++) { if (NULL != all_nearests[i].info) { printf ("%f,", all_nearests[i].info[c]); } else { break; } }//end for printf ("} "); printf ("min_distance=%f\n", all_nearests[i].distance_to_neighbor); }//end for //JUST FOR DEBUGGING END /*copy only the n_nearest & ignore/ do NOT copy any empty tree nodes*/ //reuse pointers destination_data_end_ptr = &n_nearests[number_of_nearest_neighbors - 1]; source_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1]; source_data_start_ptr = all_nearests; int counter = 0; while (counter < number_of_nearest_neighbors && source_data_start_ptr < source_data_end_ptr) { //do NOT copy any empty tree nodes if (source_data_start_ptr != NULL && source_data_start_ptr->info != NULL) { /*ATTENTION: i checked with debugger & values for (distance_to_neighbor,info,left,right were not zeroed or empty*/ float distance_to_neighbor = source_data_start_ptr->distance_to_neighbor; float * info = source_data_start_ptr->info; tree * left = source_data_start_ptr->left; tree * right = source_data_start_ptr->right; n_nearests[counter].distance_to_neighbor = distance_to_neighbor; n_nearests[counter].info = info; n_nearests[counter].left = left; n_nearests[counter].right = right; counter++; } source_data_start_ptr++; } } else { printf ("Error, knn input parameter error"); } return n_nearests; }//end function
main.c相关代码段:
#include<kdtree.h> int main() { const int query_size = 10; const int k_dimensions = 3; printf("knn (k nearest neighboor)\n:"); tree * n_nearests [query_size]; printf("%Nearest to Key: {%f,%f,%f}\n",query_size,query_point[0],query_point[1],query_point[2]); knn(root,query_point, 0, k_dimensions,query_size,KD_TREE_HEAP_SIZE,n_nearests); //print n nearest neighbors tree * tree_ptr = &n_nearests[0]; for (int i = 0; i <query_size; i++) { if (NULL!=tree_ptr->info) { printf("Key={"); for (int c=0; c<k_dimensions; c++) { printf("%d,", tree_ptr->info[c]); } printf("} "); printf("%f min distance\n", tree_ptr->distance_to_neighbor); return 0; }//end main
C版本的输出:
knn (k nearest neighboor) :5 Nearest neighbors to Key: {6.000000,5.000000,4.000000} are: ***For debugging before sort: {-1.000000,-2.000000,-3.000000,} min_distance=49.000000 {0.000000,-1.000000,-2.000000,} min_distance=36.000000 {1.000000,0.000000,-1.000000,} min_distance=25.000000 {2.000000,1.000000,0.000000,} min_distance=16.000000 {3.000000,2.000000,1.000000,} min_distance=9.000000 {4.000000,3.000000,2.000000,} min_distance=4.000000 {5.000000,4.000000,3.000000,} min_distance=1.000000 {6.000000,5.000000,4.000000,} min_distance=0.000000 {7.000000,6.000000,5.000000,} min_distance=1.000000 {14.000000,13.000000,12.000000,} min_distance=64.000000 {15.000000,14.000000,13.000000,} min_distance=81.000000 {16.000000,15.000000,14.000000,} min_distance=100.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 ***For debugging after sort {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {6.000000,5.000000,4.000000,} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {} min_distance=0.000000 {5.000000,4.000000,3.000000,} min_distance=1.000000 {7.000000,6.000000,5.000000,} min_distance=1.000000 {4.000000,3.000000,2.000000,} min_distance=4.000000 {3.000000,2.000000,1.000000,} min_distance=9.000000 {2.000000,1.000000,0.000000,} min_distance=16.000000 {1.000000,0.000000,-1.000000,} min_distance=25.000000 {0.000000,-1.000000,-2.000000,} min_distance=36.000000 {-1.000000,-2.000000,-3.000000,} min_distance=49.000000 {14.000000,13.000000,12.000000,} min_distance=64.000000 {15.000000,14.000000,13.000000,} min_distance=81.000000 {16.000000,15.000000,14.000000,} min_distance=100.000000 **After function return:** Key={0,0,0,} 0.000000 min distance Key={0,0,0,} 0.000000 min distance Key={0,0,0,} 0.000000 min distance Key={0,0,0,} 0.000000 min distance Key={0,0,0,} 0.000000 min distance
我已经测试了它们按预期工作的Java和C版本中的插入,顺序遍历,搜索,删除和查找最小邻居。
在C版本中,knn函数在函数返回后返回零值吗?
为什么在主函数中调用后,结构中的值为零(请参见标题为“函数返回后”的输出区域?
希望其他人可以发现明显的东西,真是拼命拉扯我的头发。
我正在尝试将用Java用Java写的kd-ree上的一个knn(k最近邻居搜索)移植到C。Java输出,如预期的那样:分别对key:6.0,5.0,4.0 Key:6.0,5.0,4.0 ,min distance:0.0 Key:5.0,4.0,3.0,...
为什么在主函数中调用后,结构中的值为何为零(请参见标题为“函数返回后”的输出区域?