static void test_ght_node_build_tree_big(void) { GhtCoordinate coord; int i, j; int npts = 200; const double x_off = -127.0; const double y_off = 49.0; const double scale = 0.0001; GhtNode *node, *root; GhtErr err; int count = 0; stringbuffer_t *sb; for ( i = 0; i < npts; i++ ) { for ( j = 0; j < npts; j++ ) { coord.x = x_off + i*scale; coord.y = y_off + j*scale; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node); if ( i || j ) { err = ght_node_insert_node(root, node, GHT_DUPES_YES); } else { root = node; } } } // sb = stringbuffer_create(); // err = ght_node_to_string(root, sb, 0); // printf("\n%s\n", stringbuffer_getstring(sb)); // stringbuffer_destroy(sb); err = ght_node_count_leaves(root, &count); CU_ASSERT_EQUAL(err, GHT_OK); // printf("count %d\n", count); CU_ASSERT_EQUAL(count, npts*npts); ght_node_free(root); }
static void test_ght_node_file_serialization(void) { GhtCoordinate coord; GhtNode *node, *root, *noderead; GhtErr err; GhtWriter *writer; GhtReader *reader; stringbuffer_t *sb1; GhtAttribute *attr; const char* testfile = "test.ght"; if ( fexists(testfile) ) remove(testfile); coord.x = -127.4123; coord.y = 49.23141; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node); CU_ASSERT_EQUAL(err, GHT_OK); root = node; coord.x = -127.4122; coord.y = 49.23142; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node); err = ght_attribute_new_from_double(schema->dims[2], 88.88, &attr); err = ght_node_add_attribute(node, attr); err = ght_node_insert_node(root, node, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); coord.x = -127.4122001; coord.y = 49.23142001; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node); err = ght_attribute_new_from_double(schema->dims[2], 15.23, &attr); err = ght_node_add_attribute(node, attr); err = ght_node_insert_node(root, node, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); coord.x = -127.4122002; coord.y = 49.23142002; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node); err = ght_attribute_new_from_double(schema->dims[2], 19.23, &attr); err = ght_node_add_attribute(node, attr); err = ght_node_insert_node(root, node, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); // sb1 = stringbuffer_create(); // err = ght_node_to_string(root, sb1, 0); // printf("\n%s\n", stringbuffer_getstring(sb1)); // stringbuffer_destroy(sb1); err = ght_writer_new_file(testfile, &writer); CU_ASSERT_EQUAL(err, GHT_OK); err = ght_node_write(root, writer); CU_ASSERT_EQUAL(err, GHT_OK); ght_writer_free(writer); err = ght_reader_new_file(testfile, schema, &reader); CU_ASSERT_EQUAL(err, GHT_OK); err = ght_node_read(reader, &noderead); CU_ASSERT_EQUAL(err, GHT_OK); ght_reader_free(reader); remove(testfile); ght_node_free(root); ght_node_free(noderead); }
static void test_ght_node_serialization(void) { GhtCoordinate coord; int x, y; GhtNode *node1, *node2, *node3; GhtErr err; GhtWriter *writer; GhtReader *reader; const uint8_t *bytes; size_t bytes_size; stringbuffer_t *sb1, *sb2; GhtAttribute *attr; char *hex; /* ght_node_new_from_coordinate(const GhtCoordinate *coord, unsigned int resolution, GhtNode **node); */ coord.x = -127.4123; coord.y = 49.23141; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node1); CU_ASSERT_STRING_EQUAL(node1->hash, "c0v2hdm1wpzpy4vtv4"); CU_ASSERT_EQUAL(err, GHT_OK); err = ght_writer_new_mem(&writer); err = ght_node_write(node1, writer); bytes = bytebuffer_getbytes(writer->bytebuffer); bytes_size = bytebuffer_getsize(writer->bytebuffer); err = ght_reader_new_mem(bytes, bytes_size, schema, &reader); err = ght_node_read(reader, &node2); CU_ASSERT_STRING_EQUAL(node1->hash, node2->hash); ght_node_free(node2); /* add a child */ coord.x = -127.4125; coord.y = 49.23144; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node3); err = ght_attribute_new_from_double(schema->dims[3], 88.88, &attr); err = ght_node_add_attribute(node3, attr); err = ght_node_insert_node(node1, node3, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); /* add another (dupe) child */ err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node3); err = ght_node_insert_node(node1, node3, GHT_DUPES_YES); /* add another (dupe) child with an attribute */ err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node3); err = ght_attribute_new_from_double(schema->dims[2], 99.99, &attr); err = ght_node_add_attribute(node3, attr); err = ght_node_insert_node(node1, node3, GHT_DUPES_YES); sb1 = stringbuffer_create(); err = ght_node_to_string(node1, sb1, 0); // printf("ORIGINAL\n%s\n", stringbuffer_getstring(sb1)); err = ght_writer_new_mem(&writer); err = ght_node_write(node1, writer); bytes = bytebuffer_getbytes(writer->bytebuffer); bytes_size = bytebuffer_getsize(writer->bytebuffer); err 
= hexbytes_from_bytes(bytes, bytes_size, &hex); CU_ASSERT_STRING_EQUAL("086330763268646D3100020A77707A7079347674763400000A6374643463637839796201035800020000000001020F27000000", hex); // printf("\n\n%s\n", hex); err = ght_reader_new_mem(bytes, bytes_size, schema, &reader); err = ght_node_read(reader, &node2); sb2 = stringbuffer_create(); err = ght_node_to_string(node2, sb2, 0); // printf("COPY\n%s\n", stringbuffer_getstring(sb2)); CU_ASSERT_STRING_EQUAL(stringbuffer_getstring(sb1), stringbuffer_getstring(sb2)); stringbuffer_destroy(sb2); stringbuffer_destroy(sb1); ght_node_free(node1); ght_node_free(node2); ght_writer_free(writer); ght_reader_free(reader); }
static void test_ght_node_build_tree(void) { GhtCoordinate coord; int x, y; GhtNode *node1, *node2, *node3, *node4, *node5, *root; GhtErr err; /* ght_node_new_from_coordinate(const GhtCoordinate *coord, unsigned int resolution, GhtNode **node); */ coord.x = -127.4123; coord.y = 49.23141; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &root); CU_ASSERT_STRING_EQUAL(root->hash, "c0v2hdm1wpzpy4vtv4"); CU_ASSERT_EQUAL(err, GHT_OK); /* ght_node_insert_node(GhtNode *node, GhtNode *node_to_insert, int duplicates) */ /* insert duplicate */ coord.x = -127.4123; coord.y = 49.23141; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node1); err = ght_node_insert_node(root, node1, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); CU_ASSERT_EQUAL(node1->hash, NULL); /* insert split */ coord.x = -127.4124; coord.y = 49.23142; err = ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node2); /* before insert, it's full length */ CU_ASSERT_STRING_EQUAL(node2->hash, "c0v2hdm1gcuekpf9y1"); err = ght_node_insert_node(root, node2, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); /* after insert, it's been truncated to the distinct part */ CU_ASSERT_STRING_EQUAL(node2->hash, "gcuekpf9y1"); /* and the root has been truncated to the common part */ CU_ASSERT_STRING_EQUAL(root->hash, "c0v2hdm1"); /* and distinct part of the root is now a new child node */ CU_ASSERT_STRING_EQUAL(root->children->nodes[0]->hash, "wpzpy4vtv4"); /* which in turn has the old identical node as a child */ CU_ASSERT_EQUAL(root->children->nodes[0]->children->nodes[0], node1); /* insert child */ err = ght_node_new_from_hash("c0v2hdm1wpzpy4vkv4", &node3); /* before insert, it's full length */ CU_ASSERT_STRING_EQUAL(node3->hash, "c0v2hdm1wpzpy4vkv4"); err = ght_node_insert_node(root, node3, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); /* after insert it's only got the last piece */ CU_ASSERT_STRING_EQUAL(node3->hash, "kv4"); /* insert duplicate of previous */ err = 
ght_node_new_from_hash("c0v2hdm1wpzpy4vkv4", &node4); /* before insert, it's full length */ CU_ASSERT_STRING_EQUAL(node4->hash, "c0v2hdm1wpzpy4vkv4"); err = ght_node_insert_node(root, node4, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); /* after insert it's nulled, because it's a duplicate */ CU_ASSERT_EQUAL(node4->hash, NULL); /* also, it's hanging off the previous node */ CU_ASSERT_EQUAL(node3->children->nodes[0], node4); /* insert another duplicate of previous */ err = ght_node_new_from_hash("c0v2hdm1wpzpy4vkv4", &node5); /* before insert, it's full length */ CU_ASSERT_STRING_EQUAL(node5->hash, "c0v2hdm1wpzpy4vkv4"); err = ght_node_insert_node(root, node5, GHT_DUPES_YES); CU_ASSERT_EQUAL(err, GHT_OK); /* after insert it's nulled, because it's a duplicate */ CU_ASSERT_EQUAL(node5->hash, NULL); /* also, it's hanging off the parent node */ CU_ASSERT_EQUAL(node3->children->nodes[1], node5); ght_node_free(root); }
/**
 * Recursively walk down from `node` looking for the place where
 * `node_to_insert` belongs, and attach it there.
 *
 * The relationship between the two hashes decides what happens:
 *  - child (or global match): the insert node keeps only the part of its
 *    hash not covered by `node`, is offered to each existing child in turn,
 *    and is added directly under `node` if no child accepts it;
 *  - same: with `duplicates` set, the insert node loses its hash and is
 *    added under `node` as an attribute-only node; otherwise it is skipped;
 *  - split: `node` is cut back to the shared prefix, its unshared remainder
 *    (with attributes and children) moves into a new child, and the insert
 *    node is added beside it holding its own unshared remainder.
 *
 * Inserting "abcdefg", "abcdeff", "abcdddd", "abbbeee" in that order gives
 * "ab"->["cd"->["ef"->["g","f"],"ddd"],"bbeee"]
 *
 * @return GHT_OK when the node was placed (or skipped as a duplicate),
 *         GHT_INCOMPLETE when `node` has no hash, the failing status of any
 *         GHT_TRY-wrapped call, or GHT_ERROR for an unhandled match type.
 */
GhtErr
ght_node_insert_node(GhtNode *node, GhtNode *node_to_insert, GhtDuplicates duplicates)
{
    GhtHash *parent_tail, *insert_tail;
    GhtHashMatch match;

    /* A hash-less node is a stand-in for a duplicate point; it takes no children */
    if ( node->hash == NULL )
        return GHT_INCOMPLETE;

    /* Find where the two hashes diverge; each tail pointer marks the */
    /* start of the unshared part inside its own hash */
    GHT_TRY(ght_hash_leaf_parts(node->hash, node_to_insert->hash, GHT_MAX_HASH_LENGTH,
                                &match, &parent_tail, &insert_tail));

    switch ( match )
    {
        /* The insert node sits below this node, either explicitly or */
        /* because this node is the "" master parent. */
        /* "abcdef" is a GHT_CHILD of "abc", and gets added as "def" */
        case GHT_CHILD:
        case GHT_GLOBAL:
        {
            int idx = 0;

            /* Keep only the part of the hash this node does not cover */
            ght_node_set_hash(node_to_insert, ght_strdup(insert_tail));

            /* Offer the node to each child; the first one to accept it wins */
            while ( idx < ght_node_num_children(node) )
            {
                if ( ght_node_insert_node(node->children->nodes[idx], node_to_insert, duplicates) == GHT_OK )
                    return GHT_OK;
                idx++;
            }

            /* No child took it, so it becomes a new child at this level */
            return ght_node_add_child(node, node_to_insert);
        }

        /* The insert node has exactly this node's hash */
        case GHT_SAME:
        {
            /* For now duplicates are dropped when not wanted. */
            /* In future, average / median them onto the parent here? */
            if ( ! duplicates )
                return GHT_OK;

            /* On the first duplicate, this node's own attributes move */
            /* into a hash-less proxy leaf so the value stays represented */
            if ( ! ( node->children && node->children->num_nodes != 0 ) )
            {
                GhtNode *proxy_leaf;
                GHT_TRY(ght_node_new(&proxy_leaf));
                GHT_TRY(ght_node_transfer_attributes(node, proxy_leaf));
                GHT_TRY(ght_node_add_child(node, proxy_leaf));
            }

            /* The duplicate itself goes underneath, without a hash */
            ght_free(node_to_insert->hash);
            node_to_insert->hash = NULL;
            GHT_TRY(ght_node_add_child(node, node_to_insert));
            return GHT_OK;
        }

        /* "abcdef" and "abcghi" need to GHT_SPLIT, into "abc"->["def", "ghi"] */
        case GHT_SPLIT:
        {
            /* New node for the part of this node's hash that is not shared; */
            /* it must be built before the hash is truncated below */
            GhtNode *split_child;
            GHT_TRY(ght_node_new_from_hash(parent_tail, &split_child));

            /* Attributes follow the unshared part down the tree */
            GHT_TRY(ght_node_transfer_attributes(node, split_child));

            /* So do any existing children */
            if ( node->children )
            {
                split_child->children = node->children;
                node->children = NULL;
            }

            /* Cut this node's hash off at the end of the shared part */
            *parent_tail = '\0';

            /* Slide the unshared part of the insert hash to the front */
            memmove(node_to_insert->hash, insert_tail, strlen(insert_tail)+1);

            /* Hang the old remainder, then the new node, off this node */
            GHT_TRY(ght_node_add_child(node, split_child));
            GHT_TRY(ght_node_add_child(node, node_to_insert));
            return GHT_OK;
        }

        default:
            break;
    }

    /* Don't get here */
    return GHT_ERROR;
}
static void test_ght_build_tree_with_attributes(void) { int i; static const char *simpledata = "test/data/simple-data.tsv"; GhtNodeList *nodelist; GhtNode *root, *node; GhtErr err; GhtAttribute attr; stringbuffer_t *sb; double d; /* Read a nodelist from a TSV file */ nodelist = tsv_file_to_node_list(simpledata, simpleschema); CU_ASSERT_EQUAL(nodelist->num_nodes, 8); /* Build node list into a tree */ root = nodelist->nodes[0]; for ( i = 1; i < nodelist->num_nodes; i++ ) { err = ght_node_insert_node(root, nodelist->nodes[i], GHT_DUPES_YES); } /* Write the tree to string: c0n0e q m m7 dvy8yz9 Z=123.4:Intensity=5 ky667sj Z=123.4:Intensity=5 qw00rg068 Z=123.4:Intensity=5 hekkhnhj3b Z=123.4:Intensity=5 6myj870p99 Z=123.3:Intensity=5 46jybv17y1 Z=123.4:Intensity=5 r 980jtyf1dh Z=123.4:Intensity=5 2khvpfu13f Z=123.4:Intensity=5 */ sb = stringbuffer_create(); ght_node_to_string(root, sb, 0); // printf("\n%s\n", stringbuffer_getstring(sb)); stringbuffer_destroy(sb); /* Compact the tree on both attributes: c0n0e Intensity=5 q m Z=123.4 m7 dvy8yz9 ky667sj qw00rg068 hekkhnhj3b Z=123.4 6myj870p99 Z=123.3 46jybv17y1 Z=123.4 r Z=123.4 980jtyf1dh 2khvpfu13f */ sb = stringbuffer_create(); ght_node_compact_attribute(root, simpleschema->dims[2], &attr); ght_node_compact_attribute(root, simpleschema->dims[3], &attr); ght_node_to_string(root, sb, 0); // printf("\n%s\n", stringbuffer_getstring(sb)); stringbuffer_destroy(sb); /* Check that Intensity=5 has migrated all the way to the top of the tree */ CU_ASSERT_STRING_EQUAL(root->attributes->dim->name, "Intensity"); ght_attribute_get_value(root->attributes, &d); CU_ASSERT_DOUBLE_EQUAL(d, 5, 0.00000001); ght_node_free(root); ght_nodelist_free_shallow(nodelist); }