// Attach this IndexFile to `asset` and try to open its index.
//
// Returns 0 when the index is usable (either it is currently being built,
// in which case nothing is opened, or the existing file was opened and its
// info block was read) and 1 when the file could not be opened or read.
int IndexFile::open_index(Asset *asset)
{
    this->asset = asset;

    // An index that is still being built is served from the build buffer;
    // there is no file to open.
    if(asset->index_status == INDEX_BUILDING)
        return 0;

    // Any non-zero status from open_file() is reported as a plain failure.
    if(open_file())
        return 1;

    // The file exists but its info could not be read: release it again.
    if(read_info())
    {
        close_index();
        return 1;
    }

    asset->index_status = INDEX_READY;
    return 0;
}
// Close and delete the on-disk index file of the current asset.
//
// The file is only removed while the asset's index status is INDEX_READY or
// INDEX_NOTTESTED; in every other state the call does nothing.
// Always returns 0 (the original fell off the end of a non-void function,
// which is undefined behaviour in C++).
int IndexFile::remove_index()
{
    if(asset->index_status == INDEX_READY ||
       asset->index_status == INDEX_NOTTESTED)
    {
        close_index();
        remove(index_filename);
    }
    return 0;
}
/*
 * Opens the index file "test1.idx" and prints its keys twice: once via
 * uplist() and once via downlist(), then closes the index.
 */
int main(int argc, char **argv)
{
    (void) argc;
    (void) argv;

    fputs("read index files\n", stdout);
    open_index("test1.idx", &name1, 0);

    fputs("List the keys in each index file in ascending order:\n\n", stdout);
    uplist(&name1);

    fputs("List the keys for each index file in decending order\n\n", stdout);
    downlist(&name1);

    /* always close all files */
    close_index(&name1);
}
/*
 * Decrypt the index held in (*idx)->addr and populate the term table.
 *
 * The key box is decrypted with `kek`; the key it contains is then used to
 * decrypt the data box that follows it. The decrypted data starts with a
 * 32-bit big-endian term count, followed for each term by two 32-bit
 * big-endian values (count, len), `len` bytes of string data and
 * `count * ID_LEN` bytes of ids.
 *
 * *idx is grown with realloc() to hold one `term` per entry, so the caller's
 * pointer may change. The term str/ids pointers refer into the decrypted
 * buffer; nothing is copied.
 *
 * Returns true on success. On any failure the index is closed with
 * close_index(), *idx is set to NULL and false is returned.
 */
bool load_index(idx **idx, uint8_t *kek) {
    box     *key_box = BOX_PTR((*idx)->addr, KDFP_LEN);
    box     *payload = BOX_PTR(key_box, BOX_LEN(KEY_LEN));
    uint8_t *key     = BOX_DATA(key_box);
    size_t   size    = (*idx)->size;
    void    *grown;

    /* Short-circuit keeps the order: the data box is only attempted once
     * the key box has been decrypted successfully. */
    if (!decrypt_box(kek, key_box, KEY_LEN) ||
        !decrypt_box(key, payload, INDEX_LEN(size)))
        goto fail;

    uint32_t *words   = (uint32_t *) BOX_DATA(payload);
    uint32_t  n_terms = ntohl(*words++);

    size  = sizeof(**idx) + sizeof(term) * n_terms;
    grown = realloc(*idx, size);
    if (!grown)
        goto fail;

    *idx          = grown;
    (*idx)->key   = key;
    (*idx)->count = n_terms;

    for (uint32_t i = 0; i < n_terms; i++) {
        term *entry = &(*idx)->terms[i];

        entry->count = ntohl(*words++);
        entry->len   = ntohl(*words++);

        /* String bytes follow the two header words, ids follow the string. */
        uint8_t *bytes = (uint8_t *) words;
        entry->str = bytes;
        bytes += entry->len;
        entry->ids = bytes;

        /* The next term header starts right after this term's ids. */
        words = (uint32_t *) (bytes + entry->count * ID_LEN);
    }

    return true;

  fail:
    close_index(*idx);
    *idx = NULL;
    return false;
}
void do_backup(char *path) { init_recipe_store(); init_container_store(); init_index(); init_backup_jcr(path); NOTICE("\n\n==== backup begin ===="); TIMER_DECLARE(1); TIMER_BEGIN(1); time_t start = time(NULL); if (destor.simulation_level == SIMULATION_ALL) { start_read_trace_phase(); } else { start_read_phase(); start_chunk_phase(); start_hash_phase(); } start_dedup_phase(); start_rewrite_phase(); start_filter_phase(); do{ sleep(5); /*time_t now = time(NULL);*/ fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\r", jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num); }while(jcr.status == JCR_STATUS_RUNNING || jcr.status != JCR_STATUS_DONE); fprintf(stderr,"job %" PRId32 ", data size %" PRId64 " bytes, %" PRId32 " chunks, %d files processed\n", jcr.id, jcr.data_size, jcr.chunk_num, jcr.file_num); if (destor.simulation_level == SIMULATION_ALL) { stop_read_trace_phase(); } else { stop_read_phase(); stop_chunk_phase(); stop_hash_phase(); } stop_dedup_phase(); stop_rewrite_phase(); stop_filter_phase(); TIMER_END(1, jcr.total_time); close_index(); close_container_store(); close_recipe_store(); update_backup_version(jcr.bv); free_backup_version(jcr.bv); printf("\n\njob id: %" PRId32 "\n", jcr.id); printf("index method: %d.(Remark 0: NO; 1: DDFS; 2: Extreme binning; 3: Silo; 4: Sparse; 5: Sampled; 6: Block; 7: Learn)\n", destor.index_specific); printf("sampling method: %d (%d) (Remark 1:Random; 2: Min; 3: Uniform; 4: Optimized_min)\n", destor.index_sampling_method[0], destor.index_sampling_method[1]); printf("segment method: %d (%d) (Remark 0: Fixed; 1: Content; 2: File)\n", destor.index_segment_algorithm[0], destor.index_segment_algorithm[1]); printf("prefetch # of segments: %d (Remark 1 for sparse index)\n", destor.index_segment_prefech); printf("segment selection method: %d (%d)(Remark 0: Base; 1: Top; 2: Mix)\n", destor.index_segment_selection_method[0], destor.index_segment_selection_method[1]); printf("backup path: 
%s\n", jcr.path); printf("number of files: %d\n", jcr.file_num); printf("number of chunks: %" PRId32 " (%" PRId64 " bytes on average)\n", jcr.chunk_num, jcr.data_size / jcr.chunk_num); printf("number of unique chunks: %" PRId32 "\n", jcr.unique_chunk_num); printf("total size(B): %" PRId64 "\n", jcr.data_size); printf("stored data size(B): %" PRId64 "\n", jcr.unique_data_size + jcr.rewritten_chunk_size); printf("deduplication ratio: %.4f, %.4f\n", jcr.data_size != 0 ? (jcr.data_size - jcr.unique_data_size - jcr.rewritten_chunk_size) / (double) (jcr.data_size) : 0, jcr.data_size / (double) (jcr.unique_data_size + jcr.rewritten_chunk_size)); printf("total time(s): %.3f\n", jcr.total_time / 1000000); printf("the index memory footprint (B): %" PRId32 "\n", destor.index_memory_footprint); printf("throughput(MB/s): %.2f\n", (double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time)); printf("number of zero chunks: %" PRId32 "\n", jcr.zero_chunk_num); printf("size of zero chunks: %" PRId64 "\n", jcr.zero_chunk_size); printf("number of rewritten chunks: %" PRId32 "\n", jcr.rewritten_chunk_num); printf("size of rewritten chunks: %" PRId64 "\n", jcr.rewritten_chunk_size); printf("rewritten rate in size: %.3f\n", jcr.rewritten_chunk_size / (double) jcr.data_size); destor.data_size += jcr.data_size; destor.stored_data_size += jcr.unique_data_size + jcr.rewritten_chunk_size; destor.chunk_num += jcr.chunk_num; destor.stored_chunk_num += jcr.unique_chunk_num + jcr.rewritten_chunk_num; destor.zero_chunk_num += jcr.zero_chunk_num; destor.zero_chunk_size += jcr.zero_chunk_size; destor.rewritten_chunk_num += jcr.rewritten_chunk_num; destor.rewritten_chunk_size += jcr.rewritten_chunk_size; printf("read_time : %.3fs, %.2fMB/s\n", jcr.read_time / 1000000, jcr.data_size * 1000000 / jcr.read_time / 1024 / 1024); printf("chunk_time : %.3fs, %.2fMB/s\n", jcr.chunk_time / 1000000, jcr.data_size * 1000000 / jcr.chunk_time / 1024 / 1024); printf("hash_time : %.3fs, %.2fMB/s\n", 
jcr.hash_time / 1000000, jcr.data_size * 1000000 / jcr.hash_time / 1024 / 1024); printf("dedup_time : %.3fs, %.2fMB/s\n", jcr.dedup_time / 1000000, jcr.data_size * 1000000 / jcr.dedup_time / 1024 / 1024); printf("rewrite_time : %.3fs, %.2fMB/s\n", jcr.rewrite_time / 1000000, jcr.data_size * 1000000 / jcr.rewrite_time / 1024 / 1024); printf("filter_time : %.3fs, %.2fMB/s\n", jcr.filter_time / 1000000, jcr.data_size * 1000000 / jcr.filter_time / 1024 / 1024); printf("write_time : %.3fs, %.2fMB/s\n", jcr.write_time / 1000000, jcr.data_size * 1000000 / jcr.write_time / 1024 / 1024); //double seek_time = 0.005; //5ms //double bandwidth = 120 * 1024 * 1024; //120MB/s /* double index_lookup_throughput = jcr.data_size / (index_read_times * seek_time + index_read_entry_counter * 24 / bandwidth) / 1024 / 1024; double write_data_throughput = 1.0 * jcr.data_size * bandwidth / (jcr->unique_chunk_num) / 1024 / 1024; double index_read_throughput = 1.0 * jcr.data_size / 1024 / 1024 / (index_read_times * seek_time + index_read_entry_counter * 24 / bandwidth); double index_write_throughput = 1.0 * jcr.data_size / 1024 / 1024 / (index_write_times * seek_time + index_write_entry_counter * 24 / bandwidth);*/ /* double estimated_throughput = write_data_throughput; if (estimated_throughput > index_read_throughput) estimated_throughput = index_read_throughput;*/ /*if (estimated_throughput > index_write_throughput) estimated_throughput = index_write_throughput;*/ char logfile[] = "backup.log"; FILE *fp = fopen(logfile, "a"); /* * job id, * the size of backup * accumulative consumed capacity, * deduplication rate, * rewritten rate, * total container number, * sparse container number, * inherited container number, * 4 * index overhead (4 * int) * throughput, */ fprintf(fp, "%" PRId32 " %" PRId64 " %" PRId64 " %.4f %.4f %" PRId32 " %" PRId32 " %" PRId32 " %" PRId32" %" PRId32 " %" PRId32" %" PRId32" %.2f\n", jcr.id, jcr.data_size, destor.stored_data_size, jcr.data_size != 0 ? 
(jcr.data_size - jcr.rewritten_chunk_size - jcr.unique_data_size)/(double) (jcr.data_size) : 0, jcr.data_size != 0 ? (double) (jcr.rewritten_chunk_size) / (double) (jcr.data_size) : 0, jcr.total_container_num, jcr.sparse_container_num, jcr.inherited_sparse_num, index_overhead.lookup_requests, index_overhead.lookup_requests_for_unique, index_overhead.update_requests, index_overhead.read_prefetching_units, (double) jcr.data_size * 1000000 / (1024 * 1024 * jcr.total_time)); fclose(fp); }
int IndexFile::create_index(Asset *asset, MainProgressBar *progress) { int result = 0; this->mwindow = mwindow; this->asset = asset; interrupt_flag = 0; // open the source file File source; if(open_source(&source)) return 1; get_index_filename(source_filename, mwindow->preferences->index_directory, index_filename, asset->path); // Test for index in stream table of contents if(!source.get_index(index_filename)) { printf("IndexFile::create_index 1\n"); redraw_edits(1); } else // Build index from scratch { asset->index_zoom = get_required_scale(&source); // Indexes are now built for everything since it takes too long to draw // from CDROM source. // total length of input file int64_t length_source = source.get_audio_length(0); // get amount to read at a time in floats int64_t buffersize = 65536; char string[BCTEXTLEN]; sprintf(string, _("Creating %s."), index_filename); progress->update_title(string); progress->update_length(length_source); redraw_timer->update(); // thread out index thread IndexThread *index_thread = new IndexThread(mwindow, this, asset, index_filename, buffersize, length_source); index_thread->start_build(); // current sample in source file int64_t position = 0; int64_t fragment_size = buffersize; int current_buffer = 0; // pass through file once while(position < length_source && !result) { if(length_source - position < fragment_size && fragment_size == buffersize) fragment_size = length_source - position; index_thread->input_lock[current_buffer]->lock("IndexFile::create_index 1"); index_thread->input_len[current_buffer] = fragment_size; int cancelled = progress->update(position); if(cancelled || index_thread->interrupt_flag || interrupt_flag) { result = 3; } for(int channel = 0; !result && channel < asset->channels; channel++) { source.set_audio_position(position, 0); source.set_channel(channel); // Read from source file if(source.read_samples(index_thread->buffer_in[current_buffer][channel], fragment_size, 0)) result = 1; } // Release buffer to 
thread if(!result) { index_thread->output_lock[current_buffer]->unlock(); current_buffer++; if(current_buffer >= TOTAL_BUFFERS) current_buffer = 0; position += fragment_size; } else { index_thread->input_lock[current_buffer]->unlock(); } } // end thread cleanly index_thread->input_lock[current_buffer]->lock("IndexFile::create_index 2"); index_thread->last_buffer[current_buffer] = 1; index_thread->output_lock[current_buffer]->unlock(); index_thread->stop_build(); delete index_thread; } source.close_file(); open_index(asset); close_index(); mwindow->edl->set_index_file(asset); return 0; }
int main(int argc, void *argv) { load_stopwords(); index_p index = load_index(); int exit = 0; while (!exit) { printf(" > "); char *command = read_line(stdin); if (!strcmp(command, "exit")) { // exit command exit = 1; printf("Exit requested..\n"); } else if (!strcmp(command, "rebuild index")) { // rebuild index command rebuild_index(index); } else if (starts_with(command, "search for ")) { // search for <search_query> command char *query = (char *) malloc(strlen(command) - 10); memcpy(query, command+11, strlen(command) - 10); index_p result = search_index(&index, query); printf("Results (showing no more than 10, there might be more):\n"); if (result) { // print result int count = 0; indexed_word_p w = result->words; if (!w) { printf("No documents found for search term %s\n", query); } while (w) { printf("Documents containing %s:\n", w->stem); int i; for (i = 0; i < w->nr_docs; i++, count++) { printf(" [%d] %s\n", count, result->documents[w->documents[i].id].name); } w = w->next; } close_index(result); } else { printf("No documents found for search term %s\n", query); } free(query); } else if (starts_with(command, "add file ")) { // add file <file> command char *file = (char*) malloc(strlen(command) - 8); memcpy(file, command+9, strlen(command) - 8); index = add_file(index, file); free(file); } else if (starts_with(command, "remove file ")) { // remove file <file> command char *file = (char*) malloc(strlen(command) - 11); memcpy(file, command+12, strlen(command) - 11); // obtain document id a.k.a. index in filebase int doc_id = find_str(&index->documents[0].name, sizeof(indexed_document_t), file, 0, index->nr_docs - 1); if (doc_id < 0) { printf("Error: %s is not in the filebase!\n", file); } else { remove_file(index, doc_id); } free(file); } free(command); } // release memory release_stopwords(); close_index(index); return 0; }
int main(int argc, char **argv) { int i, num, ret; long ltime; ENTRY e; printf("Make one index files\n"); make_index("test1.idx",&name1, 0); printf("Indexing 100 items in two index files:\n"); /* note the time to index */ time(<ime); printf("%s",ctime(<ime)); /* add 100 keys to each index file */ for (i = MIN_TEST_START; i < MAX_TEST_END; i++) { memset(&e, '\0', sizeof(ENTRY)); e.recptr = i; sprintf(e.key, "%2d",i); if((ret = add_key(&e, &name1)) == IX_OK) printf("add key %s ok.\n", e.key); else if(ret = IX_EXISTED) printf("add key %s is existed.\n", e.key); else printf("add key %s error .\n", e.key); } printf("In Process: ADD %d key ok.\n", MAX_TEST_END); printf("Indexing is complete\n\n"); printf("List the keys in each index file in ascending order:\n\n"); uplist(&name1); /*[>list both files in decending order<]*/ printf("List the keys for each index file in decending order\n\n"); downlist(&name1); /* add key*/ for(i = MIN_TEST_START; i < MAX_TEST_END/2; ++i) { memset(&e, '\0', sizeof(ENTRY)); num = rand_num(); e.recptr = num; sprintf(e.key, "%2d",num); if((ret = add_key(&e, &name1)) == IX_OK) printf("add key %s ok.\n", e.key); else if(ret = IX_EXISTED) printf("add key %s is existed.\n", e.key); else printf("add key %s error.\n", e.key); } /* delete some keys and list again */ printf("\nNow delete half keys in each file\n\n"); for (i = MIN_TEST_START; i < MAX_TEST_END/2 ; i++) { /*memset(&e, '\0', sizeof(ENTRY));*/ num = rand_num(); e.recptr = num; sprintf(e.key, "%2d",num); if((ret = delete_key(&e, &name1)) == IX_OK) printf("delete key %s ok.\n",e.key); else if(ret == IX_NOTEXISTED) printf("delete key %s not existed.\n",e.key); else printf("delete key %s error.\n", e.key); } printf("List the keys now for each index file in ascending order:\n\n"); uplist(&name1); printf("List the keys now for each index file in ascending order:\n\n"); downlist(&name1); printf("exception test.\n"); printf("------------------------------------------------------------\n"); for (i = 
MIN_TEST_START; i < MAX_TEST_END/2 ; i++) { /*memset(&e, '\0', sizeof(ENTRY));*/ num = rand_num(); e.recptr = num; sprintf(e.key, "%2d",num); if(num %2 == 0) { if((ret = delete_key(&e, &name1)) == IX_OK) printf("delete key %s ok.\n",e.key); else if(ret == IX_NOTEXISTED) printf("delete key %s not existed.\n",e.key); else printf("delete key %s error.\n", e.key); } else { if((ret = add_key(&e, &name1)) == IX_OK) printf("add key %s ok.\n", e.key); else if(ret = IX_EXISTED) printf("add key %s is existed.\n", e.key); else printf("add key %s error .\n", e.key); } if(num %17 == 0) { printf("exception break.\n"); break; } } printf("List the keys now for each index file in ascending order:\n\n"); uplist(&name1); printf("List the keys now for each index file in ascending order:\n\n"); downlist(&name1); /* always close all files */ close_index(&name1); }
// FIXME: For now, this assumes equality for each of the expressions. db_index_offset_t db_index_getoffset(scan_t *sp, db_uint8 indexon, db_eet_t *searchfor, db_tuple_t *comparator_tp, relation_header_t *comparator_hp, db_query_mm_t *mmp) { if (sp->idx_meta_data.num_idx <= indexon) return -1; db_index_t index; if (1!=init_index(&index, sp->idx_meta_data.names[indexon])) { return -1; } if (DB_INDEX_TYPE_INLINE == index.type) { long first = sp->tuple_start; size_t total_size = sp->base.header->tuple_size + (sp->base.header->num_attr / 8); total_size += (sp->base.header->num_attr) % 8 > 0 ? 1 : 0; long last; if (sizeof(long)!=db_fileread(index.indexref, (unsigned char*)&(last), sizeof(long))) { return -1; } if (last > 0) last = first + (total_size * (last - 1)); else last = first; long imin = 0; long imax = (last - first) / total_size; long imid; db_uint8 order[sp->idx_meta_data.num_expr[indexon]]; int result; for (result = 0; result < sp->idx_meta_data.num_expr[indexon]; ++result) order[result] = DB_TUPLE_ORDER_ASC; db_tuple_t temp; init_tuple(&temp, sp->base.header->tuple_size, sp->base.header->num_attr, mmp); rewind_scan(sp, mmp); long i = -1; /* We binary search on expressions for first occurence. */ while (imin <= imax) { imid = imin + ((imax - imin) / 2); db_filerewind(sp->relation); db_fileseek(sp->relation, (imid*(total_size))+first); next_scan(sp, &temp, mmp); /* arr[imid], key */ if (NULL == comparator_hp) /* FIXME: quick hack to let indexed scans work. 
*/ { result = getintbypos(&temp, ((db_int)searchfor), sp->base.header) - ((db_int)comparator_tp); } else result = cmp_tuple(&temp, comparator_tp, sp->base.header, comparator_hp, sp->idx_meta_data.exprs[indexon], searchfor, sp->idx_meta_data.num_expr[indexon], order, 1, mmp); if (result < 0) imin = imid + 1; else if (result > 0) imax = imid - 1; else if (imin != imid) imax = imid; else { i = imid; break; } } if (i <= -1) i = imin; i = (first + (i*total_size)); db_filerewind(sp->relation); db_fileseek(sp->relation, i); next_scan(sp, &temp, mmp); /* FIXME: quick hack to let indexed scans work. (first part of the condition) */ if (NULL != comparator_hp && 0!=cmp_tuple(&temp, comparator_tp, sp->base.header, comparator_hp, sp->idx_meta_data.exprs[indexon], searchfor, sp->idx_meta_data.num_expr[indexon], order, 1, mmp)) i = -1; close_tuple(&temp, mmp); close_index(&index); return i; } return -1; }
/* * We assume a FIFO order of deleting backup, namely the oldest backup is deleted first. */ void do_delete(int jobid) { GHashTable *invalid_containers = trunc_manifest(jobid); init_index(); init_recipe_store(); /* Delete the invalid entries in the key-value store */ if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){ init_container_store(); struct backupVersion* bv = open_backup_version(jobid); /* The entries pointing to Invalid Containers are invalid. */ GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, invalid_containers); while(g_hash_table_iter_next(&iter, &key, &value)){ containerid id = *(containerid*)key; NOTICE("Reclaim container %lld", id); struct containerMeta* cm = retrieve_container_meta_by_id(id); container_meta_foreach(cm, delete_an_entry, &id); free_container_meta(cm); } bv->deleted = 1; update_backup_version(bv); free_backup_version(bv); close_container_store(); }else if(destor.index_category[1] == INDEX_CATEGORY_LOGICAL_LOCALITY){ /* Ideally, the entries pointing to segments in backup versions of a 'bv_num' less than 'jobid' are invalid. */ /* (For simplicity) Since a FIFO order is given, we only need to remove the IDs exactly matched 'bv_num'. 
*/ struct backupVersion* bv = open_backup_version(jobid); struct segmentRecipe* sr; while((sr=read_next_segment(bv))){ segment_recipe_foreach(sr, delete_an_entry, &sr->id); } bv->deleted = 1; update_backup_version(bv); free_backup_version(bv); }else{ WARNING("Invalid index type"); exit(1); } close_recipe_store(); close_index(); char logfile[] = "delete.log"; FILE *fp = fopen(logfile, "a"); /* * ID of the job we delete, * number of live containers, * memory footprint */ fprintf(fp, "%d %d %d\n", jobid, destor.live_container_num, destor.index_memory_footprint); fclose(fp); /* record the IDs of invalid containers */ sds didfilepath = sdsdup(destor.working_directory); char s[128]; sprintf(s, "recipes/delete_%d.id", jobid); didfilepath = sdscat(didfilepath, s); FILE* didfile = fopen(didfilepath, "w"); if(didfile){ GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, invalid_containers); while(g_hash_table_iter_next(&iter, &key, &value)){ containerid id = *(containerid*)key; fprintf(didfile, "%lld\n", id); } fclose(didfile); } g_hash_table_destroy(invalid_containers); }