/**
 * Creates the file file_name in dir and fills it with consecutive integers,
 * beginning with start.
 *
 * NOTE(review): the loop bound is inclusive, so size + 1 integers are
 * written (start .. start + size). This is kept as found; confirm against
 * the callers whether exactly size values were intended.
 *
 * @param dir       directory in which the output is created
 * @param file_name name of the file to create
 * @param start     first value written
 * @param size      upper loop bound (inclusive, see note above)
 * @param pool      pool passed to lcn_directory_create_output
 * @return APR_SUCCESS, or the first error reported while creating, writing
 *         or closing the output
 */
static apr_status_t
create_sequence_file( lcn_directory_t *dir,
                      char *file_name,
                      int start,
                      int size,
                      apr_pool_t *pool )
{
    apr_status_t s = APR_SUCCESS;

    do
    {
        lcn_index_output_t *os;
        apr_status_t close_status;
        int i;

        LCNCE( lcn_directory_create_output( dir, &os, file_name, pool ));

        for( i = 0; i <= size; i++ )
        {
            /* stop at the first failed write instead of silently ignoring it */
            s = lcn_index_output_write_int( os, start );

            if ( APR_SUCCESS != s )
            {
                break;
            }

            start++;
        }

        /* always close the output, but keep an earlier write error */
        close_status = lcn_index_output_close( os );
        s = ( s ? s : close_status );
    }
    while(0);

    return s;
}
/**
 * Creates the file name in dir and writes size values obtained from rand()
 * to it, one lcn_index_output_write_long call per value.
 *
 * The random generator is re-seeded from the current time on every call.
 * NOTE(review): two calls within the same second therefore produce the same
 * sequence -- confirm that callers do not rely on the files differing.
 *
 * @param tc   CuTest context used for the assertions
 * @param dir  directory in which the output is created
 * @param name name of the file to create
 * @param size number of values to write
 * @param pool pool passed to lcn_directory_create_output
 * @return APR_SUCCESS, or the first error reported while writing or closing
 *         the output
 */
static apr_status_t
create_random_file( CuTest *tc,
                    lcn_directory_t* dir,
                    char* name,
                    int size,
                    apr_pool_t* pool )
{
    apr_status_t s = APR_SUCCESS;
    lcn_index_output_t* os;

    srand( ( unsigned int ) time(NULL) );

    do
    {
        apr_status_t close_status;
        int i;

        LCN_TEST( lcn_directory_create_output( dir, &os, name, pool ) );

        for ( i = 0; i < size; i++ )
        {
            unsigned int data = ( unsigned int ) rand();
            apr_status_t write_status;

            /* NOTE(review): rand() may legitimately return 0, in which
             * case this assertion fails -- confirm this is intended */
            CuAssertTrue( tc, data > 0 );

            write_status = lcn_index_output_write_long( os, data );

            if ( APR_SUCCESS != write_status )
            {
                /* report the failed write to the caller */
                s = write_status;
                break;
            }
        }

        /* always close the output, but keep an earlier write error */
        close_status = lcn_index_output_close( os );

        if ( APR_SUCCESS == s )
        {
            s = close_status;
        }
    }
    while(0);

    return s;
}
/**
 * Writes one norm file for every field that is indexed and does not omit
 * norms.
 *
 * For such a field with number i, the file "<segment_name>.f<i>" is created
 * in document_writer->directory and a single byte is written to it: the
 * encoded product of the field boost and the similarity's length norm for
 * the field length.
 *
 * Each output is closed exactly once, right after its byte has been written
 * (also when the write fails). File names and outputs are allocated from a
 * temporary subpool of document_writer->pool, which is destroyed before
 * returning.
 *
 * @param document_writer writer supplying field infos, boosts, lengths,
 *                        similarity and the target directory
 * @param segment_name    prefix of the norm file names
 * @return APR_SUCCESS, or the first error status encountered
 */
static apr_status_t
lcn_document_writer_write_norms( lcn_document_writer_t *document_writer,
                                 const char *segment_name )
{
    apr_status_t s = APR_SUCCESS;
    apr_pool_t *pool = NULL;

    do
    {
        unsigned int fi_size = lcn_field_infos_size( document_writer->field_infos );
        unsigned int i;

        LCNCE( apr_pool_create( &pool, document_writer->pool ) );

        for( i = 0; i < fi_size; i++ )
        {
            /* scoped per field, so that a stale handle of a previous
             * field can never be closed a second time */
            lcn_index_output_t *norms = NULL;
            lcn_field_info_t *field_info;
            apr_status_t write_status;
            apr_status_t close_status;
            float norm;

            LCNCE( lcn_field_infos_nth_info( document_writer->field_infos,
                                             &field_info,
                                             i ));

            if ( ! lcn_field_info_is_indexed( field_info ) ||
                 lcn_field_info_omit_norms( field_info ) )
            {
                continue;
            }

            norm = document_writer->field_boosts[ i ] *
                   lcn_similarity_length_norm( document_writer->similarity,
                                               lcn_field_info_name( field_info ),
                                               document_writer->field_lengths[ i ] );

            LCNCE( lcn_directory_create_output( document_writer->directory,
                                                &norms,
                                                apr_pstrcat( pool,
                                                             segment_name,
                                                             ".f",
                                                             apr_itoa( pool, i ),
                                                             NULL ),
                                                pool ));

            /* do not use LCNCE here: the output must be closed even if
             * the write fails */
            write_status = lcn_index_output_write_byte(
                norms,
                lcn_similarity_encode_norm( document_writer->similarity, norm ) );

            close_status = lcn_index_output_close( norms );

            /* report the first error of write and close */
            s = ( write_status ? write_status : close_status );

            if ( s )
            {
                break;
            }
        }
    }
    while(0);

    if ( NULL != pool )
    {
        apr_pool_destroy( pool );
    }

    return s;
}
/**
 * Writes the sorted postings of the document writer to the segment files
 * "<segment_name>.frq" and "<segment_name>.prx" and to a term infos writer
 * created for segment_name.
 *
 * For every posting a term dictionary entry with doc_freq 1 is added that
 * points to the current file positions of the freq and prox outputs. The
 * freq output receives either the single vint 1 (frequency is 1) or the
 * vint 0 followed by the frequency. The prox output receives the positions
 * of the posting, delta-encoded as vints.
 *
 * Both outputs and the term infos writer are closed on every path; all of
 * them live in a temporary subpool of document_writer->posting_table_pool,
 * which is destroyed before returning.
 *
 * Writing term vectors is not implemented yet; the disabled blocks show the
 * logic that remains to be ported.
 *
 * @param document_writer writer supplying the sorted postings, field infos,
 *                        term index interval and the target directory
 * @param segment_name    prefix of the created file names
 * @return APR_SUCCESS, or the first error status encountered while writing
 *         or closing
 */
static apr_status_t
lcn_document_writer_write_postings( lcn_document_writer_t *document_writer,
                                    const char *segment_name )
{
    apr_status_t s = APR_SUCCESS;
    apr_status_t s_save;
    apr_pool_t *pool = NULL;
    lcn_index_output_t *freq = NULL;
    lcn_index_output_t *prox = NULL;
    lcn_term_infos_writer_t *tis = NULL;
    /* TODO: TermVectorsWriter termVectorWriter = null */

    do
    {
        lcn_term_info_t ti;
        unsigned int field_ctr;
        unsigned int i;
        unsigned int j;

        LCNCE( apr_pool_create( &pool, document_writer->posting_table_pool ) );

        LCNCE( lcn_directory_create_output( document_writer->directory,
                                            &freq,
                                            apr_pstrcat( pool, segment_name, ".frq", NULL ),
                                            pool ));

        LCNCE( lcn_directory_create_output( document_writer->directory,
                                            &prox,
                                            apr_pstrcat( pool, segment_name, ".prx", NULL ),
                                            pool ));

        LCNCE( lcn_term_infos_writer_create( &tis,
                                             document_writer->directory,
                                             segment_name,
                                             document_writer->term_index_interval,
                                             pool ));

        for( field_ctr = 0;
             field_ctr < document_writer->sorted_postings->length;
             field_ctr++ )
        {
            unsigned int field_number;
            unsigned int *positions;
            lcn_field_info_t *field_info;
            unsigned int last_position;
            lcn_ptr_array_t *postings =
                (lcn_ptr_array_t*) document_writer->sorted_postings->arr[ field_ctr ];

            if ( 0 == postings->length )
            {
                continue;
            }

            /* the field number is looked up once, using the term of the
             * first posting of this array */
            LCNCM( lcn_field_infos_field_number( document_writer->field_infos,
                                                 &field_number,
                                                 lcn_term_field( ((lcn_posting_t*) postings->arr[0])->term ) ),
                   lcn_term_field( ((lcn_posting_t*) postings->arr[0])->term ) );

            for( i = 0; i < postings->length; i++ )
            {
                lcn_posting_t *posting = (lcn_posting_t*) postings->arr[ i ];
                unsigned int posting_freq = posting->freq;

                /* add an entry to the dictionary with pointers to prox and freq files */
                ti.doc_freq = 1;
                ti.freq_pointer = lcn_index_output_get_file_pointer( freq );
                ti.prox_pointer = lcn_index_output_get_file_pointer( prox );
                ti.skip_offset = -1;

                LCNCE( lcn_term_infos_writer_add_term ( tis, posting->term, &ti, field_number ));

                /* add an entry to the freq file */
                if ( 1 == posting_freq )                                     /* optimize freq == 1     */
                {
                    LCNCE( lcn_index_output_write_vint( freq, 1 ));            /* set low bit of doc num */
                }
                else
                {
                    LCNCE( lcn_index_output_write_vint( freq, 0 ));            /* the document number    */
                    LCNCE( lcn_index_output_write_vint( freq, posting_freq )); /* frequency in doc       */
                }

                last_position = 0;
                positions = posting->positions;

                for( j = 0; j < posting_freq; j++ )                          /* use delta-encoding     */
                {
                    unsigned int position = positions[j];
                    LCNCE( lcn_index_output_write_vint( prox, position - last_position ));
                    last_position = position;
                }

                /* a failed write only left the position loop above; stop
                 * here so that the error is not overwritten by the next
                 * successful call */
                if ( APR_SUCCESS != s )
                {
                    break;
                }

#if 0
                if (termVectorWriter != null && termVectorWriter.isFieldOpen())
                {
                    termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
                }
#endif

                LCNCE( lcn_field_infos_by_number ( document_writer->field_infos,
                                                   &field_info,
                                                   field_number ));

                if ( LCN_TRUE == lcn_field_info_store_term_vector( field_info ) )
                {
                    fprintf(stderr, "TODO: store term_vector\n" );
#if 0
                    if (termVectorWriter == null)
                    {
                        termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                        termVectorWriter.openDocument();
                    }
                    termVectorWriter.openField(currentField);
#endif
                }
#if 0
                else if (termVectorWriter != null)
                {
                    termVectorWriter.closeField();
                }
#endif
            }

            /* an error inside the posting loop only left that loop;
             * do not continue with the next field */
            if ( APR_SUCCESS != s )
            {
                break;
            }
        }

#if 0
        if (termVectorWriter != null)
            termVectorWriter.closeDocument();
#endif
    }
    while(0);

    /*
     * make an effort to close all streams we can but remember and re-throw
     * the first exception encountered in this process
     */
    if ( NULL != freq )
    {
        s_save = lcn_index_output_close( freq );
        s = ( s ? s : s_save );
    }

    if ( NULL != prox )
    {
        s_save = lcn_index_output_close( prox );
        s = ( s ? s : s_save );
    }

    if ( NULL != tis )
    {
        s_save = lcn_term_infos_writer_close( tis );
        s = ( s ? s : s_save );
    }

    if ( NULL != pool )
    {
        apr_pool_destroy( pool );
    }

    /* TODO if (termVectorWriter != null)
     *   try { termVectorWriter.close(); } catch (IOException e) { if (keep == null) keep = e; }
     */

    return (apr_status_t) s;
}