Ejemplo n.º 1
0
/**
 * Creates the output file_name in dir and fills it with consecutive
 * integers start, start + 1, ... written via lcn_index_output_write_int().
 *
 * NOTE(review): the loop bound is inclusive, so size + 1 values are
 * written rather than size -- confirm that callers rely on this before
 * changing it.
 *
 * @param dir        directory in which the output is created
 * @param file_name  name of the output to create
 * @param start      first value written; incremented after every write
 * @param size       inclusive upper bound of the loop counter
 * @param pool       pool handed to lcn_directory_create_output()
 *
 * @return APR_SUCCESS, or the first error reported while creating,
 *         writing or closing the output
 */
static apr_status_t
create_sequence_file( lcn_directory_t *dir,
                      char *file_name,
                      int start,
                      int size,
                      apr_pool_t *pool )
{
    int i = 0;
    apr_status_t s = APR_SUCCESS;

    do
    {
        lcn_index_output_t *os;
        apr_status_t close_status;

        LCNCE( lcn_directory_create_output( dir, &os, file_name, pool ));

        for( i = 0; i <= size; i++ )
        {
            s = lcn_index_output_write_int( os, start );

            if ( s )
            {
                /* stop writing; the output is still closed below */
                break;
            }

            start++;
        }

        /* always close, but keep the first error seen */
        close_status = lcn_index_output_close( os );
        s = ( s ? s : close_status );
    }
    while(0);

    return s;
}
Ejemplo n.º 2
0
/**
 * Creates the output name in dir and writes size pseudo-random values
 * to it with lcn_index_output_write_long().
 *
 * The generator is re-seeded from the current time on every call, so the
 * file contents differ between runs.
 *
 * @param tc    CuTest context used for the assertions
 * @param dir   directory in which the output is created
 * @param name  name of the output to create
 * @param size  number of values to write
 * @param pool  pool handed to lcn_directory_create_output()
 *
 * @return APR_SUCCESS, or the first error reported while writing or
 *         closing the output
 */
static apr_status_t
create_random_file ( CuTest *tc,
                     lcn_directory_t* dir,
                     char* name,
                     int size,
                     apr_pool_t* pool)
{
    apr_status_t s = APR_SUCCESS;
    int i = 0;
    lcn_index_output_t* os;

    srand( ( unsigned int ) time(NULL) );

    do
    {
        apr_status_t close_status;

        LCN_TEST( lcn_directory_create_output( dir, &os, name, pool ) );

        for ( i = 0; i < size; i++ )
        {
            unsigned int data = ( unsigned int ) rand();

            /* NOTE(review): rand() may legitimately return 0, in which
             * case this assertion fails spuriously -- confirm intent. */
            CuAssertTrue( tc, data > 0 );

            s = lcn_index_output_write_long( os, data );

            if ( s )
            {
                /* stop writing; the output is still closed below */
                break;
            }
        }

        /* always close, but keep the first error seen */
        close_status = lcn_index_output_close ( os );
        s = ( s ? s : close_status );
    }
    while(0);

    return s;
}
Ejemplo n.º 3
0
/**
 * Writes the norm of every indexed field that does not omit norms.
 *
 * For each such field i a separate output named
 * "<segment_name>.f<i>" is created in the writer's directory, and a single
 * byte -- the encoded product of the field boost and the similarity's
 * length norm -- is written to it.
 *
 * Each output is closed exactly once: right after its byte has been
 * written, or after the loop if an error interrupted the iteration.
 *
 * @param document_writer  writer providing field infos, boosts, lengths,
 *                         similarity and target directory
 * @param segment_name     prefix of the norm file names
 *
 * @return APR_SUCCESS, or the first error encountered
 */
static apr_status_t
lcn_document_writer_write_norms( lcn_document_writer_t *document_writer,
                                 const char *segment_name )
{
    apr_status_t s = APR_SUCCESS;
    lcn_index_output_t *norms = NULL;
    apr_pool_t *pool = NULL;

    do
    {
        unsigned int fi_size = lcn_field_infos_size( document_writer->field_infos );
        unsigned int i;
        lcn_field_info_t *field_info;

        /* temporary pool for file names and outputs, destroyed on exit */
        LCNCE( apr_pool_create( &pool, document_writer->pool ) );

        for( i = 0; i < fi_size; i++ )
        {
            LCNCE( lcn_field_infos_nth_info( document_writer->field_infos,
                                             &field_info,
                                             i ));

            if ( lcn_field_info_is_indexed( field_info ) &&
                ! lcn_field_info_omit_norms( field_info ) )
            {
                float norm =
                    document_writer->field_boosts[ i ] *
                    lcn_similarity_length_norm( document_writer->similarity,
                                                lcn_field_info_name( field_info ),
                                                document_writer->field_lengths[ i ] );

                LCNCE( lcn_directory_create_output( document_writer->directory,
                                                    &norms,
                                                    apr_pstrcat( pool, segment_name, ".f", apr_itoa( pool, i ), NULL ),
                                                    pool ));

                LCNCE( lcn_index_output_write_byte( norms, lcn_similarity_encode_norm( document_writer->similarity, norm ) ));
            }

            if ( NULL != norms )
            {
                apr_status_t st = lcn_index_output_close( norms );

                /* forget the handle so that a following field without
                 * norms does not close the same output a second time */
                norms = NULL;
                s = s ? s : st;
            }

            if ( s )
            {
                break;
            }
        }
    }
    while(0);

    /*
     * An error inside the loop skips the regular close above; close the
     * pending output here, before its pool goes away, keeping the first
     * error. (Presumes a failed create leaves norms as NULL.)
     */
    if ( NULL != norms )
    {
        apr_status_t st = lcn_index_output_close( norms );
        s = s ? s : st;
    }

    if ( NULL != pool )
    {
        apr_pool_destroy( pool );
    }

    return s;
}
Ejemplo n.º 4
0
/**
 * Writes the sorted postings of the document writer to a segment.
 *
 * Creates the outputs "<segment_name>.frq" and "<segment_name>.prx" plus a
 * term infos writer, then for every posting:
 *   - adds a term info entry holding the current file pointers of both
 *     outputs,
 *   - appends the frequency entry to the .frq output,
 *   - appends the delta-encoded positions to the .prx output.
 *
 * All streams that were opened are closed on exit, and the first error
 * encountered is returned.
 *
 * NOTE(review): LCNCE/LCNCM are presumed to store the status in s and
 * `break`. Inside the nested for loops such a break leaves only the
 * innermost loop, so the status is re-checked after each inner loop to
 * keep an error from being overwritten by a later successful call.
 *
 * @param document_writer  writer holding the sorted postings, the field
 *                         infos and the target directory
 * @param segment_name     prefix of the files to create
 *
 * @return APR_SUCCESS, or the first error encountered
 */
static apr_status_t
lcn_document_writer_write_postings( lcn_document_writer_t *document_writer,
                                    const char *segment_name )
{
    apr_status_t s = APR_SUCCESS;
    apr_status_t s_save;

    apr_pool_t *pool = NULL;
    lcn_index_output_t *freq = NULL;
    lcn_index_output_t *prox = NULL;
    lcn_term_infos_writer_t *tis = NULL;

    /* TODO: TermVectorsWriter termVectorWriter = null */

    do
    {
        lcn_term_info_t ti;
        unsigned int field_ctr;
        unsigned int i;
        unsigned int j;

        LCNCE( apr_pool_create( &pool, document_writer->posting_table_pool ) );

        LCNCE( lcn_directory_create_output( document_writer->directory,
                                            &freq,
                                            apr_pstrcat( pool, segment_name, ".frq", NULL ),
                                            pool ));

        LCNCE( lcn_directory_create_output( document_writer->directory,
                                            &prox,
                                            apr_pstrcat( pool, segment_name, ".prx", NULL ),
                                            pool ));

        LCNCE( lcn_term_infos_writer_create( &tis,
                                             document_writer->directory,
                                             segment_name,
                                             document_writer->term_index_interval,
                                             pool ));

        for( field_ctr = 0;
             field_ctr < document_writer->sorted_postings->length;
             field_ctr++ )
        {
            unsigned int field_number;
            unsigned int *positions;
            lcn_field_info_t *field_info;
            unsigned int last_position;

            lcn_ptr_array_t *postings = (lcn_ptr_array_t*) document_writer->sorted_postings->arr[ field_ctr ];

            if ( 0 == postings->length )
            {
                continue;
            }

            /* the field number is looked up from the first posting's term */
            LCNCM( lcn_field_infos_field_number( document_writer->field_infos,
                                                 &field_number,
                                                 lcn_term_field( ((lcn_posting_t*) postings->arr[0])->term ) ),
                   lcn_term_field( ((lcn_posting_t*) postings->arr[0])->term ) );

            for( i = 0; i < postings->length; i++ )
            {
                lcn_posting_t *posting = (lcn_posting_t*) postings->arr[ i ];
                unsigned int posting_freq = posting->freq;

                /* add an entry to the dictionary with pointers to prox and freq files */
                ti.doc_freq = 1;
                ti.freq_pointer = lcn_index_output_get_file_pointer( freq );
                ti.prox_pointer = lcn_index_output_get_file_pointer( prox );
                ti.skip_offset  = -1;

                LCNCE( lcn_term_infos_writer_add_term ( tis,
                                                        posting->term,
                                                        &ti,
                                                        field_number ));

                /* add an entry to the freq file */

                if ( 1 == posting_freq )  /* optimize freq == 1 */
                {
                    LCNCE( lcn_index_output_write_vint( freq, 1 )); /* set low bit of doc num */
                }
                else
                {
                    LCNCE( lcn_index_output_write_vint( freq, 0 ));              /* the document number */
                    LCNCE( lcn_index_output_write_vint( freq, posting_freq ));   /* frequency in doc    */
                }

                last_position = 0;
                positions = posting->positions;

                for( j = 0; j < posting_freq; j++ )  /* use delta-encoding */
                {
                    unsigned int position = positions[j];
                    LCNCE( lcn_index_output_write_vint( prox, position - last_position ));
                    last_position = position;
                }

                /* a failed position write only left the loop above */
                if ( s )
                {
                    break;
                }

#if 0
                if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
                    termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
                }
#endif

                LCNCE( lcn_field_infos_by_number ( document_writer->field_infos,
                                                   &field_info,
                                                   field_number ));

                if ( LCN_TRUE == lcn_field_info_store_term_vector( field_info ) )
                {
                    fprintf(stderr, "TODO: store term_vector\n" );
#if 0
                    if (termVectorWriter == null) {
                      termVectorWriter =
                        new TermVectorsWriter(directory, segment, fieldInfos);
                      termVectorWriter.openDocument();
                    }
                    termVectorWriter.openField(currentField);
#endif
                }
#if 0
                else if (termVectorWriter != null) {
                    termVectorWriter.closeField();
                }
#endif
            }

            /* a failure inside the posting loop must also stop the field loop */
            if ( s )
            {
                break;
            }
        }

#if 0
      if (termVectorWriter != null)
        termVectorWriter.closeDocument();
#endif

    }
    while(0);

    /*
     * make an effort to close all streams we can but remember and re-throw
     * the first exception encountered in this process
     */

    if ( NULL != freq )
    {
        s_save = lcn_index_output_close( freq );
        s = ( s ? s : s_save );
    }

    if ( NULL != prox )
    {
        s_save = lcn_index_output_close( prox );
        s = ( s ? s : s_save );
    }

    if ( NULL != tis )
    {
        s_save = lcn_term_infos_writer_close( tis );
        s = ( s ? s : s_save );
    }

    if ( NULL != pool )
    {
        apr_pool_destroy( pool );
    }

    /* TODO if (termVectorWriter  != null)
     * try {  termVectorWriter.close(); } catch (IOException e) { if (keep == null) keep = e; }
     */

    return s;
}