Пример #1
0
// ** Films::similarTo
// Collects the films paired with the film 'oid' in the "similar" collection of the target
// database and returns them ordered by SimilarFilm::sortBySimilarity.
//
// oid   - textual object id of the film to look up.
// count - maximum number of entries to keep after sorting; zero or a negative value
//         keeps every entry found.
SimilarFilmsArray Films::similarTo( const std::string& oid, int count ) const
{
    SimilarFilmsArray result;

    OID           objectId = OID( oid );
    CollectionPtr similars = m_target->collection( "similar" );

    // ** Match both sides of a pair by OID: Films::processFilms writes "first" and "second"
    // ** as OID values, so comparing "first" against the raw string would never match.
    CursorPtr     cursor   = similars->find( QUERY( "$or" << ARRAY( DOCUMENT( "first" << objectId ) << DOCUMENT( "second" << objectId ) ) ) );
    DocumentPtr   document;

    while( (document = cursor->next()) ) {
        // ** Read data from document
        OID first  = document->objectId( "first" );
        OID second = document->objectId( "second" );
        int value  = document->integer( "value" );

        // ** Decode similarity & accuracy
        float similarity, accuracy;
        decodeSimilarity( value, similarity, accuracy );

        // ** Push similar film (the other side of the pair)
        Film    film    = filmById( first == objectId ? second : first );
        Quality quality = qualityFromRange( similarity, m_similarityQuartiles );
        result.push_back( SimilarFilm( film, similarity, accuracy, quality ) );
    }

    std::sort( result.begin(), result.end(), SimilarFilm::sortBySimilarity );

    // ** Truncate only for a positive limit that is actually smaller than the result;
    // ** a negative count must never reach resize() where it would become a huge size.
    if( count > 0 && count < static_cast<int>( result.size() ) ) {
        result.resize( count );
    }

    return result;
}
Пример #2
0
// ** Films::processFilms
void Films::processFilms( float sharedThreshold )
{
    CursorPtr           films       = m_source->collection( "items" )->find();
    CollectionPtr       processed   = m_target->collection( "items" );
    CollectionPtr       similar     = m_target->collection( "similar" );

    IRecommenderItems*  items       = new StreamedFilms( *this, m_target );
    SimilarityPtr       similarity  = SimilarityPtr( new JaccardAccuracySimilarity( NumericFeatures::pearson, 0.0f, 0.0f, sharedThreshold ) );
    Recommender         recommender = Recommender( this, similarity.get() );

    DocumentPtr         document;

    int counter = 0;
    int total   = m_source->collection( "items" )->count();

    while( (document = films->next()) ) {
        // ** Read the item id
        int itemId = document->integer( "itemId" );

        // ** Print progress
        if( (++counter % 10) == 0 ) printf( "Processing films [%d/%d]\n", counter, total );

        // ** Skip processed films
        if( processed->findOne( QUERY( "itemId" << itemId ) ) != NULL ) {
            continue;
        }

        // ** Load first item
        items->first();

        // ** Compute and store similarities
        RecommenderItem item    = findById( itemId );
        SimilarItems    pairs   = recommender.findSimilarItems( items, &item );
        OID             oid     = filmIdToObjectId( itemId );

        for( SimilarItems::const_iterator i = pairs.begin(), end = pairs.end(); i != end; ++i ) {
            similar->insert( DOCUMENT( "first" << oid << "second" << filmIdToObjectId( i->m_item ) << "value" << ( int )encodeSimilarity( i->m_similarity, i->m_shared ) ) );
        }

        // ** Store processed film
        processed->insert( DOCUMENT( "itemId" << itemId ) );
    }

    // ** Build indices
    similar->ensureIndex( "idxFirst",  DOCUMENT( "first"  << 1 ) );
    similar->ensureIndex( "idxSecond", DOCUMENT( "second" << 1 ) );

    // ** Delete iterator
    delete items;
}
Пример #3
0
// ** Films::objectIdToFilmId
int Films::objectIdToFilmId( const OID& oid ) const
{
    int result = -1;

    CollectionPtr items = m_source->collection( "items" );
    DocumentPtr   item  = items->findOne( QUERY( "_id" << oid ) );

    assert( item != NULL );
    if( item != NULL ) {
        result = item->integer( "itemId" );
    }

    return result;
}
Пример #4
0
// ** PreloadedFilms::PreloadedFilms
// Drains 'cursor' and builds one heap-allocated RecommenderItem per document, registering
// each of them in m_items under its item id. The "votes" feature space of every item is
// filled from films.votesForFilm().
// NOTE(review): the items and their m_userData copies are allocated with new here; the code
// that releases them is not visible in this block.
PreloadedFilms::PreloadedFilms( const Films& films, const CursorPtr& cursor )
{
    for( DocumentPtr doc = cursor->next(); doc != NULL; doc = cursor->next() ) {
        RecommenderItem* entry = new RecommenderItem;

        // ** Keep a private copy of the document's _id as user data
        entry->m_userData = new OID( *doc->_id().value() );
        entry->m_id       = doc->integer( "itemId" );
        entry->m_name     = doc->string( "name.ru" );

        entry->m_featureSpaces["votes"] = films.votesForFilm( entry->m_id );

        m_items[entry->m_id] = entry;
    }
}
Пример #5
0
// ** Films::updateVotesCount
// For every document of the source "items" collection counts the documents of the source
// "votes" collection with the same "itemId" and writes that number to the item's
// "votesCount" field. Progress is printed after each updated item.
void Films::updateVotesCount( void )
{
    CollectionPtr  items  = m_source->collection( "items" );
    CollectionPtr  votes  = m_source->collection( "votes" );
    CursorPtr      cursor = items->find();
    DocumentPtr    document;

    int progress = 0;
    int total    = items->count();

    while( (document = cursor->next()) ) {
        int itemId = document->integer( "itemId" );
        int count  = votes->count( QUERY( "itemId" << itemId ) );

        // ** $set touches only the votesCount field of the matching item
        items->update( QUERY( "itemId" << itemId ), DOCUMENT( "$set" << DOCUMENT( "votesCount" << count ) ) );
        printf( "Updating votes count [%d/%d]...\n", ++progress, total );
    }
}
Пример #6
0
// ** Films::showStats
// Gathers the "votesCount" field of every document in the source "items" collection and
// prints summary statistics, followed by the number of items that have more votes than
// each power of ten from 10^2 up to 10^7.
void Films::showStats( void ) const
{
    CollectionPtr  items  = m_source->collection( "items" );
    CursorPtr      cursor = items->find();
    DocumentPtr    document;
    IntegerSamples countSamples;

    while( (document = cursor->next()) ) {
        countSamples += document->integer( "votesCount" );
    }

    IntegerArray quartiles = countSamples.quartiles();

    printf( "Votes: min %d, max %d, average amount %d, medean %d, quartiles %d/%d/%d\n", countSamples.min(), countSamples.max(), countSamples.mean(), countSamples.median(), quartiles[0], quartiles[1], quartiles[2] );

    // ** Thresholds are built with integer arithmetic: converting the floating point
    // ** result of pow( 10, i ) to int may truncate to one less than the exact power.
    int amount = 100;
    for( int i = 2; i <= 7; i++, amount *= 10 ) {
        printf( "%d items has greater than %d votes\n", countSamples.greaterCount( amount ), amount );
    }
}
Пример #7
0
// ** Films::votesForFilm
// Loads every document of the source "votes" collection whose "itemId" equals 'filmId'
// and returns the ratings as features keyed by the voting user's id.
//
// Debug tracing that used to print several lines per vote was removed: this function is
// invoked for every film while items are being loaded, and the traces passed size() to a
// %d conversion.
NumericFeatures Films::votesForFilm( int filmId ) const
{
    NumericFeatures result;

    CollectionPtr votes  = m_source->collection( "votes" );
    CursorPtr     cursor = votes->find( QUERY( "itemId" << filmId ) );
    DocumentPtr   document;

    while( (document = cursor->next()) ) {
        result.set( document->integer( "userId" ), document->number( "rating" ) );
    }

    return result;
}
Пример #8
0
// ** Films::updateSharedAndSimilarityRanges
void Films::updateSharedAndSimilarityRanges( void )
{
    CollectionPtr similar   = m_target->collection( "similar" );
    CursorPtr     cursor    = similar->find();
    int           count     = similar->count();
    int           progress  = 0;
    DocumentPtr   document;

    FloatSamples  similaritySamples, sharedSamples;

    while( (document = cursor->next()) ) {
        float similarity, shared;
        decodeSimilarity( document->integer( "value" ), similarity, shared );

        similaritySamples += similarity;
        sharedSamples     += shared;

        if( (++progress % 1000) == 0 ) printf( "Updating ranges [%d/%d]\n", progress, count );
    }

    m_target->collection( "info" )->upsert( QUERY( "type" << "similarity" ), DOCUMENT( "type" << "similarity" << "shared" << sharedSamples.quartiles() << "similarity" << similaritySamples.quartiles() ) );
}
Пример #9
0
// ** StreamedFilms::documentToItem
// Fills 'item' from 'document': the id comes from the "itemId" field and the "votes"
// feature space is loaded through m_films.votesForFilm() for that id. No other field of
// the item is written here.
void StreamedFilms::documentToItem( RecommenderItem& item, const DocumentPtr& document ) const
{
    item.m_id = document->integer( "itemId" );

    const NumericFeatures votes = m_films.votesForFilm( item.m_id );
    item.m_featureSpaces["votes"] = votes;
}
Пример #10
0
// ** Films::filmFromDocument
// Builds a Film from a database document: the constructor receives the document's _id,
// the "name.ru" string, the "genres" integer set and the "year" integer; the "video"
// string is assigned to m_video afterwards.
Film Films::filmFromDocument( const DocumentPtr& document ) const
{
    Film result(
        document->_id(),
        document->string( "name.ru" ),
        document->integerSet( "genres" ),
        document->integer( "year" )
    );

    result.m_video = document->string( "video" );
    return result;
}