/**
 * Builds the rating matrix from the records supplied by a DataReader.
 *
 * Every record is read with dreader.readline() until the reader reports eof()
 * or returns an empty record. For each record the user id, the item id and
 * the rating are taken from the given column positions. User and item ids are
 * mapped to consecutive row / column indexes in order of first appearance
 * (m_userMapper / m_itemMapper), and the ratings are finally stored in
 * m_smatrix, which is resized to (number of users) x (number of items).
 *
 * Records are skipped, with a warning on standard output, when:
 *   - a column position is negative or beyond the fields of the record,
 *   - the rating field cannot be parsed as a number,
 *   - the <user,item> pair was already seen (the first rating is kept).
 * Skipped records do not contribute to m_numRatings, m_sumRatings,
 * m_minRating or m_maxRating.
 *
 * @param dreader    source of records; consumed until eof or an empty record
 * @param userpos    zero-based field index of the user id
 * @param itempos    zero-based field index of the item id
 * @param ratingpos  zero-based field index of the rating
 */
RatingMatrix::RatingMatrix( DataReader& dreader, int userpos, int itempos, int ratingpos )
: m_numRatings( 0 ),
  m_sumRatings( 0 ),
  m_minRating( INT_MAX ),
  m_maxRating( INT_MIN )
{
   typedef pair<size_t,size_t> Cell;
   typedef map<Cell, double> CellMap;

   // Negative positions can never address a field; checked once, reported per record.
   const bool positionsValid = userpos >= 0 && itempos >= 0 && ratingpos >= 0;

   // Ratings are buffered here because the matrix dimensions are only known
   // once every user and item has been seen.
   CellMap inputData;

   while( !dreader.eof() )
   {
      vector<string> line = dreader.readline();
      if( line.empty() )
      {
         // An empty record ends the input (same as the original behaviour).
         break;
      }

      // Guard against short records before indexing into them.
      if( !positionsValid
          || static_cast<size_t>( userpos ) >= line.size()
          || static_cast<size_t>( itempos ) >= line.size()
          || static_cast<size_t>( ratingpos ) >= line.size() )
      {
         cout << "warning: malformed line with " << line.size() << " fields skipped" << endl;
         continue;
      }

      const string& userId = line[userpos];
      const string& itemId = line[itempos];

      // Parse the rating before touching the mappers so that an unusable
      // record does not register a user or an item.
      std::stringstream ss( line[ratingpos] );
      double rating = 0;
      if( !( ss >> rating ) )
      {
         cout << "warning: invalid rating [" << line[ratingpos] << "] for <user:" << userId
              << " ,item:" << itemId << "> skipped" << endl;
         continue;
      }
      //cout << "user [" << userId << "] item [" << itemId << "] rating: [" << rating << "]" << endl;

      // insert() leaves an existing entry untouched and returns it, so a new id
      // gets the next free index while a known id keeps the one it already has.
      const size_t row = m_userMapper.insert(
            map<string,size_t>::value_type( userId, m_userMapper.size() ) ).first->second;
      const size_t col = m_itemMapper.insert(
            map<string,size_t>::value_type( itemId, m_itemMapper.size() ) ).first->second;

      // The first rating of a <user,item> pair wins; later ones are reported and dropped.
      if( !inputData.insert( CellMap::value_type( Cell( row, col ), rating ) ).second )
      {
         cout << "warning: <user:" << userId << " ,item:" << itemId << "> pair duplicated" << endl;
         continue;
      }

      // Statistical data
      ++m_numRatings;
      m_sumRatings += rating;
      if( rating < m_minRating )
      {
         m_minRating = rating;
      }
      if( rating > m_maxRating )
      {
         m_maxRating = rating;
      }
   }

   m_smatrix.resize( m_userMapper.size(), m_itemMapper.size() );

   for( CellMap::const_iterator cell = inputData.begin() ; cell != inputData.end() ; ++cell )
   {
      //cout << "row: " << cell->first.first << " col: " << cell->first.second << " rating: " << cell->second << endl;
      m_smatrix.set( cell->first.first, cell->first.second, cell->second );
   }

   //cout << "min rating: " << m_minRating << " max rating: " << m_maxRating << endl;
   //cout << "dim: " << m_smatrix.rows() << "x" << m_smatrix.columns() << endl;
   //m_smatrix.print();
}