// Example #1
// 0
namespace
{
   /// Returns the dense index associated with `key` in `mapper`. The first time a
   /// key is seen it is assigned the next free index, i.e. the mapper's current size.
   /// A single insert() both performs the lookup and, if needed, the registration.
   template< typename Mapper >
   size_t lookupOrAssignIndex( Mapper& mapper, const std::string& key )
   {
      return mapper.insert( typename Mapper::value_type( key, mapper.size() ) ).first->second;
   }

   /// True when `pos` is a usable index into a record holding `numFields` fields.
   bool isValidField( int pos, size_t numFields )
   {
      return pos >= 0 && static_cast<size_t>( pos ) < numFields;
   }
}

/**
 * Builds the rating matrix from the records produced by a DataReader.
 *
 * Records are read until the reader reports eof or returns an empty record.
 * Each distinct user id / item id is mapped to a dense row / column index in
 * order of first appearance (m_userMapper / m_itemMapper), and the rating
 * statistics (count, sum, min, max) are accumulated over the accepted records.
 * Once all input is consumed the sparse matrix is resized to
 * (#users x #items) and filled with the collected ratings.
 *
 * Records that cannot be used are reported on stdout and skipped:
 *  - records that do not contain all of the requested fields,
 *  - records whose rating field does not parse as a number,
 *  - repeated <user,item> pairs (the first rating seen is kept).
 *
 * @param dreader    source of tokenized records
 * @param userpos    index of the user id field within a record
 * @param itempos    index of the item id field within a record
 * @param ratingpos  index of the rating field within a record
 */
RatingMatrix::RatingMatrix( DataReader& dreader, int userpos, int itempos, int ratingpos )
: m_numRatings( 0 ),
  m_sumRatings( 0 ),
  // NOTE(review): the bounds are seeded with the int limits; ratings outside
  // [INT_MIN, INT_MAX] would not be tracked correctly - confirm member types.
  m_minRating( INT_MAX ),
  m_maxRating( INT_MIN )
{
   typedef std::pair<size_t,size_t> Cell;
   typedef std::map<Cell, double> CellMap;

   // Ratings are staged here first because the matrix dimensions are only
   // known after every user and item has been seen.
   CellMap inputData;
   while( !dreader.eof() )
   {
      const std::vector<std::string> line = dreader.readline();
      if( line.empty() )
      {
         break;
      }

      // Guard against short records (and negative positions) before indexing.
      const size_t numFields = line.size();
      if( !isValidField( userpos, numFields ) ||
          !isValidField( itempos, numFields ) ||
          !isValidField( ratingpos, numFields ) )
      {
         std::cout << "warning: record with " << numFields
                   << " field(s) lacks a requested column, skipped" << std::endl;
         continue;
      }

      const std::string& userId = line[userpos];
      const std::string& itemId = line[itempos];

      // Reject ratings that are not numeric instead of recording a bogus value.
      std::stringstream ss( line[ratingpos] );
      double rating = 0.0;
      if( !( ss >> rating ) )
      {
         std::cout << "warning: <user:" << userId << " ,item:" << itemId
                   << "> has invalid rating [" << line[ratingpos] << "], skipped" << std::endl;
         continue;
      }

      const size_t row = lookupOrAssignIndex( m_userMapper, userId );
      const size_t col = lookupOrAssignIndex( m_itemMapper, itemId );

      // insert() leaves an existing entry untouched, so the first rating wins.
      if( !inputData.insert( std::make_pair( Cell( row, col ), rating ) ).second )
      {
         std::cout << "warning: <user:" << userId << " ,item:" << itemId << "> pair duplicated" << std::endl;
         continue;
      }

      // Statistical data
      ++m_numRatings;
      m_sumRatings += rating;
      if( rating < m_minRating )
      {
         m_minRating = rating;
      }
      if( rating > m_maxRating )
      {
         m_maxRating = rating;
      }
   }

   // Dimensions are now final: one row per user, one column per item.
   m_smatrix.resize( m_userMapper.size(), m_itemMapper.size() );
   const CellMap::const_iterator end = inputData.end();
   for( CellMap::const_iterator ind = inputData.begin() ; ind != end ; ++ind )
   {
      m_smatrix.set( ind->first.first, ind->first.second, ind->second );
   }
}