void modify_data(DataSequence& data, DataSequence const& old_data)
      {
        // Put the freshly built entries into the order defined by less_ so
        // that they can be walked side by side with the previous entries.
        // NOTE(review): the merge below presumably relies on old_data already
        // being in that same order -- confirm against the callers.
        boost::sort(
          data, md::neighbor::inexclusive_cells_neighbor_list_detail::less_()
        );

        typedef typename DataSequence::iterator new_iterator;
        typedef typename DataSequence::const_iterator previous_iterator;

        new_iterator const new_end = data.end();
        previous_iterator const previous_end = old_data.end();
        new_iterator new_pos = data.begin();
        previous_iterator previous_pos = old_data.begin();

        // Two-cursor walk: whenever both cursors point at entries that
        // compare equal, the previous entry overwrites the new one; otherwise
        // only the cursor pointing at the "smaller" entry moves forward.
        while (new_pos != new_end && previous_pos != previous_end)
        {
          if (!md::neighbor::inexclusive_cells_neighbor_list_detail::equal_to(
                *new_pos, *previous_pos
              ))
          {
            if (md::neighbor::inexclusive_cells_neighbor_list_detail::less(
                  *new_pos, *previous_pos
                ))
            {
              // New entry without a counterpart in old_data: keep it as is.
              ++new_pos;
            }
            else
            {
              // Previous entry that no longer appears in data: skip it.
              ++previous_pos;
            }
            continue;
          }

          // Matching entries: carry the previous entry over into data.
          *new_pos = *previous_pos;
          ++new_pos;
          ++previous_pos;
        }
      }
      // Rebuilds the neighbor list for a single particle set.
      //
      // This overload is enabled only when the element type of DataSequence
      // satisfies md::neighbor::meta::is_simple_neighbor.  The previous
      // contents of data are discarded, the list is regenerated by
      // update_impl from squared_search_length and particles, and
      // buffer_length is overwritten with the value returned by
      // get_buffer_length.
      typename boost::enable_if<
        md::neighbor::meta::is_simple_neighbor<
          typename DataSequence::value_type
        >,
      void>::type update(
        DataSequence& data, Length& buffer_length, Length const cutoff_length,
        Length const search_length, SquaredLength const squared_search_length,
        Particles const& particles, Time const time_step)
      {
        // Start from an empty list; nothing from the old one is kept.
        data.clear();

        md::neighbor::inexclusive_cells_neighbor_list_detail
          ::update_impl(data, squared_search_length, particles);

        // Refresh the buffer length reported back to the caller.
        buffer_length
          = md::neighbor::inexclusive_cells_neighbor_list_detail
            ::get_buffer_length(
              cutoff_length,
              search_length,
              particles,
              time_step
            );
      }
      // Rebuilds the neighbor list between two particle sets.
      //
      // This overload is enabled only when the element type of DataSequence
      // is NOT a simple neighbor and DataSequence::iterator has an iterator
      // category convertible to std::random_access_iterator_tag.  Unlike the
      // simple-neighbor overload, the previous list is snapshotted first so
      // that modify_data can copy matching previous entries back into the
      // regenerated list.
      typename boost::enable_if<
        boost::mpl::and_<
          typename boost::mpl::not_<
            typename md::neighbor::meta::is_simple_neighbor<
              typename DataSequence::value_type
            >::type
          >::type,
          typename MD_IS_CONVERTIBLE<
            typename std::iterator_traits<
              typename DataSequence::iterator
            >::iterator_category,
            std::random_access_iterator_tag
          >::type
        >,
      void>::type update(
        DataSequence& data, Length& buffer_length, Length const cutoff_length,
        Length const search_length, SquaredLength const squared_search_length,
        Particles1 const& particles1, Particles2 const& particles2,
        Time const time_step)
      {
        // Snapshot of the list as it was before this update.
        DataSequence const previous_data(data);

        // Regenerate the list from scratch.
        data.clear();
        md::neighbor::inexclusive_cells_neighbor_list_detail
          ::update_impl(data, squared_search_length, particles1, particles2);

        // Sort the new list and restore entries that also existed before.
        md::neighbor::inexclusive_cells_neighbor_list_detail
          ::modify_data(data, previous_data);

        // Refresh the buffer length reported back to the caller.
        buffer_length
          = md::neighbor::inexclusive_cells_neighbor_list_detail
            ::get_buffer_length(
              cutoff_length,
              search_length,
              particles1,
              particles2,
              time_step
            );
      }
// Example no. 4
// 0
int DataSet::load(const char *fileData, const char *fileStateLabels,
				  const char *fileSeqLabels, const char * fileAdjMat,
				  const char * fileStatesPerNodes,const char * fileDataSparse)
{
	istream* isData = NULL;
	istream* isDataSparse = NULL;
	istream* isStateLabels = NULL;
	istream* isSeqLabels = NULL;
	istream* isAdjMat = NULL;
	istream* isStatesPerNodes = NULL;

	if(fileData != NULL)
	{
		isData = new ifstream(fileData);
		if(!((ifstream*)isData)->is_open())
		{
			cerr << "Can't find data file: " << fileData << endl;
			delete isData;
			isData = NULL;
			throw BadFileName("Can't find data files");
		}
	}

	if(fileStateLabels != NULL)
	{
		isStateLabels = new ifstream(fileStateLabels);
		if(!((ifstream*)isStateLabels)->is_open())
		{
			cerr << "Can't find state labels file: " << fileStateLabels << endl;
			delete isStateLabels;
			isStateLabels = NULL;
			throw BadFileName("Can't find state labels file");
		}
	}
	if(fileSeqLabels != NULL)
	{
		isSeqLabels = new ifstream(fileSeqLabels);
		if(!((ifstream*)isSeqLabels)->is_open())
		{
			cerr << "Can't find sequence labels file: " << fileSeqLabels << endl;
			delete isSeqLabels;
			isSeqLabels = NULL;
		}
	}
	if(fileAdjMat != NULL)
	{
		isAdjMat = new ifstream(fileAdjMat);
		if(!((ifstream*)isAdjMat)->is_open())
		{
			cerr << "Can't find adjency matrices file: " << fileAdjMat << endl;
			delete isAdjMat;
			isAdjMat = NULL;
		}
	}
	if(fileStatesPerNodes != NULL)
	{
		isStatesPerNodes = new ifstream(fileStatesPerNodes);
		if(!((ifstream*)isStatesPerNodes)->is_open())
		{
			cerr << "Can't find states per nodes file: " << fileStatesPerNodes << endl;
			delete isStatesPerNodes;
			isStatesPerNodes = NULL;
		}
	}
	
	if(fileDataSparse != NULL)
	{
		isDataSparse = new ifstream(fileDataSparse);
		if(!((ifstream*)isDataSparse)->is_open())
		{
			cerr << "Can't find sparse data file: " << fileDataSparse << endl;
			delete isDataSparse;
			isDataSparse = NULL;
			throw BadFileName("Can't find sparse data files");
		}
	}

	DataSequence* seq = new DataSequence;
	int seqLabel;

	while(seq->load(isData,isStateLabels,isAdjMat,isStatesPerNodes,isDataSparse) == 0)
	{
		if(isSeqLabels)
		{
			*isSeqLabels >> seqLabel;
			seq->setSequenceLabel(seqLabel);
		}
		container.insert(container.end(),seq);
		seq = new DataSequence;				
	}
	delete seq;
	if(isData)
        delete isData;
	if(isStateLabels)
		delete isStateLabels;
	if(isSeqLabels)
		delete isSeqLabels;
	if(isAdjMat)
		delete isAdjMat;
	if(isStatesPerNodes)
		delete isStatesPerNodes;
	if(isDataSparse)
		delete isDataSparse;

	return 0;
}
// Example no. 5
// 0
/**
 * Accumulate the per-sequence gradients of a whole dataset and add the L2
 * regularization term.
 *
 * @param vecGradient  output; resized to the number of features (or zeroed
 *                     when it already has that length), then filled with the
 *                     negated sum of the per-sequence gradients plus the
 *                     regularization gradient
 * @param m            model providing the weights and the regularization
 *                     settings
 * @param X            dataset whose sequences are processed
 * @return the summed per-sequence values (averaged over the dataset size for
 *         a max-margin model) plus the regularization value
 */
double Gradient::computeGradient(dVector& vecGradient, Model* m, DataSet* X)
{
  double ans = 0.0;

#ifdef _OPENMP
  // Use the OpenMP maximum when no thread count has been set yet, then pass
  // the count on to the inference engine and the feature generator.
  if( nbThreadsMP < 1 )
    nbThreadsMP = omp_get_max_threads();
  setMaxNumberThreads(nbThreadsMP);
  pInfEngine->setMaxNumberThreads(nbThreadsMP);
  pFeatureGen->setMaxNumberThreads(nbThreadsMP);
#endif

  // Make sure the output vector has one entry per feature.
  int nbFeatures = pFeatureGen->getNumberOfFeatures();
  if(vecGradient.getLength() == nbFeatures)
    vecGradient.set(0);
  else
    vecGradient.create(nbFeatures);

  ////////////////////////////////////////////////////////////
  // Parallel region begins.
  //
  // The loop index is a signed int because OpenMP 2.5 only accepts signed
  // integer iteration variables; unsigned, pointer and random access
  // iterator types are accepted from OpenMP 3.0 (_OPENMP>=200805) on.
  //
  // schedule(dynamic) lets each thread fetch the next iteration when it is
  // done with the current one, which helps when iterations differ in cost.
#pragma omp parallel default(none) \
  shared(vecGradient, X, m, ans, nbFeatures, std::cout)
  {
    // Gradient accumulated by this thread only.
    dVector threadGradient(nbFeatures, COLVECTOR, 0.0);

#pragma omp for schedule(dynamic) reduction(+:ans)
    for(int seqIdx=0; seqIdx<X->size(); ++seqIdx) {
      DataSequence* seq = X->at(seqIdx);
      if( !m->isWeightSequence() || seq->getWeightSequence() == 1.0 ) {
        // Unweighted sequence: accumulate straight into the thread gradient.
        ans += computeGradient(threadGradient, m, seq);
      }
      else {
        // Weighted sequence: compute into a scratch vector so the weight is
        // applied to this sequence's contribution only.
        dVector weighted(nbFeatures, COLVECTOR, 0.0);
        ans += computeGradient(weighted, m, seq) * seq->getWeightSequence();
        weighted.multiply(seq->getWeightSequence());
        threadGradient.add(weighted);
      }
    }

    // Merge the thread gradients one thread at a time: a named critical
    // section is never executed by two threads simultaneously.
#pragma omp critical (reduce_sum)
    {
      vecGradient.add(threadGradient);
    }
  }
  // Parallel region ends.
  ////////////////////////////////////////////////////////////
  vecGradient.negate();

  // Regularization factor:
  //   max-margin: min L = 0.5*sigma*W*W + Loss()            -> sigma
  //   MLE:        min L = 0.5/(sigma*sigma)*W*W - log p(y|x) -> 1/sigma^2
  double scale;
  if( m->isMaxMargin() )
    scale = m->getRegL2Sigma();
  else
    scale = 1/(double)(m->getRegL2Sigma()*m->getRegL2Sigma());

  // The max-margin value is averaged over the number of sequences.
  if( m->isMaxMargin() )
    ans = (1/(double)X->size()) * ans;

  // No regularization requested.
  if(m->getRegL2Sigma()==0.0f)
    return ans;

  for(int featIdx=0; featIdx<nbFeatures; ++featIdx)
    vecGradient[featIdx] += (*m->getWeights())[featIdx]*scale;
  // NOTE(review): l2Norm(false) presumably returns the squared norm -- confirm.
  ans += 0.5*scale*m->getWeights()->l2Norm(false);

  return ans;
}
/**
 * Accumulate the per-sequence gradients of a whole dataset and add the L2
 * regularization term.
 *
 * Each thread sums its share of the sequences into its own slot of
 * localGrads and into a local scalar; the per-thread results are then merged
 * into vecGradient and the return value inside a named critical section.
 * Without OpenMP the same code runs with TID == 0.
 *
 * @param vecGradient  output; resized to the number of features (or zeroed
 *                     when it already has that length), then filled with the
 *                     negated sum of the per-sequence gradients plus the
 *                     regularization gradient
 * @param m            model providing the weights, the debug level and the
 *                     L2 sigma
 * @param X            dataset whose sequences are processed
 * @return the summed per-sequence values plus the regularization value
 */
double Gradient::computeGradient(dVector& vecGradient, Model* m, DataSet* X)
{
	// Make sure vecGradient has one entry per feature
	int nbFeatures = pFeatureGen->getNumberOfFeatures();
	double ans = 0.0;
	// Thread index; stays 0 when built without OpenMP
	int TID = 0;
	if(vecGradient.getLength() != nbFeatures)
		vecGradient.create(nbFeatures);
	else
		vecGradient.set(0);
	// Propagate the OpenMP thread count to this object, the inference engine
	// and the feature generator
#ifdef _OPENMP
	setMaxNumberThreads(omp_get_max_threads());
	pInfEngine->setMaxNumberThreads(omp_get_max_threads());
	pFeatureGen->setMaxNumberThreads(omp_get_max_threads());
#endif
	// Reset the per-thread gradient buffers (localGrads).
	// NOTE(review): presumably setMaxNumberThreads() above keeps nbThreadsMP
	// and the size of localGrads in sync -- confirm.
	for(int t=0;t<nbThreadsMP;t++)
	{
		if(localGrads[t].getLength() != nbFeatures)
			localGrads[t].resize(1,nbFeatures,0);
		else
			localGrads[t].set(0);
	}

////////////////////////////////////////////////////////////
// Start of parallel Region

	// Some weird stuff in gcc 4.1, with openmp 2.5 support:
	// for that compiler vecGradient is left out of the shared() list.
	// NOTE(review): presumably a workaround for how that compiler treats the
	// reference parameter under default(none) -- confirm.
#if ((_OPENMP == 200505) && __GNUG__)
#pragma omp parallel \
	shared(X, m, ans, nbFeatures, std::cout)	\
	private(TID) \
	default(none)
#else
#pragma omp parallel \
	shared(vecGradient, X, m, ans, nbFeatures, std::cout)	\
	private(TID) \
	default(none)
#endif
	{
#ifdef _OPENMP 
		TID = omp_get_thread_num();
#endif
		// Sum of the per-sequence values computed by this thread
		double localSum = 0;

#ifdef WITH_SEQUENCE_WEIGHTS
		// Scratch gradient so a sequence weight is applied to one sequence only
		dVector tmpVecGradient(nbFeatures);
#endif

#pragma omp for
		// we can use unsigned if we have openmp 3.0 support (_OPENMP>=200805).
		// Exactly one of the three loop headers below is compiled.
#ifdef _OPENMP 
    #if _OPENMP >= 200805
		for(unsigned int i = 0; i< X->size(); i++){
    #else
	    for(int i = 0; i< X->size(); i++){
    #endif
#else
		for(unsigned int i = 0; i< X->size(); i++){
#endif
			if (m->getDebugLevel() >=2){
#pragma omp critical(output)
				std::cout << "Thread "<<TID<<" computes gradient for sequence " 
						  << i <<" out of " << (int)X->size() 
						  << " (Size: " <<  X->at(i)->length() << ")" << std::endl;
			}
			DataSequence* x = X->at(i);
#ifdef WITH_SEQUENCE_WEIGHTS
			// Weighted build: scale both the value and the gradient of this
			// sequence by its weight before accumulating
			tmpVecGradient.set(0);
			localSum += computeGradient(tmpVecGradient, m, x) * x->getWeightSequence();
			if(x->getWeightSequence() != 1.0)
				tmpVecGradient.multiply(x->getWeightSequence());
			localGrads[TID].add(tmpVecGradient);
#else
			// Unweighted build: accumulate straight into this thread's buffer
			localSum += computeGradient(localGrads[TID], m, x);// * x->getWeightSequence();
#endif
		}
#pragma omp critical (reduce_sum)
		// We now put together the sums: each thread adds its partial results
		// to the shared totals inside the critical section
		{
			if( m->getDebugLevel() >= 2){
				std::cout<<"Thread "<<TID<<" update sums"<<std::endl;
			}
			ans += localSum;
			vecGradient.add(localGrads[TID]);
		}
	} 
	
// End of parallel Region
////////////////////////////////////////////////////////////

	// because we are minimizing -LogP
	vecGradient.negate();

	// Add the regularization term (skipped when sigma is zero):
	// gradient += W / sigma^2, value += l2Norm(false) / (2 * sigma^2).
	// NOTE(review): l2Norm(false) presumably returns the squared norm -- confirm.
	double sigmaL2Square = m->getRegL2Sigma()*m->getRegL2Sigma();
	if(sigmaL2Square != 0.0f) {
		if (m->getDebugLevel() >= 2){
			std::cout << "Adding L2 norm gradient\n";
		}
		for(int f = 0; f < nbFeatures; f++) {
			vecGradient[f] += (*m->getWeights())[f]/sigmaL2Square;
		}
		double weightNorm = m->getWeights()->l2Norm(false);
		ans += weightNorm / (2.0*m->getRegL2Sigma()*m->getRegL2Sigma());	
	}
	return ans;
}