Esempio n. 1
0
/**
 * Prepare the grid table for use on the given rank.
 *
 * Keeps a pointer to the parameters, initializes the k-mer academy and the
 * hash table (sized with values read from the parameters), and resets the
 * bookkeeping members of the table.
 */
void GridTable::constructor(int rank,Parameters*parameters){
	m_parameters=parameters;
	m_kmerAcademy.constructor(rank,m_parameters);
	m_size=0;

	// geometry of the hash table, as configured in the parameters
	uint64_t bucketCount=m_parameters->getNumberOfBuckets();
	int groupSize=m_parameters->getNumberOfBucketsPerGroup();
	double maximumLoad=m_parameters->getLoadFactorThreshold();

	cout<<"[GridTable] buckets="<<bucketCount<<" bucketsPerGroup="<<groupSize
		<<" loadFactorThreshold="<<maximumLoad<<endl;

	m_hashTable.constructor(bucketCount,"RAY_MALLOC_TYPE_GRID_TABLE",
		m_parameters->showMemoryAllocations(),m_parameters->getRank(),
		groupSize,maximumLoad);

	// verbosity of the hash table is opt-in from the command line
	if(m_parameters->hasOption("-hash-table-verbosity")){
		m_hashTable.toggleVerbosity();
	}

	m_inserted=false;

	if(m_parameters->showMemoryUsage()){
		showMemoryUsage(rank);
	}

	m_findOperations=0;
	m_verbose=false;
}
Esempio n. 2
0
void SchreyerFrame::show(int len) const
{
  std::cout << "#levels=" << mFrame.mLevels.size() << " currentLevel=" << currentLevel() << std::endl;
  for (int i=0; i<mFrame.mLevels.size(); i++)
    {
      auto& myframe = level(i);
      auto& myorder = schreyerOrder(i);
      if (myframe.size() == 0) continue;
      std::cout << "--- level " << i << " ------" << std::endl;
      for (int j=0; j<myframe.size(); j++)
        {
          std::cout << "    " << j << " " << myframe[j].mDegree 
                    << " (" << myframe[j].mBegin << "," << myframe[j].mEnd << ") " << std::flush;
          std::cout << "(size:" << myframe[j].mSyzygy.len << ") [";
          monoid().showAlpha(myorder.mTotalMonom[j]);
          std::cout << "  " << myorder.mTieBreaker[j] << "] ";
          if (len == 0 or myframe[j].mSyzygy.len == 0)
            monoid().showAlpha(myframe[j].mMonom);
          else
            display_poly(stdout, ring(), myframe[j].mSyzygy);
          std::cout << std::endl;
        }
    }
  showMemoryUsage();
}
Esempio n. 3
0
/**
 * Prepare the grid table for use on the given rank: keep the parameters,
 * initialize the k-mer academy and the hash table, and reset the state.
 * Memory usage is reported at the end when the parameters ask for it.
 */
void GridTable::constructor(int rank,Parameters*parameters){
	m_parameters=parameters;

	m_kmerAcademy.constructor(rank,m_parameters);

	m_size=0;

	m_hashTable.constructor(RAY_MALLOC_TYPE_GRID_TABLE,m_parameters->showMemoryAllocations(),
		m_parameters->getRank());

	m_inserted=false;

	// nothing more to do unless memory reporting is enabled
	if(!m_parameters->showMemoryUsage()){
		return;
	}

	showMemoryUsage(rank);
}
Esempio n. 4
0
void SeedingData::computeSeeds(){
	if(!m_initiatedIterator){
		m_last=time(NULL);

		m_SEEDING_i=0;

		m_activeWorkerIterator=m_activeWorkers.begin();
		m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters);
		m_initiatedIterator=true;
		m_maximumAliveWorkers=30000;
		#ifdef ASSERT
		m_splayTreeIterator.hasNext();
		#endif
	}

	m_virtualCommunicator->processInbox(&m_activeWorkersToRestore);

	if(!m_virtualCommunicator->isReady()){
		return;
	}

	// flush all mode is necessary to empty buffers and 
	// restart things from scratch..

	// 1. iterate on active workers
	if(m_activeWorkerIterator!=m_activeWorkers.end()){
		uint64_t workerId=*m_activeWorkerIterator;
		#ifdef ASSERT
		assert(m_aliveWorkers.count(workerId)>0);
		assert(!m_aliveWorkers[workerId].isDone());
		#endif
		m_virtualCommunicator->resetLocalPushedMessageStatus();

		//force the worker to work until he finishes or pushes something on the stack
		while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_aliveWorkers[workerId].work();
		}

		if(m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_waitingWorkers.push_back(workerId);
		}
		if(m_aliveWorkers[workerId].isDone()){
			m_workersDone.push_back(workerId);
			vector<Kmer> seed=*(m_aliveWorkers[workerId].getSeed());

			int nucleotides=seed.size()+(m_wordSize)-1;

			// only consider the long ones.
			if(nucleotides>=m_parameters->getMinimumContigLength()){
				
				Kmer firstVertex=seed[0];
				Kmer lastVertex=seed[seed.size()-1];
				Kmer firstReverse=m_parameters->_complementVertex(&lastVertex);

				if(firstVertex<firstReverse){
					printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size());
					fflush(stdout);

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
					m_SEEDING_seeds.push_back(seed);
				}
			}
		}
		m_activeWorkerIterator++;
	}else{
		updateStates();

		//  add one worker to active workers
		//  reason is that those already in the pool don't communicate anymore -- 
		//  as for they need responses.
		if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){
			// there is at least one worker to start
			// AND
			// the number of alive workers is below the maximum
			if(m_SEEDING_i<(uint64_t)m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){
				if(m_SEEDING_i % 100000 ==0){
					printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size());
					fflush(stdout);

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
				}
				#ifdef ASSERT
				if(m_SEEDING_i==0){
					assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0);
				}
				#endif
				Vertex*node=m_splayTreeIterator.next();
				Kmer vertexKey=*(m_splayTreeIterator.getKey());

				int coverage=node->getCoverage(&vertexKey);
				int minimum=5;
				if(coverage<minimum){
					m_completedJobs++;
				}else{
					m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i);
					m_activeWorkers.insert(m_SEEDING_i);
				}

				int population=m_aliveWorkers.size();
				if(population>m_maximumWorkers){
					m_maximumWorkers=population;
				}

				m_SEEDING_i++;

				// skip the reverse complement as we don't really need it anyway.
			}else{
				m_virtualCommunicator->forceFlush();
			}
		}

		// brace yourself for the next round
		m_activeWorkerIterator=m_activeWorkers.begin();
	}

	#ifdef ASSERT
	assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers);
	#endif

	if((int)m_subgraph->size()==m_completedJobs){
		(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
		printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size());
		fflush(stdout);
		printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size());
		fflush(stdout);
		printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers);
		fflush(stdout);
		m_virtualCommunicator->printStatistics();
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
		m_outbox->push_back(aMessage);

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}

		#ifdef ASSERT
		assert(m_aliveWorkers.size()==0);
		assert(m_activeWorkers.size()==0);
		#endif

		// sort the seeds by length
		std::sort(m_SEEDING_seeds.begin(),m_SEEDING_seeds.end(),myComparator_sort);
	}
}
Esempio n. 5
0
/**
 * Handler for the slave mode RAY_SLAVE_MODE_START_SEEDING.
 *
 * The method is written to be called repeatedly. On each call it either
 *  - lets the worker under the active-worker iterator work until it is done
 *    or has pushed a message, then classifies the finished worker's path, or
 *  - once the active set has been iterated, updates the worker states and,
 *    if allowed, creates one worker for the next vertex of the subgraph.
 *
 * If the checkpoint "Seeds" exists, seeds are loaded from it instead of being
 * computed and the master is notified immediately.
 *
 * A finished path is skipped (and counted in the matching statistic) when it
 * has dead ends, is a bubble weak component, is too short, is not owned by
 * this orientation, or has a peak coverage outside the accepted range.
 *
 * When the number of completed jobs equals the size of the subgraph, the
 * statistics are printed, the slave mode is set to RAY_SLAVE_MODE_DO_NOTHING,
 * RAY_MPI_TAG_SEEDING_IS_OVER is sent to the master and the seeds are sorted.
 */
void SeedingData::call_RAY_SLAVE_MODE_START_SEEDING(){
	// one-time initialization on the first call
	if(!m_initiatedIterator){
		m_last=time(NULL);

		m_SEEDING_i=0;

		m_activeWorkerIterator=m_activeWorkers.begin();
		m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters);
		m_initiatedIterator=true;
		m_maximumAliveWorkers=32768;

		#ifdef ASSERT
		m_splayTreeIterator.hasNext();
		#endif


		m_virtualCommunicator->resetCounters();
	}

	// use the checkpoint instead of computing seeds, if there is one
	if(!m_checkedCheckpoint){
		if(m_parameters->hasCheckpoint("Seeds")){
			cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl;
			(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
			Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
			m_outbox->push_back(&aMessage);

			loadCheckpoint();

			return;
		}
		m_checkedCheckpoint=true;
	}

	m_virtualCommunicator->processInbox(&m_activeWorkersToRestore);

	if(!m_virtualCommunicator->isReady()){
		return;
	}

	// flush all mode is necessary to empty buffers and 
	// restart things from scratch..

	// 1. iterate on active workers
	if(m_activeWorkerIterator!=m_activeWorkers.end()){
		WorkerHandle workerId=*m_activeWorkerIterator;
		#ifdef ASSERT
		assert(m_aliveWorkers.count(workerId)>0);
		assert(!m_aliveWorkers[workerId].isDone());
		#endif
		m_virtualCommunicator->resetLocalPushedMessageStatus();

		//force the worker to work until he finishes or pushes something on the stack
		while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_aliveWorkers[workerId].work();
		}

		if(m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_waitingWorkers.push_back(workerId);
		}
		if(m_aliveWorkers[workerId].isDone()){
			m_workersDone.push_back(workerId);
			GraphPath*seed=m_aliveWorkers[workerId].getSeed();

			int nucleotides=getNumberOfNucleotides(seed->size(),m_wordSize);

			if(seed->size() > 0 && m_debugSeeds){
				cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl;
			}

			#ifdef ASSERT
			assert(nucleotides==0 || nucleotides>=m_wordSize);
			#endif

			SeedWorker*worker=&(m_aliveWorkers[workerId]);

			// classify the path; exactly one counter is incremented per path
			if(worker->isHeadADeadEnd() && worker->isTailADeadEnd()){
			
				m_skippedObjectsWithTwoDeadEnds++;

			}else if(worker->isHeadADeadEnd()){

				m_skippedObjectsWithDeadEndForHead++;

			}else if(worker->isTailADeadEnd()){

				m_skippedObjectsWithDeadEndForTail++;

			}else if(worker->isBubbleWeakComponent()){

				m_skippedObjectsWithBubbleWeakComponent++;

			// only consider the long ones.
			}else if(nucleotides>=m_parameters->getMinimumContigLength()){
				#ifdef SHOW_DISCOVERIES
				// seed is a pointer here, so it must be dereferenced with ->
				printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed->size());
				#endif
				
				#ifdef ASSERT
				assert(seed->size()>0);
				#endif

				Kmer firstVertex;
				seed->at(0,&firstVertex);
				Kmer lastVertex;
				seed->at(seed->size()-1,&lastVertex);
				Kmer firstReverse=m_parameters->_complementVertex(&lastVertex);

				// long seeds are always reported; shorter ones only in debug mode
				int minimumNucleotidesForVerbosity=1024;

				bool verbose=nucleotides>=minimumNucleotidesForVerbosity;

				if(m_debugSeeds){
					verbose=true;
				}

				// keep the path only when its first vertex is the lower of the two
				if(firstVertex<firstReverse){

					if(verbose){
						printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed->size());
					}

					if(m_parameters->showMemoryUsage() && verbose){
						showMemoryUsage(m_rank);
					}

					seed->computePeakCoverage();
		
					CoverageDepth peakCoverage=seed->getPeakCoverage();

					if(verbose)
						cout<<"Got a seed, peak coverage: "<<peakCoverage;
	
					/* ignore the seed if it has too much coverage. */
					if(peakCoverage >= m_minimumSeedCoverageDepth
						&& peakCoverage <= m_parameters->getMaximumSeedCoverage()){

						if(verbose)
							cout<<", adding seed."<<endl;

						m_SEEDING_seeds.push_back(*seed);
		
						m_eligiblePaths++;
					}else{

						if(verbose)
							cout<<", ignoring seed."<<endl;
			
						m_skippedNotEnoughCoverage++;
					}
				}else{
					m_skippedNotMine++;
				}
			}else{
				m_skippedTooShort++;
			}
		}
		m_activeWorkerIterator++;
	}else{
		updateStates();

		//  add one worker to active workers
		//  reason is that those already in the pool don't communicate anymore -- 
		//  as for they need responses.
		if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){
			// there is at least one worker to start
			// AND
			// the number of alive workers is below the maximum
			if(m_SEEDING_i<m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){
				if(m_SEEDING_i % 100000 ==0){
					printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size());

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
				}

				#ifdef ASSERT
				if(m_SEEDING_i==0){
					assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0);
				}
				#endif

				m_splayTreeIterator.next();
				Kmer vertexKey=*(m_splayTreeIterator.getKey());

				m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i,
RAY_MPI_TAG_GET_VERTEX_EDGES_COMPACT,
RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE
);
				if(m_debugSeeds)
					m_aliveWorkers[m_SEEDING_i].enableDebugMode();

				m_activeWorkers.insert(m_SEEDING_i);

				// track the peak number of alive workers
				int population=m_aliveWorkers.size();
				if(population>m_maximumWorkers){
					m_maximumWorkers=population;
				}

				m_SEEDING_i++;

				// skip the reverse complement as we don't really need it anyway.
			}else{
				m_virtualCommunicator->forceFlush();
			}
		}

		// brace yourself for the next round
		m_activeWorkerIterator=m_activeWorkers.begin();
	}

	#ifdef ASSERT
	assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers);
	#endif

	// every vertex of the subgraph has been processed: wrap up
	if((int)m_subgraph->size()==m_completedJobs){

		printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size());
		printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size());
		printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers);
		m_virtualCommunicator->printStatistics();

		cout<<"Rank "<<m_rank<<" runtime statistics for seeding algorithm: "<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of dead end for head: "<<m_skippedObjectsWithDeadEndForHead<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of dead end for tail: "<<m_skippedObjectsWithDeadEndForTail<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of two dead ends: "<<m_skippedObjectsWithTwoDeadEnds<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of bubble weak component: "<<m_skippedObjectsWithBubbleWeakComponent<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of short length: "<<m_skippedTooShort<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of bad ownership: "<<m_skippedNotMine<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of low coverage: "<<m_skippedNotEnoughCoverage<<endl;
		cout<<"Rank "<<m_rank<<" Eligible paths: "<<m_eligiblePaths<<endl;

		#ifdef ASSERT
		assert(m_eligiblePaths==(int)m_SEEDING_seeds.size());
		#endif

		(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
		m_outbox->push_back(&aMessage);

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}

		#ifdef ASSERT
		assert(m_aliveWorkers.size()==0);
		assert(m_activeWorkers.size()==0);
		#endif

		// sort the seeds by length
		std::sort(m_SEEDING_seeds.begin(),
			m_SEEDING_seeds.end(),myComparator_sort);
	}
}
Esempio n. 6
0
/// Run the computation of the non-minimal maps.
///
/// The frame is computed first, then syzygies are computed up to a top
/// slanted degree and up to length mMaxLength. The top slanted degree is
/// mHiSlantedDegree, lowered to stop.degree_limit->array[0] when
/// stop.stop_after_degree is set and that limit is smaller.
///
/// When M2_gbTrace >= 1, the computation status, the memory usage and the
/// accumulated timings are written to standard output.
///
/// The older state-machine implementation is kept below, disabled with
/// `#if 0`, for reference only.
///
/// @param stop  stop conditions; only stop_after_degree and degree_limit are
///              read by the enabled code.
void SchreyerFrame::start_computation(StopConditions& stop)
{
  // This is the computation of the non-minimal maps themselves
  //  if (level(0).size() == 0)
  //    mState = Done;;
  computeFrame();
  if (M2_gbTrace >= 1)
    {
      std::cout << "computation status after computing frame: " << std::endl;
      mComputationStatus.output();
    }

  // mHiSlantedDegree is only meaningful once the frame has been computed
  int top_slanted_degree = mHiSlantedDegree;
  if (stop.stop_after_degree and mHiSlantedDegree > stop.degree_limit->array[0])
    top_slanted_degree = stop.degree_limit->array[0];

  computeSyzygies(top_slanted_degree ,mMaxLength);

  if (M2_gbTrace >= 1)
    {
      showMemoryUsage();
      std::cout << "total time for make matrix: " << timeMakeMatrix << std::endl;
      std::cout << "total time for sort matrix: " << timeSortMatrix << std::endl;
      std::cout << "total time for reorder matrix: " << timeReorderMatrix << std::endl;
      std::cout << "total time for gauss matrix: " << timeGaussMatrix << std::endl;
      std::cout << "total time for clear matrix: " << timeClearMatrix << std::endl;
      std::cout << "total time for reset hash table: " << timeResetHashTable << std::endl; 
      std::cout << "total time for computing ranks: " << timeComputeRanks << std::endl;
    }
  
  return;
#if 0  
  // Disabled legacy implementation. The timers are declared here because
  // only this disabled code uses them.
  decltype(timer()) timeA, timeB;
  if (M2_gbTrace >= 1)
    {
      std::cout << "computation status after computing syzygies: " << std::endl;
      mComputationStatus.output();
    }
  timeA = timer();
  computeRanks(mHiSlantedDegree, mMaxLength);
  timeB = timer();
  timeComputeRanks += seconds(timeB-timeA);
  if (M2_gbTrace >= 1)
    {
      std::cout << "computation status after computing ranks: " << std::endl;
      mComputationStatus.output();
    }


  // This next part needs to be computed after the frame, as otherwise mHiSlantedDegree isn't yet set.
  int top_slanted_degree = 0;

  top_slanted_degree = mHiSlantedDegree;
  if (stop.stop_after_degree and mHiSlantedDegree > stop.degree_limit->array[0])
    top_slanted_degree = stop.degree_limit->array[0];

  while (true)
    {
      switch (mState) {
      case Initializing:
        break;
      case Frame:
        std::cerr << "ERROR: should not get to this point anymore..." << std::endl;
        if (M2_gbTrace >= 1)
          std::cout << "maxsize = " << mFrame.mLevels.size() << " and mCurrentLevel = " << mCurrentLevel << std::endl;
        if (mCurrentLevel >= mFrame.mLevels.size() or computeNextLevel() == 0)
          {
            //show(6);
            mState = Matrices;
            mCurrentLevel = 2;
            getBounds(mLoSlantedDegree, mHiSlantedDegree, mMaxLength);
            mSlantedDegree = mLoSlantedDegree;
            setBettiDisplays();
            if (M2_gbTrace >= 1)
              {
                std::cout << "non-minimal betti: " << std::endl;
                mBettiNonminimal.output();
              }
            //for (int i=0; i<mMinimalizeTODO.size(); i++)
            //  {
            //     auto a = mMinimalizeTODO[i];
            //     std::cout << "(" << a.first << "," << a.second << ") ";
            //  }
            // std::cout << std::endl;
          }
        break;
      case Matrices:
        if (M2_gbTrace >= 1)
          std::cout << "start_computation: entering matrices(" << mSlantedDegree << ", " << mCurrentLevel << ")" << std::endl;
        if (stop.always_stop) return;
        
        if (mCurrentLevel > mMaxLength)
          {
            mCurrentLevel = 2;
            mSlantedDegree++;
            if (mSlantedDegree > top_slanted_degree)
              {
                if (M2_gbTrace >= 1)
                  showMemoryUsage();
#if 0                
                debugCheckOrderAll();
#endif
                timeA = timer();
                for (auto it=mMinimalizeTODO.cbegin(); it != mMinimalizeTODO.cend(); ++it)
                  {
                    int rk = rank(it->first, it->second);
                    mBettiMinimal.entry(it->first, it->second) -= rk;
                    mBettiMinimal.entry(it->first+1, it->second-1) -= rk;
                  }
                timeB = timer();
                timeComputeRanks += seconds(timeB-timeA);
                mState = Done;
                if (M2_gbTrace >= 1)
                  mBettiMinimal.output();
                 break;
              }
            //            if (stop.stop_after_degree and mSlantedDegree > stop.degree_limit->array[0])
            //              return;
          }
        if (M2_gbTrace >= 2)
          {
            std::cout << "construct(" << mSlantedDegree << ", " << mCurrentLevel << ")..." << std::flush;
          }
        mComputer.construct(mCurrentLevel, mSlantedDegree+mCurrentLevel);
        if (M2_gbTrace >= 2)
          {
            std::cout << "done" << std::endl;
          }
        ///std::cout << "Number of distinct monomials so far = " << mAllMonomials.count() << std::endl;
        mCurrentLevel++;
        break;
      case Done:
        if (M2_gbTrace >= 1)
          {
            std::cout << "total time for make matrix: " << timeMakeMatrix << std::endl;
            std::cout << "total time for sort matrix: " << timeSortMatrix << std::endl;
            std::cout << "total time for reorder matrix: " << timeReorderMatrix << std::endl;
            std::cout << "total time for gauss matrix: " << timeGaussMatrix << std::endl;
            std::cout << "total time for clear matrix: " << timeClearMatrix << std::endl;
            std::cout << "total time for reset hash table: " << timeResetHashTable << std::endl; 
            std::cout << "total time for computing ranks: " << timeComputeRanks << std::endl;
          }
        return;
      default:
        break;
      }
    }
  #endif
}
Esempio n. 7
0
/// Compute ranks as needed and return a copy of the minimal Betti display,
/// resized to the requested degree and length window.
///
/// Slanted degrees range from mLoSlantedDegree up to a top degree, which is
/// mHiSlantedDegree, or, when stop_after_degree is set, the minimum of
/// top_slanted_degree and mHiSlantedDegree (but never below
/// mLoSlantedDegree).
///
/// What gets computed, pictured as rows of slanted degrees:
///
///     lo: . . . . . . .
///         . . . . . . .
///     hi: . . . . . .
///
/// Every row below the top degree is computed at levels 1..length_limit+1
/// (one level further is needed to get the minimal Betti numbers at level
/// length_limit); the top row is only computed at levels 1..length_limit.
///
/// @param stop_after_degree   whether top_slanted_degree should be honored.
/// @param top_slanted_degree  requested highest slanted degree.
/// @param length_limit        requested length; if it is >= maxLevel() a
///                            warning is printed and maxLevel()-1 is used,
///                            since the frame is not extended here.
/// @return a resized copy of mBettiMinimal.
BettiDisplay SchreyerFrame::minimalBettiNumbers(
                                              bool stop_after_degree,
                                              int top_slanted_degree,
                                              int length_limit
                                              )
{
  computeFrame();

  // Highest slanted degree to handle; defaults to the top of the frame.
  int top_degree = mHiSlantedDegree;
  if (stop_after_degree)
    top_degree = std::max(mLoSlantedDegree,
                          std::min(top_slanted_degree, mHiSlantedDegree));

  // The frame cannot be extended (yet): clamp the requested length instead.
  // Extending would mean changing mMaxLength, possibly mHiSlantedDegree,
  // the computation status and the minimal betti display, then recomputing
  // the frame.
  if (length_limit >= maxLevel())
    {
      std::cout << "WARNING: cannot extend resolution length" << std::endl;
      length_limit = maxLevel()-1;
    }

  // All rows strictly below the top degree, one level past length_limit.
  for (int deg = mLoSlantedDegree; deg < top_degree; ++deg)
    {
      for (int lev = 1; lev <= length_limit+1; ++lev)
        computeRank(deg, lev);
    }

  // The top row stops at length_limit.
  for (int lev = 1; lev <= length_limit; ++lev)
    computeRank(top_degree, lev);

  if (M2_gbTrace >= 1)
    {
      showMemoryUsage();
      std::cout << "total setPoly: " << poly_constructor::ncalls << std::endl;
      std::cout << "total setPolyFromArray: " << poly_constructor::ncalls_fromarray << std::endl;
      std::cout << "total ~poly: " << poly::npoly_destructor << std::endl;

      std::cout << "total time for make matrix: " << timeMakeMatrix << std::endl;
      std::cout << "total time for sort matrix: " << timeSortMatrix << std::endl;
      std::cout << "total time for reorder matrix: " << timeReorderMatrix << std::endl;
      std::cout << "total time for gauss matrix: " << timeGaussMatrix << std::endl;
      std::cout << "total time for clear matrix: " << timeClearMatrix << std::endl;
      std::cout << "total time for reset hash table: " << timeResetHashTable << std::endl; 
      std::cout << "total time for computing ranks: " << timeComputeRanks << std::endl;
    }

  // Hand back a copy cut down to the requested window.
  BettiDisplay result(mBettiMinimal);
  result.resize(mLoSlantedDegree, top_degree, length_limit);
  return result;
}
Esempio n. 8
0
void SeedingData::computeSeeds(){
	if(!m_initiatedIterator){
		m_last=time(NULL);

		m_SEEDING_i=0;

		m_activeWorkerIterator=m_activeWorkers.begin();
		m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters);
		m_initiatedIterator=true;
		m_maximumAliveWorkers=32768;

		#ifdef ASSERT
		m_splayTreeIterator.hasNext();
		#endif
	}

	if(!m_checkedCheckpoint){
		if(m_parameters->hasCheckpoint("Seeds")){
			cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl;
			(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
			Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
			m_outbox->push_back(aMessage);

			loadCheckpoint();

			return;
		}
		m_checkedCheckpoint=true;
	}

	m_virtualCommunicator->processInbox(&m_activeWorkersToRestore);

	if(!m_virtualCommunicator->isReady()){
		return;
	}

	// flush all mode is necessary to empty buffers and 
	// restart things from scratch..

	// 1. iterate on active workers
	if(m_activeWorkerIterator!=m_activeWorkers.end()){
		uint64_t workerId=*m_activeWorkerIterator;
		#ifdef ASSERT
		assert(m_aliveWorkers.count(workerId)>0);
		assert(!m_aliveWorkers[workerId].isDone());
		#endif
		m_virtualCommunicator->resetLocalPushedMessageStatus();

		//force the worker to work until he finishes or pushes something on the stack
		while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_aliveWorkers[workerId].work();
		}

		if(m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_waitingWorkers.push_back(workerId);
		}
		if(m_aliveWorkers[workerId].isDone()){
			m_workersDone.push_back(workerId);
			vector<Kmer> seed=*(m_aliveWorkers[workerId].getSeed());

			int nucleotides=seed.size()+(m_wordSize)-1;

			if(seed.size() > 0 && m_parameters->debugSeeds()){
				cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl;
			}

			// only consider the long ones.
			if(nucleotides>=m_parameters->getMinimumContigLength()){
				#ifdef SHOW_DISCOVERIES
				printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size());
				#endif
				
				Kmer firstVertex=seed[0];
				Kmer lastVertex=seed[seed.size()-1];
				Kmer firstReverse=m_parameters->_complementVertex(&lastVertex);

				if(firstVertex<firstReverse){
					printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed.size());
					fflush(stdout);

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
					m_SEEDING_seeds.push_back(seed);
				}
			}
		}
		m_activeWorkerIterator++;
	}else{
		updateStates();

		//  add one worker to active workers
		//  reason is that those already in the pool don't communicate anymore -- 
		//  as for they need responses.
		if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){
			// there is at least one worker to start
			// AND
			// the number of alive workers is below the maximum
			if(m_SEEDING_i<(uint64_t)m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){
				if(m_SEEDING_i % 100000 ==0){
					printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size());
					fflush(stdout);

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
				}
				#ifdef ASSERT
				if(m_SEEDING_i==0){
					assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0);
				}
				#endif

				m_splayTreeIterator.next();
				Kmer vertexKey=*(m_splayTreeIterator.getKey());

				m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i);
				m_activeWorkers.insert(m_SEEDING_i);

				int population=m_aliveWorkers.size();
				if(population>m_maximumWorkers){
					m_maximumWorkers=population;
				}

				m_SEEDING_i++;

				// skip the reverse complement as we don't really need it anyway.
			}else{
				m_virtualCommunicator->forceFlush();
			}
		}

		// brace yourself for the next round
		m_activeWorkerIterator=m_activeWorkers.begin();
	}

	#ifdef ASSERT
	assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers);
	#endif

	if((int)m_subgraph->size()==m_completedJobs){
		printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size());
		fflush(stdout);
		printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size());
		fflush(stdout);
		printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers);
		fflush(stdout);
		m_virtualCommunicator->printStatistics();

		(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
		m_outbox->push_back(aMessage);

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}

		#ifdef ASSERT
		assert(m_aliveWorkers.size()==0);
		assert(m_activeWorkers.size()==0);
		#endif

		// sort the seeds by length
		std::sort(m_SEEDING_seeds.begin(),m_SEEDING_seeds.end(),myComparator_sort);

		/** write seeds for debugging purposes */
		if(m_parameters->hasOption("-write-seeds")){
			ostringstream fileName;
			fileName<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".RaySeeds.fasta";
			ofstream f(fileName.str().c_str());
			for(int i=0;i<(int)m_SEEDING_seeds.size();i++){
				uint64_t id=getPathUniqueId(m_parameters->getRank(),i);
				f<<">RaySeed-"<<id<<endl;

				f<<addLineBreaks(convertToString(&(m_SEEDING_seeds[i]),
					m_parameters->getWordSize(),m_parameters->getColorSpaceMode()),
					m_parameters->getColumns());
			}
			f.close();
		}

	}
}
Esempio n. 9
0
/**
 * Run one slice of the selection of optimal read markers on this rank.
 *
 * The method is written to be called repeatedly. On each call it either
 *  - lets the next active worker work until it is done or has pushed a
 *    message, or
 *  - once the active workers have been iterated, updates the worker states
 *    and, if allowed, creates one worker for the next read.
 *
 * When the number of completed jobs equals the number of reads, the slave
 * mode is set to RAY_SLAVE_MODE_DO_NOTHING, a
 * RAY_MPI_TAG_MASTER_IS_DONE_ATTACHING_READS_REPLY message is pushed for the
 * master and the work allocator is cleared.
 *
 * NOTE(review): the parameters carry the m_ prefix used for members; they
 * presumably shadow members of the same name -- confirm against the class.
 *
 * \param m_myReads the reads of this rank
 * \param m_outboxAllocator allocator handed to the workers
 * \param m_outbox outbox receiving the final message for the master
 * \param m_mode slave mode, written when everything is completed
 * \param m_wordSize not used in this function
 * \param m_size not used in this function
 * \param m_rank rank used in the log messages and as source of the message
 */
void SequencesIndexer::attachReads(ArrayOfReads*m_myReads,
				RingAllocator*m_outboxAllocator,
				StaticVector*m_outbox,
				int*m_mode,
				int m_wordSize,
				int m_size,
				int m_rank
			){
	// one-time initialization on the first call
	if(!m_initiatedIterator){
		m_theSequenceId=0;

		m_activeWorkerIterator.constructor(&m_activeWorkers);
		m_initiatedIterator=true;
		m_maximumAliveWorkers=30000;
	}

	m_virtualCommunicator->processInbox(&m_activeWorkersToRestore);


	if(!m_virtualCommunicator->isReady()){
		return;
	}

	if(m_activeWorkerIterator.hasNext()){
		uint64_t workerId=m_activeWorkerIterator.next()->getKey();
		#ifdef ASSERT
		assert(m_aliveWorkers.find(workerId,false)!=NULL);
		assert(!m_aliveWorkers.find(workerId,false)->getValue()->isDone());
		#endif
		m_virtualCommunicator->resetLocalPushedMessageStatus();

		//force the worker to work until he finishes or pushes something on the stack
		while(!m_aliveWorkers.find(workerId,false)->getValue()->isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_aliveWorkers.find(workerId,false)->getValue()->work();
		}

		// a worker can be both waiting (it pushed a message) and done
		if(m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_waitingWorkers.push_back(workerId);
		}
		if(m_aliveWorkers.find(workerId,false)->getValue()->isDone()){
			m_workersDone.push_back(workerId);
		}
	}else{
		updateStates();

		//  add one worker to active workers
		//  reason is that those already in the pool don't communicate anymore -- 
		//  as for they need responses.
		if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.size()==0){
			// there is at least one worker to start
			// AND
			// the number of alive workers is below the maximum
			if(m_theSequenceId<(int)m_myReads->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){
				// progress report every 10000 reads
				if(m_theSequenceId%10000==0){
					printf("Rank %i is selecting optimal read markers [%i/%i]\n",m_rank,m_theSequenceId+1,(int)m_myReads->size());
					fflush(stdout);
					if(m_parameters->showMemoryUsage())
						showMemoryUsage(m_rank);
				}

				#ifdef ASSERT
				if(m_theSequenceId==0){
					assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0);
				}
				#endif
				// NOTE(review): fixed-size buffer; assumes getSeq never writes
				// more than 4000 bytes -- confirm against the read length limit
				char sequence[4000];
				#ifdef ASSERT
				assert(m_theSequenceId<(int)m_myReads->size());
				#endif

				m_myReads->at(m_theSequenceId)->getSeq(sequence,m_parameters->getColorSpaceMode(),false);

				// the read identifier is used as the worker identifier
				bool flag;
				m_aliveWorkers.insert(m_theSequenceId,&m_workAllocator,&flag)->getValue()->constructor(m_theSequenceId,sequence,m_parameters,m_outboxAllocator,m_virtualCommunicator,
					m_theSequenceId,m_myReads,&m_workAllocator);
				m_activeWorkers.insert(m_theSequenceId,&m_workAllocator,&flag);
				// track the peak number of alive workers
				int population=m_aliveWorkers.size();
				if(population>m_maximumWorkers){
					m_maximumWorkers=population;
				}

				m_theSequenceId++;
			}else{
				m_virtualCommunicator->forceFlush();
			}
		}

		// restart the iteration over the active workers for the next round
		m_activeWorkerIterator.constructor(&m_activeWorkers);
	}

	#ifdef ASSERT
	assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers);
	#endif

	// every read has been processed: wrap up
	if((int)m_myReads->size()==m_completedJobs){
		printf("Rank %i is selecting optimal read markers [%i/%i] (completed)\n",m_rank,(int)m_myReads->size(),(int)m_myReads->size());
		fflush(stdout);
		printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers);
		fflush(stdout);
		(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_MASTER_IS_DONE_ATTACHING_READS_REPLY,m_rank);
		m_outbox->push_back(aMessage);

		m_virtualCommunicator->printStatistics();

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}

		#ifdef ASSERT
		assert(m_aliveWorkers.size()==0);
		assert(m_activeWorkers.size()==0);
		#endif

		// compute the amount held by the work allocator before clearing it,
		// so that it can be reported below
		int freed=m_workAllocator.getNumberOfChunks()*m_workAllocator.getChunkSize();
		m_workAllocator.clear();

		if(m_parameters->showMemoryUsage()){
			cout<<"Rank "<<m_parameters->getRank()<<": Freeing unused assembler memory: "<<freed/1024<<" KiB freed"<<endl;
			showMemoryUsage(m_rank);
		}
	}
}
Esempio n. 10
0
/**
 * Writes one marker summary table to a text file.
 *
 * One line is emitted per (offset, coverage) pair found in \p statistics,
 * with three tab-separated columns: offset, coverage, count.
 * The outer map key is the offset, the inner map key is the coverage and
 * the inner map value is the count.
 *
 * If the file can not be opened, a warning is printed on standard output
 * and nothing is written.
 *
 * \param fileName path of the file to create
 * \param statistics table to dump
 */
static void writeMarkerSummaryFile(const std::string&fileName,const std::map<int,std::map<int,int> >&statistics){
	std::ofstream output(fileName.c_str());

	if(!output){
		std::cout<<"Warning: unable to open "<<fileName<<" for writing, marker summary was not written."<<std::endl;
		return;
	}

	for(std::map<int,std::map<int,int> >::const_iterator i=statistics.begin();i!=statistics.end();++i){
		int offset=i->first;
		for(std::map<int,int>::const_iterator j=i->second.begin();j!=i->second.end();++j){
			int coverage=j->first;
			int count=j->second;

			// '\n' rather than endl: same bytes in the file, but no flush for every record.
			output<<offset<<"	"<<coverage<<"	"<<count<<'\n';
		}
	}

	// close() flushes whatever is still buffered.
	output.close();
}

/**
 * Handler for the slave mode RAY_SLAVE_MODE_INDEX_SEQUENCES
 * (reported in the logs as "selecting optimal read markers").
 *
 * Each call performs one step (presumably the method is invoked repeatedly
 * while the slave mode is active -- the one-time initialisation guards below
 * suggest so):
 *
 *  - on the first call, initialise the iterator and the counters;
 *  - if the checkpoints OptimalMarkers and ReadOffsets both exist, skip the
 *    whole computation and reply to the master;
 *  - otherwise let the next active worker work, or, when the active set was
 *    fully iterated, update the states and possibly start one new worker for
 *    the next read;
 *  - when all the reads are completed, reply to the master, print statistics,
 *    release the worker memory and write the optional output files.
 */
void SequencesIndexer::call_RAY_SLAVE_MODE_INDEX_SEQUENCES(){
	// one-time initialisation
	if(!m_initiatedIterator){
		m_theSequenceId=0;

		m_activeWorkerIterator.constructor(&m_activeWorkers);
		m_initiatedIterator=true;

		// upper bound on the number of workers alive at the same time
		m_maximumAliveWorkers=32768;

		m_virtualCommunicator->resetCounters();
	}

	// nothing to compute if both checkpoints are available
	if(!m_checkedCheckpoint){
		if(m_parameters->hasCheckpoint("OptimalMarkers") && m_parameters->hasCheckpoint("ReadOffsets")){
			cout<<"Rank "<<m_parameters->getRank()<<": checkpoint OptimalMarkers exists, not selecting markers."<<endl;
			(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
			Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_MASTER_IS_DONE_ATTACHING_READS_REPLY,m_rank);
			m_outbox->push_back(&aMessage);
			return;
		}
		m_checkedCheckpoint=true;
	}

	m_virtualCommunicator->processInbox(&m_activeWorkersToRestore);

	// do nothing until the virtual communicator reports it is ready
	if(!m_virtualCommunicator->isReady()){
		return;
	}

	if(m_activeWorkerIterator.hasNext()){
		WorkerHandle workerId=m_activeWorkerIterator.next()->getKey();

		#ifdef ASSERT
		assert(m_aliveWorkers.find(workerId,false)!=NULL);
		assert(!m_aliveWorkers.find(workerId,false)->getValue()->isDone());
		#endif
		m_virtualCommunicator->resetLocalPushedMessageStatus();

		//force the worker to work until he finishes or pushes something on the stack
		while(!m_aliveWorkers.find(workerId,false)->getValue()->isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_aliveWorkers.find(workerId,false)->getValue()->work();
		}

		// the worker pushed a message: it is recorded as waiting
		if(m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_waitingWorkers.push_back(workerId);
		}

		// the worker completed its job: it is recorded as done
		if(m_aliveWorkers.find(workerId,false)->getValue()->isDone()){
			m_workersDone.push_back(workerId);
		}
	}else{
		// NOTE(review): updateStates() presumably processes m_workersDone and
		// m_waitingWorkers filled above -- confirm against its definition.
		updateStates();

		//  add one worker to active workers
		//  reason is that those already in the pool don't communicate anymore -- 
		//  as for they need responses.
		if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.size()==0){
			// there is at least one worker to start
			// AND
			// the number of alive workers is below the maximum
			if(m_theSequenceId<(int)m_myReads->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){
				// progress report every 100000 reads
				if(m_theSequenceId%100000==0){
					printf("Rank %i is selecting optimal read markers [%i/%i]\n",m_rank,m_theSequenceId+1,(int)m_myReads->size());

					m_derivative.addX(m_theSequenceId);
					m_derivative.printStatus(SLAVE_MODES[RAY_SLAVE_MODE_INDEX_SEQUENCES],RAY_SLAVE_MODE_INDEX_SEQUENCES);
					m_derivative.printEstimatedTime(m_myReads->size());

					if(m_parameters->showMemoryUsage())
						showMemoryUsage(m_rank);
				}

				#ifdef ASSERT
				if(m_theSequenceId==0){
					assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0);
				}
				assert(m_theSequenceId<(int)m_myReads->size());
				#endif

				// create the worker for the read m_theSequenceId;
				// it is registered as alive and as active
				bool flag;
				m_aliveWorkers.insert(m_theSequenceId,&m_workAllocator,&flag)->getValue()->constructor(
					m_theSequenceId,m_parameters,m_outboxAllocator,m_virtualCommunicator,
					m_theSequenceId,m_myReads,&m_workAllocator,&m_readMarkerFile,
					&m_forwardStatistics,&m_reverseStatistics,
					RAY_MPI_TAG_ATTACH_SEQUENCE,
					RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE);

				m_activeWorkers.insert(m_theSequenceId,&m_workAllocator,&flag);

				// keep track of the peak number of alive workers
				int population=m_aliveWorkers.size();
				if(population>m_maximumWorkers){
					m_maximumWorkers=population;
				}

				m_theSequenceId++;
			}else{
				// no worker can be started, flush the pending messages
				m_virtualCommunicator->forceFlush();
			}
		}

		// restart the iteration over the active workers
		m_activeWorkerIterator.constructor(&m_activeWorkers);
	}

	#ifdef ASSERT
	assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers);
	#endif

	// all the reads were processed
	if((int)m_myReads->size()==m_completedJobs){
		printf("Rank %i is selecting optimal read markers [%i/%i] (completed)\n",m_rank,(int)m_myReads->size(),(int)m_myReads->size());
		printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers);
		(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_MASTER_IS_DONE_ATTACHING_READS_REPLY,m_rank);
		m_outbox->push_back(&aMessage);

		m_derivative.writeFile(&cout);

		m_virtualCommunicator->printStatistics();

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}

		#ifdef ASSERT
		assert(m_aliveWorkers.size()==0);
		assert(m_activeWorkers.size()==0);
		#endif

		// the size must be computed before clear()
		int freed=m_workAllocator.getNumberOfChunks()*m_workAllocator.getChunkSize();
		m_workAllocator.clear();

		if(m_parameters->showMemoryUsage()){
			cout<<"Rank "<<m_parameters->getRank()<<": Freeing unused assembler memory: "<<freed/1024<<" KiB freed"<<endl;
			showMemoryUsage(m_rank);
		}

		if(m_parameters->hasOption("-write-read-markers")){
			m_readMarkerFile.close();
		}

		// optional per-rank summaries, one file for each table
		if(m_parameters->hasOption("-write-marker-summary")){
			ostringstream file1;
			file1<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".ForwardMarkerSummary.txt";
			writeMarkerSummaryFile(file1.str(),m_forwardStatistics);

			ostringstream file2;
			file2<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".ReverseMarkerSummary.txt";
			writeMarkerSummaryFile(file2.str(),m_reverseStatistics);
		}

		m_forwardStatistics.clear();
		m_reverseStatistics.clear();
	}
}