// Global alignment entry point for two profiles.
//
// If either profile is empty the path is filled directly (all inserts or all
// deletes) and 0 is returned. Otherwise the work is delegated to
// GlobalAlignDiags or GlobalAlignNoDiags depending on g_bDiags, and the
// score of the chosen routine is returned. When TIMING is enabled the ticks
// spent in the delegated routine are added to g_ticksDP; the empty-profile
// shortcuts return before that accounting.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	TICKS ticksBegin = GetClockTicks();
#endif
	// Degenerate cases: nothing to align against.
	if (0 == uLengthA)
		{
		AllInserts(Path, uLengthB);
		return 0;
		}
	if (0 == uLengthB)
		{
		AllDeletes(Path, uLengthA);
		return 0;
		}

	const SCORE Result = g_bDiags ?
	  GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
	  GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);

#if	TIMING
	TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksBegin);
#endif
	return Result;
	}
/*
 * Fixed arithmetic workload used by the clock probes below.
 * The accumulators are volatile so the loop is not optimised away. The
 * integer accumulator is 64 bits wide because count*i reaches about 2e12,
 * which does not fit in an int.
 */
static void RunProbeWorkload(volatile long long *count, volatile double *x){
    volatile int i;
    for(i=0;i<1000000;i++){
        *count += (i+1);
        /* *count is >= 1 here, so the division is always defined */
        *count = ((*count)*i)/(*count);
        *x += (((*x)*(*x)) + (*x)/0.331)*((*x)*(*x)*(*x));
    }
}

/*
 * Clock self-test: runs the same workload twice under one Clock and prints
 * the ticks reported by GetClockTicks() after each run, followed by
 * sizeof(clock_t).
 */
int main(int argc,char **argv){
    volatile long long count1 = 0;  /* was read uninitialised */
    volatile double X = 1024;
    Clock *test_clk = CreateClock();

    (void)argc;
    (void)argv;

    StartClock(test_clk);
    RunProbeWorkload(&count1, &X);
    StopClock(test_clk);
    /* cast + %llu: the conversion now matches the argument on every ABI */
    printf("PROBE1: Clock Ticks Elapsed is %llu \n",
        (unsigned long long)GetClockTicks(test_clk));

    StartClock(test_clk);
    RunProbeWorkload(&count1, &X);
    StopClock(test_clk);
    printf("PROBE2: Clock Ticks Elapsed is %llu \n",
        (unsigned long long)GetClockTicks(test_clk));

    /* sizeof yields size_t, which needs %zu */
    printf("size of clock_t is %zu \n",sizeof(clock_t));
    return 0;
}
SCORE ObjScoreIds(const MSA &msa, const unsigned Ids1[], unsigned uCount1, const unsigned Ids2[], unsigned uCount2) { #if TIMING TICKS t1 = GetClockTicks(); #endif unsigned *SeqIndexes1 = new unsigned[uCount1]; unsigned *SeqIndexes2 = new unsigned[uCount2]; for (unsigned n = 0; n < uCount1; ++n) SeqIndexes1[n] = msa.GetSeqIndex(Ids1[n]); for (unsigned n = 0; n < uCount2; ++n) SeqIndexes2[n] = msa.GetSeqIndex(Ids2[n]); #if DOUBLE_AFFINE extern SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps); SCORE Letters, Gaps; SCORE dObjScore = ObjScoreDA(msa, &Letters, &Gaps); delete[] SeqIndexes1; delete[] SeqIndexes2; #else SCORE dObjScore = ObjScore(msa, SeqIndexes1, uCount1, SeqIndexes2, uCount2); #endif #if TIMING TICKS t2 = GetClockTicks(); g_ticksObjScore += (t2 - t1); #endif return dObjScore; }
// Self-checking variant of GlobalAlign.
//
// Aligns the two profiles twice: once with GlobalAlignSimple into a scratch
// path and once with NWSmall into Path. If the two paths are not equal both
// are logged and Quit("Paths differ") is called. The score returned is the
// one produced by NWSmall. g_bKeepSimpleDP is set to true as a side effect.
// With TIMING enabled the elapsed ticks are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	TICKS ticksBegin = GetClockTicks();
#endif
	g_bKeepSimpleDP = true;

	// Reference alignment used only for the comparison below.
	PWPath ReferencePath;
	GlobalAlignSimple(PA, uLengthA, PB, uLengthB, ReferencePath);

	const SCORE SmallScore = NWSmall(PA, uLengthA, PB, uLengthB, Path);

	const bool bPathsAgree = Path.Equal(ReferencePath);
	if (!bPathsAgree)
		{
		Log("Simple:\n");
		ReferencePath.LogMe();
		Log("Small:\n");
		Path.LogMe();
		Quit("Paths differ");
		}

#if	TIMING
	TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksBegin);
#endif
	return SmallScore;
	}
void Run() { SetStartTime(); Log("Started %s\n", GetTimeAsStr()); for (int i = 0; i < g_argc; ++i) Log("%s ", g_argv[i]); Log("\n"); #if TIMING TICKS t1 = GetClockTicks(); #endif if (g_bRefine) Refine(); else if (g_bRefineW) { extern void DoRefineW(); DoRefineW(); } else if (g_bProfDB) ProfDB(); else if (g_bSW) Local(); else if (0 != g_pstrSPFileName) DoSP(); else if (g_bProfile) Profile(); else if (g_bPPScore) PPScore(); else if (g_bPAS) ProgAlignSubFams(); else DoMuscle(); #if TIMING extern TICKS g_ticksDP; extern TICKS g_ticksObjScore; TICKS t2 = GetClockTicks(); TICKS TotalTicks = t2 - t1; TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore; double dSecs = TicksToSecs(TotalTicks); double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks; double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks; double PctOther = (double) ticksOther*100.0/(double) TotalTicks; Log(" Ticks Secs Pct\n"); Log(" ============ ======= =====\n"); Log("DP %12ld %7.2f %5.1f%%\n", (long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP); Log("OS %12ld %7.2f %5.1f%%\n", (long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS); Log("Other %12ld %7.2f %5.1f%%\n", (long) ticksOther, TicksToSecs(ticksOther), PctOther); Log("Total %12ld %7.2f 100.0%%\n", (long) TotalTicks, dSecs); #endif ListDiagSavings(); Log("Finished %s\n", GetTimeAsStr()); }
// Thin wrapper that aligns two profiles with NWSmall and returns its score.
// With TIMING enabled the ticks spent inside NWSmall are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	TICKS ticksBegin = GetClockTicks();
#endif

	const SCORE Result = NWSmall(PA, uLengthA, PB, uLengthB, Path);

#if	TIMING
	TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksBegin);
#endif
	return Result;
	}
/*
 * Externally sorts the fixed-width keys of a binary file.
 *
 * Set the columns which you want to sort in ESortParams.col_start and
 * ESortParams.col_end before calling.
 *
 * The input is read in chunks of at most KEY_SIZE*KEYS_IN_RUN bytes. Each
 * chunk is sorted in memory with IntegerSort_SB and written to run_file as
 * one run: an unsigned long holding the byte count of the chunk, followed by
 * the sorted key bytes. Reading stops as soon as a read returns fewer than
 * KEY_SIZE bytes. ExternalRWayMerge is then called on the run file.
 *
 *   bin_kedge   - path of the binary key file (opened read-only)
 *   run_file    - path of the run file (created or truncated)
 *   KEY_SIZE    - size of one key in bytes
 *   key_compare - comparison callback forwarded to ExternalRWayMerge
 *
 * On failure to open a file or allocate the buffer a message is printed
 * with perror; debug builds then abort via assert, release builds return.
 */
void ExSortKmerEdges(const char *bin_kedge, const char *run_file,
    unsigned char KEY_SIZE, ExKeyCompare key_compare){
    unsigned char *run_buffer = NULL;
    /* computed in size_t so a large KEYS_IN_RUN cannot overflow int */
    size_t key_buf_len = (size_t)KEY_SIZE*KEYS_IN_RUN;
    size_t buf_len = key_buf_len + sizeof(unsigned long);
    size_t ret_len;
    size_t write_len;
    size_t written;
    int bin_kedge_fd = open(bin_kedge, O_RDONLY);
    /* open() with these flags matches fopen(..., "w") but yields -1 on
     * failure instead of dereferencing a NULL FILE*, and leaves no FILE
     * object behind when the descriptor is closed. */
    int run_file_fd = open(run_file, O_WRONLY|O_CREAT|O_TRUNC, 0666);
    unsigned long *runlen;
    unsigned long rcount = 0;
    unsigned char *key_buf;
    Clock *clk = CreateClock();

    /* 0 is a valid descriptor; only negative values signal failure */
    if(bin_kedge_fd < 0 || run_file_fd < 0){
        perror("FAILED TO OPEN FILES:");
        if(bin_kedge_fd >= 0){
            close(bin_kedge_fd);
        }
        if(run_file_fd >= 0){
            close(run_file_fd);
        }
        assert(0);
        return;
    }

    run_buffer = (unsigned char *)malloc(sizeof(unsigned char)*buf_len);
    if(run_buffer == NULL){
        perror("FAILED TO ALLOCATE RUN BUFFER:");
        close(run_file_fd);
        close(bin_kedge_fd);
        assert(0);
        return;
    }

    /* layout of run_buffer: [unsigned long length][key bytes ...] */
    runlen = (unsigned long *)run_buffer;
    key_buf = run_buffer + sizeof(unsigned long);

    StartClock(clk);
    while((ret_len = SafeRead(bin_kedge_fd, key_buf, key_buf_len)) >= KEY_SIZE){
        *runlen = (unsigned long) ret_len;
        IntegerSort_SB(key_buf, key_buf + (*runlen) - KEY_SIZE, KEY_SIZE,
            1, ESortParams.col_start, ESortParams.col_end, 0, CharMap,
            ESortParams.endian);

        /* header + payload, never more than the buffer holds */
        write_len = ((*runlen + sizeof(unsigned long)) < buf_len)?
            (*runlen + sizeof(unsigned long)):buf_len;
        written = SafeWrite(run_file_fd, run_buffer, write_len);
        rcount++;
        assert(written == write_len);
        (void)written; /* silences the unused warning under NDEBUG */
    }
    StopClock(clk);
    FreeISortBuckets();

    /*create the final run*/
    close(run_file_fd);
    close(bin_kedge_fd);
    free(run_buffer);
    printf("\n[EX-SORT CREATED %lu RUNS] took %ld ticks\n", rcount,
        GetClockTicks(clk));

    StartClock(clk);
    /*call the external rway merge*/
    ExternalRWayMerge(run_file, RUNS_PER_MERGE, KEY_SIZE, key_compare, rcount);
    StopClock(clk);
    printf("[R-WAY MERGE] took %ld ticks\n", GetClockTicks(clk));
}
void LogTransformT(RowStarts *rowptr,ColIndices *colind,ValueType *val,Indices n,ValueType *u, ValueType *v,ValueType *dist,Indices *m,Indices *m_inv){ ValueType *max_c = new ValueType[n]; rowptr--;colind--;val--;max_c--; m--; m_inv--; u--; dist--; Indices k,i,j,j0; Clock *anst_clk; anst_clk = CreateClock(); StartClock(anst_clk); /*TODO: Avoid HUGE_VAL directly in templated code*/ for(j=1;j<=n;j++){ /*Find the maximum in the column*/ max_c[j] = (ValueType)0.0; m[j] = (Indices)0; m_inv[j]=0; u[j] = HUGE_VAL;dist[j]=HUGE_VAL; } for(i=1;i<=n;i++){ for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ val[k] = fabs(val[k]); j = colind[k]+1; if(val[k] > max_c[j]){ max_c[j] = val[k]; } } } #if 0 for(k=1;k<=n;k++){ printf("max in col %u is %e \n",k,max_c[k]); } #endif for(i=1;i<=n;i++){ for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ j = colind[k]+1; if(val[k]<=1.0e-30){ val[k] = HUGE_VAL; }else{ if(max_c[j]<=1.0e-30){ val[k] = HUGE_VAL;//MAX_DOUBLE_VALUE/n - (log(val[k])/M_LN10); }else{ val[k] = log10(max_c[j]/val[k]); val[k] = fabs(val[k]); if(val[k]< u[j]){ u[j] = val[k]; } } } /*take care of -0.0*/ /*assert(val[k]>=0.0);*/ } } StopClock(anst_clk); anst_ticks = GetClockTicks(anst_clk); ++max_c; delete max_c; }
/*
 * Timed wrapper around FindPerfectMatch.
 *
 * Obtains the three work arrays from CreateMC21WorkSpace, runs
 * FindPerfectMatch on the pattern (rowptr, colind) of order n with m as the
 * match array, stores the elapsed ticks in anst_ticks and releases the
 * work arrays and the clock again.
 */
void ANSTMC21(RowStarts rowptr,ColIndices colind,Indices n,Indices *m){
    Indices *visited_cols;
    Clock *timer = CreateClock();
    Indices *pred;
    Indices *row_match;

    CreateMC21WorkSpace(n, &visited_cols, &pred, &row_match);

    /*only the matching itself is timed, not the workspace handling*/
    StartClock(timer);
    FindPerfectMatch(rowptr, colind, n, m, pred, row_match, visited_cols);
    StopClock(timer);
    anst_ticks = GetClockTicks(timer);

    FreeMC21WorkSpace(visited_cols, pred, row_match);
    free(timer);
}
/////////////////////////////////////////////////////////////////////////////// // Name: CheckDelPidFiles // Author: Mihai Buha ([email protected]) // Description: removes all the pidfiles specified, but not too frequently // Parameters: p_pszNames - input - null-terminated array of filenames // p_szMissNames - output - if not null, fill with names of // missing files. Take care to allocate enough memory for // "Watchdog: <all filenames in p_pszNames> " or else bad // things will happen. // Returns: true if all pidfiles existed or timeout not expired // false if some pidfile was missing (controlling app was dead - // did not create a file during latest 2 intervals) /////////////////////////////////////////////////////////////////////////////// bool CheckDelPidFiles (const char** p_pszNames, char* p_szMissNames) { static clock_t last_checked; static long n_SC_CLK_TCK; static bool last_existed = true; clock_t timestamp; bool status = true; bool exists = true; if(!n_SC_CLK_TCK) n_SC_CLK_TCK = sysconf( _SC_CLK_TCK); timestamp = GetClockTicks(); if( timestamp < last_checked){ // large uptime overflowed the clock_t last_checked = timestamp; } if( timestamp - last_checked < PIDFILES_FACTOR * PIDFILES_TIMEOUT * n_SC_CLK_TCK){ return status; } last_checked = timestamp; int i; for( i=0; p_pszNames[i]; ++i) { int nFileLen = GetFileLen(p_pszNames[i]); if ( nFileLen > 0) { unlink( p_pszNames[i]); continue; } if (nFileLen == 0) { LOG("CheckDelPidFiles: file %s len==0", p_pszNames[i]); unlink( p_pszNames[i]); } LOG_ERR( "CheckDelPidFiles: pidfile %s missing!", p_pszNames[i]); if( exists && p_szMissNames){ sprintf( p_szMissNames, "Watchdog: "); } exists = false; if( p_szMissNames){ strcat( p_szMissNames, p_pszNames[i]); strcat( p_szMissNames, " "); } system_to( 60, NIVIS_TMP"take_system_snapshot.sh "ACTIVITY_DATA_PATH"snapshot_warning.txt &"); } status = exists || last_existed; last_existed = exists; return status; }
/////////////////////////////////////////////////////////////////////////////// // Name: TouchPidFile // Author: Mihai Buha ([email protected]) // Description: creates a file containing the PID // Parameters: p_szName - input - name of the file (ex: modulename.pid) // Returns: true if successful, false if failed /////////////////////////////////////////////////////////////////////////////// bool TouchPidFile( const char* p_szName ) { static clock_t last_checked; static long n_SC_CLK_TCK; clock_t timestamp; int nFd; static char pid[6]; bool status = true; if(!n_SC_CLK_TCK) n_SC_CLK_TCK = sysconf( _SC_CLK_TCK); timestamp = GetClockTicks(); if( timestamp < last_checked){ // large uptime overflowed the clock_t last_checked = timestamp; } if( timestamp - last_checked < PIDFILES_TIMEOUT * n_SC_CLK_TCK) { return status; } last_checked = timestamp; nFd = open( p_szName, O_CREAT | O_RDWR, 0666 ); if( nFd < 0 ) { LOG_ERR( "TouchPidFile: can't create pidfile %s", p_szName ); return false; } if(!pid[0]) { snprintf( pid, 5, "%d", getpid() ); } // if( lseek( nFd, 0, SEEK_SET ) < 0) // { LOG_ERR( "TouchPidFile: can't write to pidfile %s", p_szName ); // status = false; // } if( write( nFd, pid, strlen( pid) ) < 0) { LOG_ERR( "TouchPidFile: can't write to pidfile %s", p_szName ); status = false; } if( close( nFd)) { LOG_ERR( "TouchPidFile: can't close pidfile %s", p_szName ); status = false; } return status; }
void LogTransform(SparseGraph *G){ node_t *rowptr = G->rowptr; rowptr--; node_t *colind = G->colind; colind--; double *val = G->nnz;val--; node_t n = G->order; node_t k; node_t nnz_size = G->nnz_size; node_t k0,j0; double *max_c = (double *)calloc(n,sizeof(double));max_c--; #if 0 for(k=1;k<=n;k++){ /*Find the maximum in the column*/ max_c[k] = (double)0.0; } #endif Clock *anst_clk = CreateClock(); StartClock(anst_clk); for(k=1;k<=nnz_size;k++){ val[k] = fabs(val[k]); if(val[k] > max_c[colind[k]]){ max_c[colind[k]] = val[k]; } } #if 0 for(k=1;k<=n;k++){ printf("max in col %u is %e \n",k,max_c[k]); } #endif for(k=1;k<=nnz_size;k++){ if(fabs(val[k]-0.0)<=1.0e-30){ val[k] = HUGE_VAL; }else{ if(fabs(max_c[colind[k]]-0.0)<=1.0e-30){ val[k] = HUGE_VAL;//MAX_DOUBLE_VALUE/n - (log(val[k])/M_LN10); }else{ val[k] = log10(max_c[colind[k]]/val[k]); val[k] = fabs(val[k]); } } /*take care of -0.0*/ assert(val[k]>=0.0); } StopClock(anst_clk); anst_ticks = GetClockTicks(anst_clk); free(++max_c); }
////////////////////////////////////////////////////////////////////////////// /// @brief removes all the pidfiles specified, but not too frequently /// @author Mihai Buha ([email protected]) /// @param[in] p_pszNames null-terminated array of filenames /// @param[out] p_szMissNames if not null, fill with names of /// missing files. Take care to allocate enough memory for /// "Watchdog: <all filenames in p_pszNames> " or else bad /// things will happen. /// @retval true All pidfiles existed or timeout not expired /// @retval false Some pidfile was missing (controlling app was dead - /// did not create a file during latest 2 intervals) ////////////////////////////////////////////////////////////////////////////// bool CWatchdogMngr::checkDelPidFiles (const char** p_pszNames, char* p_szMissNames) { clock_t timestamp ; static clock_t sStartTime = GetClockTicks(); static clock_t last_checked; static bool last_existed = true; bool status = true; bool exists = true; timestamp = GetClockTicks(); if( timestamp < last_checked) { // large uptime overflowed the clock_t last_checked = timestamp; sStartTime = timestamp; } if( (timestamp - last_checked < WTD_PID_VRFY_INTERVAL * sysconf(_SC_CLK_TCK)) || (timestamp - sStartTime < WTD_PID_VRFY_INTERVAL * sysconf(_SC_CLK_TCK)) ) { return true; } last_checked = timestamp; for( unsigned i=0; p_pszNames[i]; ++i) { int nFileLen = GetFileLen(p_pszNames[i]); if (nFileLen == 0) { usleep(30*1000);//wait a little maybe in process of writing the pid file nFileLen = GetFileLen(p_pszNames[i]); } if ( nFileLen > 0) { unlink( p_pszNames[i]); continue; } if (nFileLen == 0) { ERR("checkDelPidFiles: pidfile[%s] len==0", p_pszNames[i]); systemf_to( 20 ,"log2flash 'WTD: ERR:checkDelPidFiles: pidfile[%s] len==0' &", p_pszNames[i]); unlink( p_pszNames[i]); } LOG( "ERR:Missing [%s]", p_pszNames[i]); if ( exists ) { exists = false; } if( p_szMissNames ) { strcat( p_szMissNames, p_pszNames[i]); strcat( p_szMissNames, ","); } } // Take snapshot if any of the 
pidfiles is missing. if (! exists) { system_to( 60, NIVIS_TMP"take_system_snapshot.sh "ACTIVITY_DATA_PATH"snapshot_warning.txt &"); } status = exists || last_existed; last_existed = exists; return status; }
// Computes the objective score selected by g_ObjScore.
//
// OBJSCORE_SPM is first resolved to OBJSCORE_XP when the alignment has at
// most 100 sequences and to OBJSCORE_SPF otherwise. The DP and XP scores
// work on two sub-alignments built from SeqIndexes1 and SeqIndexes2; the
// SP, SPF and PS scores work on the whole alignment. Sequence weights are
// set on whichever alignment(s) the score uses before it is computed.
// Any other value ends in Quit. With TIMING enabled the elapsed ticks are
// added to g_ticksObjScore.
SCORE ObjScore(const MSA &msa, const unsigned SeqIndexes1[],
  unsigned uSeqCount1, const unsigned SeqIndexes2[], unsigned uSeqCount2)
	{
#if	TIMING
	TICKS ticksBegin = GetClockTicks();
#endif
	const unsigned uTotalSeqs = msa.GetSeqCount();

	OBJSCORE Effective = g_ObjScore;
	if (OBJSCORE_SPM == g_ObjScore)
		Effective = (uTotalSeqs <= 100) ? OBJSCORE_XP : OBJSCORE_SPF;

	MSA msaGroup1;
	MSA msaGroup2;

	const bool bUsesSubsets =
	  (OBJSCORE_DP == Effective || OBJSCORE_XP == Effective);
	const bool bUsesWholeMSA =
	  (OBJSCORE_SP == Effective || OBJSCORE_SPF == Effective ||
	  OBJSCORE_PS == Effective);

	if (bUsesSubsets)
		{
		MSAFromSeqSubset(msa, SeqIndexes1, uSeqCount1, msaGroup1);
		MSAFromSeqSubset(msa, SeqIndexes2, uSeqCount2, msaGroup2);

		SetMSAWeightsMuscle(msaGroup1);
		SetMSAWeightsMuscle(msaGroup2);
		}
	else if (bUsesWholeMSA)
		{
		// Yuck -- casting away const (design flaw)
		SetMSAWeightsMuscle(const_cast<MSA &>(msa));
		}

	SCORE Result = 0;
	switch (Effective)
		{
	case OBJSCORE_DP:
		Result = ObjScoreDP(msaGroup1, msaGroup2);
		break;

	case OBJSCORE_XP:
		Result = ObjScoreXP(msaGroup1, msaGroup2);
		break;

	case OBJSCORE_SP:
		Result = ObjScoreSP(msa);
		break;

	case OBJSCORE_SPF:
		Result = ObjScoreSPDimer(msa);
		break;

	case OBJSCORE_PS:
		Result = ObjScorePS(msa);
		break;

	default:
		Quit("Invalid g_ObjScore=%d", g_ObjScore);
		}

#if	TIMING
	TICKS ticksEnd = GetClockTicks();
	g_ticksObjScore += (ticksEnd - ticksBegin);
#endif
	return Result;
	}
SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, unsigned uLengthB, PWPath &Path) { #if LIST_DIAGS TICKS t1 = GetClockTicks(); #endif DiagList DL; if (ALPHA_Amino == g_Alpha) FindDiags(PA, uLengthA, PB, uLengthB, DL); else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha) FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL); else Quit("GlobalAlignDiags: bad alpha"); #if TRACE Log("GlobalAlignDiags, diag list:\n"); DL.LogMe(); #endif DL.Sort(); DL.DeleteIncompatible(); #if TRACE Log("After DeleteIncompatible:\n"); DL.LogMe(); #endif MergeDiags(DL); #if TRACE Log("After MergeDiags:\n"); DL.LogMe(); #endif DPRegionList RL; DiagListToDPRegionList(DL, RL, uLengthA, uLengthB); #if TRACE Log("RegionList:\n"); RL.LogMe(); #endif #if LIST_DIAGS { TICKS t2 = GetClockTicks(); unsigned uArea = RL.GetDPArea(); Log("ticks=%ld\n", (long) (t2 - t1)); Log("area=%u\n", uArea); } #endif g_dDPAreaWithoutDiags += uLengthA*uLengthB; double dDPAreaWithDiags = 0.0; const unsigned uRegionCount = RL.GetCount(); for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex) { const DPRegion &r = RL.Get(uRegionIndex); PWPath RegPath; if (DPREGIONTYPE_Diag == r.m_Type) { DiagToPath(r.m_Diag, RegPath); #if TRACE_PATH Log("DiagToPath, path=\n"); RegPath.LogMe(); #endif } else if (DPREGIONTYPE_Rect == r.m_Type) { const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA; const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB; const unsigned uRegLengthA = r.m_Rect.m_uLengthA; const unsigned uRegLengthB = r.m_Rect.m_uLengthB; const ProfPos *RegPA = PA + uRegStartPosA; const ProfPos *RegPB = PB + uRegStartPosB; dDPAreaWithDiags += uRegLengthA*uRegLengthB; GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath); #if TRACE_PATH Log("GlobalAlignNoDiags RegPath=\n"); RegPath.LogMe(); #endif OffsetPath(RegPath, uRegStartPosA, uRegStartPosB); #if TRACE_PATH Log("After offset path, RegPath=\n"); RegPath.LogMe(); #endif } else Quit("GlobalAlignDiags, Invalid region 
type %u", r.m_Type); AppendRegPath(Path, RegPath); #if TRACE_PATH Log("After AppendPath, path="); Path.LogMe(); #endif } #if TRACE { double dDPAreaWithoutDiags = uLengthA*uLengthB; Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n", dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0); } #endif g_dDPAreaWithDiags += dDPAreaWithDiags; return 0; }
size_t WeightedMatching(RowStarts rowptr,ColIndices colind,ValueTypePtr C, ValueTypePtr dist,ValueTypePtr u,ValueTypePtr v,Indices *p, Indices *m_inv,Indices *m, Indices n,CompareFunction cmpFunc){ typedef typename std::iterator_traits<ValueTypePtr>::value_type ValueType; Indices i,j,i1,jend,k,m_inv_prev; Indices match_size=0; Indices k0,j0; ValueType curr_shortest_path = (ValueType)0; ValueType curr_aug_path = GetMaxTypeValue<ValueType>(curr_aug_path); ValueType dist1; Indices itrace; /*Cost of the edges in the match if *$(i,j) \in M$ then $clabel[i] = C[i][j]$*/ Indices *clabel = new Indices[n]; Indices *aug_label = new Indices[n]; Indices *update_stack = new Indices[n]; Indices update_stack_index; /*Save The Operations on the Heap.*/ Indices save_heap_index; Indices *save_heap_op = new Indices[n]; #ifdef TURN_ON_SAVE_HEAP double close_factor = (double)1.0 + (double)1.0e-16; #endif /*Force the write back to memory to avoid floating point issues*/ ValueType force_mem_write[3]; #ifndef NO_LOCAL_PROFILING CreateProfilingClocks(); #endif /*Core Profiling Clock*/ Clock *core_clk = CreateClock(); #ifdef USE_BIT_ARRAY BitArray_t *col_marker = CreateBitArray(n); BitArray_t *heap_marker = CreateBitArray(n); #else Indices *col_marker = new Indices[n]; unsigned int *heap_marker = NULL; col_marker--; for(i=1;i<=n;i++){ /*Do we need Initialization?*/ col_marker[i] = (Indices)0; } #endif #if BINARY_HEAP Heap *bin_heap = NewBinaryHeap(cmpFunc,n,GetDistKeyID); ValueType *dist_ptr = NULL; heap_marker = bin_heap->keyMap; #endif /*Algorithm Uses 1-Indexing to be consistent*/ C--;m--;dist--;u--;v--;p--;m_inv--; rowptr--;colind--;clabel--;save_heap_op--; update_stack--;aug_label--; assert(dist && u && v && p); ComputeInitialExtremeMatch<ValueType,Indices>(u,v,clabel,C,m,m_inv,colind, rowptr,n,dist); match_size=0; StartClock(core_clk); for(i=1;i<=n;i++){ if(m_inv[i]){ match_size++; continue; } /* *Aim is to find a value for jend such that the path *from i-->jend is the shortest */ i1 
= i; p[i1] = 0; jend=0; itrace=i; #ifdef USE_BIT_ARRAY ResetAllBits(col_marker); ResetAllBits(heap_marker); #endif #if BINARY_HEAP bin_heap->n = 0; dist_base = (unsigned long)&(dist[1]); #endif curr_shortest_path=(ValueType)0; curr_aug_path=GetMaxTypeValue<ValueType>(curr_aug_path); save_heap_index = (Indices)0; update_stack_index = (Indices)0; while(1){ for(k=rowptr[i1]+1;k<(rowptr[i1+1]+1);k++){ j = colind[k]+1; #ifdef USE_BIT_ARRAY if(CheckBit(col_marker,j)){ #else if(col_marker[j]==i){ #endif continue; } force_mem_write[k%3] = C[k]-(v[i1]+u[j]); dist1 = curr_shortest_path + force_mem_write[k%3]; /*Prune any dist1's > curr_aug_path, since *all the costs>0 */ if(dist1 < curr_aug_path){ if(!m[j]){ /*we need itrace because, the last i1 which *we explore may not actually give the shortest *augmenting path.*/ jend = j; itrace = i1; curr_aug_path = dist1; aug_label[j] = k; }else if(dist1 < dist[j]){ /*Update the dist*/ dist[j] = dist1; p[m[j]] = i1; aug_label[j] = k; #if SIMPLE_HEAP #ifdef USE_BIT_ARRAY SetBit(heap_marker,j); #else heap_marker[j] = i; #endif #elif BINARY_HEAP /*SIMPLE_HEAP*/ #ifdef USE_BIT_ARRAY if(CheckBit(heap_marker,j)){ #else if(heap_marker[j]){ #endif #ifndef NO_LOCAL_PROFILING StartClock(hupdate_clk); #endif /*Call the decrease Key Operation*/ DecreaseKey(bin_heap,j); #ifndef NO_LOCAL_PROFILING StopClock(hupdate_clk); hupdate_ticks += GetClockTicks(hupdate_clk); #endif } #ifdef TURN_ON_SAVE_HEAP else if(curr_shortest_path && dist[j] <= (curr_shortest_path)*(close_factor)){ /*If dist[j] is close to root push it in *save_heap_op*/ assert(save_heap_index < n); save_heap_op[++save_heap_index] = j; } #endif else{ #ifndef NO_LOCAL_PROFILING StartClock(hins_clk); #endif InsertHeap(bin_heap,&(dist[j])); #ifndef NO_LOCAL_PROFILING StopClock(hins_clk); hins_ticks += GetClockTicks(hins_clk); #endif #ifdef USE_BIT_ARRAY SetBit(heap_marker,j); #endif } #endif /*SIMPLE_HEAP*/ } } } if(curr_aug_path <= curr_shortest_path){ break; } /*We now have a heap of 
matched cols, so pick the min*/ #ifdef SIMPLE_HEAP j = SimplePickMin(heap_marker,dist,n); if(j){ curr_shortest_path = dist[j]; UnsetBit(heap_marker,j); #elif BINARY_HEAP #ifndef NO_LOCAL_PROFILING StartClock(hdel_clk); #endif if(save_heap_index){ j = save_heap_op[save_heap_index]; save_heap_index--; curr_shortest_path = dist[j]; #ifdef USE_BIT_ARRAY SetBit(col_marker,j); #else col_marker[j] = (Indices)i; update_stack[++update_stack_index]=j; #endif /*#ifdef USE_BIT_ARRAY*/ i1 = m[j]; }else if(dist_ptr = (ValueType *) HeapDelete(bin_heap)) { #ifndef NO_LOCAL_PROFILING StopClock(hdel_clk); hdel_ticks += GetClockTicks(hdel_clk); #endif assert((unsigned long)dist_ptr >= (unsigned long)&dist[1]); j = ((((unsigned long)dist_ptr - (unsigned long)&dist[1]))/sizeof(ValueType))+1; assert(j>=1 && j<=n); curr_shortest_path = dist[j]; heap_marker[j] = 0; /*Setting the keyMap in Heap to 0*/ #endif /*#ifdef SIMPLE_HEAP */ #ifdef USE_BIT_ARRAY SetBit(col_marker,j); update_stack[++update_stack_index]=j; #else col_marker[j] = (Indices)i; update_stack[++update_stack_index]=j; #endif /*#ifdef USE_BIT_ARRAY*/ i1 = m[j]; }else{ break; } } /*We found a shortest augmenting path*/ if(jend){ unsigned long **harray = bin_heap->heapArray; #ifndef NO_LOCAL_PROFILING StartClock(dual_clk); #endif /*NOTE1: We need a very fast way to update *the dual variables and also reset the dist[] *we avoid linear scan where ever we can to update *these dual variables*/ while(update_stack_index){ /*Update u[j]: while*/ j=update_stack[update_stack_index--]; u[j] = (u[j]+dist[j])-curr_aug_path; if(m[j]){ /*See NOTE1*/ i1 = m[j]; v[i1] = C[clabel[i1]] - u[j]; } dist[j] = MAX_DOUBLE_VALUE; if(bin_heap->n){ dist_ptr = (double *)harray[bin_heap->n]; j = ((((unsigned long)dist_ptr - (unsigned long)&dist[1]))/sizeof(ValueType))+1; heap_marker[j] = 0; *((double *)harray[bin_heap->n]) = MAX_DOUBLE_VALUE; bin_heap->n -= 1 ; } } /*Update u[j]: while*/ /*Uncomment if you need to print augmenting path*/ /*node_t 
itrace_prev;*/ /*printf("Shortest augmenting Path {");*/ j=jend; while(itrace){ m_inv_prev = m_inv[itrace]; m[j] = itrace; m_inv[itrace]=j; /*See NOTE1(above)*/ clabel[itrace] = aug_label[j]; v[itrace] = C[clabel[itrace]] - u[j]; /*printf("(%u,%u)",itrace,j);*/ j=m_inv_prev; /*itrace_prev = itrace;*/ itrace = p[itrace]; /* if(itrace){ printf("(%u,%u)",itrace_prev,m_inv_prev); }*/ } /*printf("}\n");*/ /*There may some dist[] still in the heap*/ while(bin_heap->n){ dist_ptr = (double *)harray[bin_heap->n]; j = ((((unsigned long)dist_ptr - (unsigned long)&dist[1]))/sizeof(ValueType))+1; heap_marker[j] = 0; *((double *)harray[bin_heap->n]) = MAX_DOUBLE_VALUE; bin_heap->n -= 1; } match_size++; /*End Dual Update*/ #ifndef NO_LOCAL_PROFILING StopClock(dual_clk); dual_ticks += GetClockTicks(dual_clk); #endif } /*if(jend) : Found Augmeting Path*/ } /*for(i=1;i<=n;i++): Main Outer Loop*/ StopClock(core_clk); WeightedMatchTicks = GetClockTicks(core_clk); #ifndef NO_LOCAL_PROFILING printf("Profile Summary\n"); printf("HINS=(%d) HDEL=(%d) HUPDATE=(%d)\n",(int)hins_ticks,(int)hdel_ticks, (int)hupdate_ticks); printf("DUAL=(%d) \n",(int)dual_ticks); #endif #ifdef USE_BIT_ARRAY FreeBitArray(col_marker); FreeBitArray(heap_marker); #else col_marker++; delete col_marker; #endif #ifdef SIMPLE_HEAP heap_marker++; delete heap_marker; #endif aug_label++; delete aug_label; save_heap_op++; delete save_heap_op; clabel++; delete clabel; #ifdef BINARY_HEAP FreeHeap(bin_heap); #endif return match_size; } /*O(n) time picking the maximum from the heap_marker */ node_t SimplePickMin(BitArray_t *bit_heap,double *dist,node_t n){ node_t min_j=0;node_t j; double curr_min = HUGE_VAL; for(j=1;j<=n;j++){ if(CheckBit(bit_heap,j) && dist[j] < curr_min){ min_j = j; curr_min = dist[j]; } } return min_j; } #ifdef BINARY_HEAP inline keyid_t GetDistKeyID(void *dist_ptr){ assert((unsigned long)dist_ptr >= dist_base); return (((((unsigned long)dist_ptr-dist_base))/sizeof(double))+1); } #endif BitArray_t* 
CreateBitArray(unsigned int size){ div_t d = div(size,SIZE_OF_BYTE_IN_BITS); BitArray_t *bits = (BitArray_t *)malloc(sizeof(BitArray_t)*1); assert(bits); bits->size = (d.rem > 0)?(d.quot+1):d.quot; bits->ba = (char *)malloc(sizeof(char)*(bits->size)); assert(bits->ba); memset(bits->ba,'\0',bits->size); return bits; }
void ComputeInitialExtremeMatch(ValueType *u,ValueType *v,Indices *clabel,ValueType *C, Indices *m,Indices *m_inv,Indices* colind,Indices* rowptr,Indices n,ValueType *dist){ Indices i,k,i1,k1; Indices k0,k10,j0,j,j1,j10; ValueType vmin; ValueType C1k; Clock *init_match_clk = CreateClock(); StartClock(init_match_clk); #if 0 /*Compute u[j]*/ for(j=1;j<=n;j++){ u[j] = GetMaxTypeValue<ValueType>(u[j]); dist[j] = u[j]; m[j]=0;m_inv[j]=0; } for(i=1;i<=n;i++){ for(k=rowptr[i];k<rowptr[i+1];k++){ if(C[k]<u[colind[k]]){ u[colind[k]] = C[k]; } } } #endif /*Compute v[i]*/ for(i=1;i<=n;i++){ v[i] = GetMaxTypeValue<ValueType>(v[i]); for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ j = colind[k]+1; vmin = (C[k]-u[j]); if(vmin < v[i]){ v[i] = vmin; } } } /*Update Cost and match.*/ for(i=1;i<=n;i++){ for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ j = colind[k]+1; C1k = C[k]-v[i]-u[j]; /*to handle -0.0*/ if(fabs(C1k-0.0) <= 1.0e-30 && (!m[j] && !m_inv[i])){ m[j] = i; m_inv[i] = j; clabel[i] = k; } } } /*1-Step Augmentation*/ for(i=1;i<=n;i++){ if(!m_inv[i]){ /*Unmatched Row*/ for(k=rowptr[i]+1;k<(rowptr[i+1]+1) && !(m_inv[i]);k++){ j = colind[k]+1; C1k = fabs(C[k]-v[i]-u[j]); if(C1k <= 1.0e-30){ /*assert(m[colind[k]]);*/ i1 = m[j]; /*assert(m_inv[i1] == j);*/ /*See if we can find any C1(i1,j1) == 0*/ for(k1=rowptr[i1]+1;k1<(rowptr[i1+1]+1);k1++){ j1 = colind[k1]+1; C1k = fabs(C[k1] - v[i1]-u[j1]); if(C1k <= 1.0e-30 && !(m[j1])){ /*augment the match.*/ m[j] = i; m_inv[i] = j; clabel[i] = k; m[j1] = i1; m_inv[i1] = j1; clabel[i1] = k1; break; } } } } } } StopClock(init_match_clk); InitialMatchTicks = GetClockTicks(init_match_clk); }