/*
 * Reset the local profiling tick counters (hins, hdel, hupdate, dual) and
 * create one clock for each of them.
 *
 * The delete clock was previously created twice in a row; the first instance
 * was immediately overwritten and could never be released. Each clock is now
 * created exactly once.
 */
void CreateProfilingClocks(void){
    hins_ticks = 0;
    hdel_ticks = 0;
    hupdate_ticks = 0;
    dual_ticks = 0;

    hins_clk = CreateClock();
    hdel_clk = CreateClock();
    hupdate_clk = CreateClock();
    dual_clk = CreateClock();
}
/** Parse a clock tray component.
 * Reads the "format" and "zone" attributes plus the optional width and
 * height attributes (0 when absent), creates the clock component and,
 * when creation succeeds, parses its actions and adds it to the tray.
 * @param tp The token node describing the clock.
 * @param tray The tray that receives the component.
 */
void ParseClock(const TokenNode *tp, TrayType *tray)
{
    TrayComponentType *clock;
    const char *format;
    const char *zone;
    const char *attr;
    int width;
    int height;

    Assert(tp);
    Assert(tray);

    format = FindAttribute(tp->attributes, "format");
    zone = FindAttribute(tp->attributes, "zone");

    /* A missing dimension attribute means "0". */
    attr = FindAttribute(tp->attributes, WIDTH_ATTRIBUTE);
    width = attr ? ParseUnsigned(tp, attr) : 0;

    attr = FindAttribute(tp->attributes, HEIGHT_ATTRIBUTE);
    height = attr ? ParseUnsigned(tp, attr) : 0;

    clock = CreateClock(format, zone, width, height);
    if(!JLIKELY(clock)) {
        return;
    }

    ParseTrayComponentActions(tp, clock, AddClockAction);
    AddTrayComponent(tray, clock);
}
/*
 * Probe program for the Clock helpers: runs the same arithmetic busy loop
 * twice, timing each run with StartClock/StopClock, and prints the tick count
 * reported by GetClockTicks after each run, followed by sizeof(clock_t).
 *
 * Fixes relative to the previous version:
 *  - count1 is initialised before it is read;
 *  - count1 is a long long so that count1*i cannot overflow (the product
 *    reaches roughly 2e12 during the loop, which does not fit in an int);
 *  - the printf conversions match their arguments (%llu for the tick count
 *    cast to unsigned long long, %zu for sizeof);
 *  - unused locals were removed and main returns a value.
 */
int main(int argc,char **argv){
    volatile long long count1 = 0;
    volatile int i;
    volatile double X = 1024;
    Clock *test_clk = CreateClock();

    (void)argc;
    (void)argv;

    StartClock(test_clk);
    for(i=0;i<1000000;i++){
        count1 += (i+1);
        /* count1 is at least 1 here, so the division is well defined. */
        count1 = (count1*i)/count1;
        X += ((X*X) + X/0.331)*(X*X*X);
    }
    StopClock(test_clk);
    printf("PROBE1: Clock Ticks Elapsed is %llu \n",
        (unsigned long long)GetClockTicks(test_clk));

    StartClock(test_clk);
    for(i=0;i<1000000;i++){
        count1 += (i+1);
        count1 = (count1*i)/count1;
        X += ((X*X) + X/0.331)*(X*X*X);
    }
    StopClock(test_clk);
    printf("PROBE2: Clock Ticks Elapsed is %llu \n",
        (unsigned long long)GetClockTicks(test_clk));

    printf("size of clock_t is %zu \n",sizeof(clock_t));
    return 0;
}
void LogTransformT(RowStarts *rowptr,ColIndices *colind,ValueType *val,Indices n,ValueType *u, ValueType *v,ValueType *dist,Indices *m,Indices *m_inv){ ValueType *max_c = new ValueType[n]; rowptr--;colind--;val--;max_c--; m--; m_inv--; u--; dist--; Indices k,i,j,j0; Clock *anst_clk; anst_clk = CreateClock(); StartClock(anst_clk); /*TODO: Avoid HUGE_VAL directly in templated code*/ for(j=1;j<=n;j++){ /*Find the maximum in the column*/ max_c[j] = (ValueType)0.0; m[j] = (Indices)0; m_inv[j]=0; u[j] = HUGE_VAL;dist[j]=HUGE_VAL; } for(i=1;i<=n;i++){ for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ val[k] = fabs(val[k]); j = colind[k]+1; if(val[k] > max_c[j]){ max_c[j] = val[k]; } } } #if 0 for(k=1;k<=n;k++){ printf("max in col %u is %e \n",k,max_c[k]); } #endif for(i=1;i<=n;i++){ for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ j = colind[k]+1; if(val[k]<=1.0e-30){ val[k] = HUGE_VAL; }else{ if(max_c[j]<=1.0e-30){ val[k] = HUGE_VAL;//MAX_DOUBLE_VALUE/n - (log(val[k])/M_LN10); }else{ val[k] = log10(max_c[j]/val[k]); val[k] = fabs(val[k]); if(val[k]< u[j]){ u[j] = val[k]; } } } /*take care of -0.0*/ /*assert(val[k]>=0.0);*/ } } StopClock(anst_clk); anst_ticks = GetClockTicks(anst_clk); ++max_c; delete max_c; }
/*
 * Timed wrapper around FindPerfectMatch.
 *
 * Creates the MC21 work arrays for a problem of order n, runs
 * FindPerfectMatch between StartClock/StopClock, stores the elapsed ticks in
 * anst_ticks, then releases the work arrays and the clock.
 */
void ANSTMC21(RowStarts rowptr,ColIndices colind,Indices n,Indices *m){
    Indices *marker_ws;
    Indices *parent_ws;
    Indices *inverse_ws;
    Clock *timer;

    timer = CreateClock();
    CreateMC21WorkSpace(n,&marker_ws,&parent_ws,&inverse_ws);

    /* Only the matching itself is timed, not the workspace handling. */
    StartClock(timer);
    FindPerfectMatch(rowptr,colind,n,m,parent_ws,inverse_ws,marker_ws);
    StopClock(timer);
    anst_ticks = GetClockTicks(timer);

    FreeMC21WorkSpace(marker_ws,parent_ws,inverse_ws);
    free(timer);
}
/*
 * Externally sort the fixed-size keys stored in a binary file.
 *
 * Set the columns you want to sort on in ESortParams.col_start and
 * ESortParams.col_end before calling.
 *
 * Phase 1 reads bin_kedge in chunks of up to KEY_SIZE*KEYS_IN_RUN bytes, sorts
 * each chunk with IntegerSort_SB and writes it to run_file as one run: an
 * unsigned long holding the number of key bytes, followed by the sorted keys.
 * Reading stops as soon as a read returns fewer than KEY_SIZE bytes.
 * Phase 2 passes run_file and the number of runs to ExternalRWayMerge.
 * The tick counts of both phases are printed to stdout.
 *
 * bin_kedge   path of the input file (opened read-only)
 * run_file    path of the run file (created or truncated)
 * KEY_SIZE    size of one key in bytes
 * key_compare comparison callback forwarded to ExternalRWayMerge
 *
 * Fixes relative to the previous version:
 *  - run_file is opened with open() instead of fileno(fopen(...)), which
 *    dereferenced NULL when fopen failed and leaked the FILE object;
 *  - descriptor 0 is accepted as valid (failure is a negative descriptor);
 *  - when asserts are compiled out, a failed open now cleans up and returns
 *    instead of continuing with invalid descriptors.
 * NOTE(review): clk is never released here - confirm how clocks returned by
 * CreateClock are meant to be freed.
 */
void ExSortKmerEdges(const char *bin_kedge, const char *run_file,
    unsigned char KEY_SIZE, ExKeyCompare key_compare){
    unsigned char *run_buffer = NULL;
    size_t key_buf_len = (KEY_SIZE*KEYS_IN_RUN);
    size_t buf_len = key_buf_len + sizeof(unsigned long);
    size_t ret_len;
    size_t out_len;
    int bin_kedge_fd = open(bin_kedge, O_RDONLY);
    /* Same effect as fopen(run_file, "w"): create or truncate, write-only. */
    int run_file_fd = open(run_file, O_WRONLY | O_CREAT | O_TRUNC, 0666);
    unsigned long *runlen;
    unsigned long rcount = 0;
    unsigned char *key_buf;
    Clock *clk;

    if(bin_kedge_fd < 0 || run_file_fd < 0){
        perror("FAILED TO OPEN FILES:");
        assert(0);
        /* Only reached when asserts are disabled. */
        if(bin_kedge_fd >= 0){
            close(bin_kedge_fd);
        }
        if(run_file_fd >= 0){
            close(run_file_fd);
        }
        return;
    }

    clk = CreateClock();
    run_buffer = malloc(sizeof(unsigned char)*buf_len);
    assert(run_buffer);
    /* Run layout: [unsigned long byte count][keys ...] */
    runlen = (unsigned long *)run_buffer;
    key_buf = run_buffer + sizeof(unsigned long);

    StartClock(clk);
    while((ret_len = SafeRead(bin_kedge_fd, key_buf, key_buf_len)) >= KEY_SIZE){
        *runlen = (unsigned long) ret_len;
        IntegerSort_SB(key_buf, key_buf + (*runlen) - KEY_SIZE, KEY_SIZE, 1,
            ESortParams.col_start, ESortParams.col_end, 0, CharMap,
            ESortParams.endian);

        /* Header plus keys, never more than the buffer holds. */
        out_len = *runlen + sizeof(unsigned long);
        if(out_len > buf_len){
            out_len = buf_len;
        }
        ret_len = SafeWrite(run_file_fd, run_buffer, out_len);
        rcount++;
        assert(ret_len == out_len);
    }
    StopClock(clk);
    FreeISortBuckets();

    /*create the final run*/
    close(run_file_fd);
    close(bin_kedge_fd);
    free(run_buffer);
    printf("\n[EX-SORT CREATED %lu RUNS] took %ld ticks\n", rcount,
        GetClockTicks(clk));

    StartClock(clk);
    /*call the external rway merge*/
    ExternalRWayMerge(run_file, RUNS_PER_MERGE, KEY_SIZE, key_compare, rcount);
    StopClock(clk);
    printf("[R-WAY MERGE] took %ld ticks\n", GetClockTicks(clk));
}
/*
 * Log-transform the stored values of a sparse graph in place.
 *
 * For every stored entry k (G->nnz_size of them) the value becomes
 * |log10(colmax / |value|)|, where colmax is the largest absolute value among
 * the entries that share the same G->colind[k]. Entries whose absolute value,
 * or whose column maximum, is <= 1.0e-30 become HUGE_VAL.
 * G->colind entries are used as 1-based column numbers in the range
 * 1..G->order. The elapsed clock ticks are stored in anst_ticks.
 *
 * Changes relative to the previous version (results are identical):
 *  - arrays are addressed 0-based instead of decrementing the base pointers,
 *    which formed pointers before the start of the allocations;
 *  - the calloc result is checked;
 *  - unused locals were removed.
 * NOTE(review): anst_clk is never released here - confirm how clocks returned
 * by CreateClock are meant to be freed.
 */
void LogTransform(SparseGraph *G){
    node_t *colind = G->colind;
    double *val = G->nnz;
    node_t n = G->order;
    node_t nnz_size = G->nnz_size;
    node_t k;
    node_t col;
    /* calloc zero-fills, so every column maximum starts at 0.0 */
    double *max_c = (double *)calloc(n,sizeof(double));
    Clock *anst_clk;

    /* calloc(0, ...) may legitimately return NULL */
    assert(max_c != NULL || n == 0);

    anst_clk = CreateClock();
    StartClock(anst_clk);

    /* Pass 1: take absolute values and find the maximum of each column. */
    for(k=0;k<nnz_size;k++){
        col = colind[k]-1;
        val[k] = fabs(val[k]);
        if(val[k] > max_c[col]){
            max_c[col] = val[k];
        }
    }

    /* Pass 2: replace each value by its transformed cost. */
    for(k=0;k<nnz_size;k++){
        col = colind[k]-1;
        if(fabs(val[k]-0.0)<=1.0e-30){
            val[k] = HUGE_VAL;
        }else{
            if(fabs(max_c[col]-0.0)<=1.0e-30){
                val[k] = HUGE_VAL;
            }else{
                val[k] = log10(max_c[col]/val[k]);
                /* fabs() takes care of -0.0 */
                val[k] = fabs(val[k]);
            }
        }
        assert(val[k]>=0.0);
    }

    StopClock(anst_clk);
    anst_ticks = GetClockTicks(anst_clk);
    free(max_c);
}
/** Parse a clock tray component.
 * Reads the format and zone attributes, takes the node value as the
 * command when it is a non-empty string (NULL otherwise), reads the
 * optional width and height attributes (0 when absent), then creates
 * the clock component and adds it to the tray when creation succeeds.
 * @param tp The token node describing the clock.
 * @param tray The tray that receives the component.
 */
void ParseClock(const TokenNode *tp, TrayType *tray)
{
    TrayComponentType *clock;
    const char *format;
    const char *zone;
    const char *command;
    const char *attr;
    int width;
    int height;

    Assert(tp);
    Assert(tray);

    format = FindAttribute(tp->attributes, FORMAT_ATTRIBUTE);
    zone = FindAttribute(tp->attributes, ZONE_ATTRIBUTE);

    /* An absent or empty node value means "no command". */
    command = NULL;
    if(tp->value && tp->value[0] != '\0') {
        command = tp->value;
    }

    attr = FindAttribute(tp->attributes, WIDTH_ATTRIBUTE);
    width = attr ? ParseUnsigned(tp, attr) : 0;

    attr = FindAttribute(tp->attributes, HEIGHT_ATTRIBUTE);
    height = attr ? ParseUnsigned(tp, attr) : 0;

    clock = CreateClock(format, zone, command, width, height);
    if(!JLIKELY(clock)) {
        return;
    }

    AddTrayComponent(tray, clock);
}
size_t WeightedMatching(RowStarts rowptr,ColIndices colind,ValueTypePtr C, ValueTypePtr dist,ValueTypePtr u,ValueTypePtr v,Indices *p, Indices *m_inv,Indices *m, Indices n,CompareFunction cmpFunc){ typedef typename std::iterator_traits<ValueTypePtr>::value_type ValueType; Indices i,j,i1,jend,k,m_inv_prev; Indices match_size=0; Indices k0,j0; ValueType curr_shortest_path = (ValueType)0; ValueType curr_aug_path = GetMaxTypeValue<ValueType>(curr_aug_path); ValueType dist1; Indices itrace; /*Cost of the edges in the match if *$(i,j) \in M$ then $clabel[i] = C[i][j]$*/ Indices *clabel = new Indices[n]; Indices *aug_label = new Indices[n]; Indices *update_stack = new Indices[n]; Indices update_stack_index; /*Save The Operations on the Heap.*/ Indices save_heap_index; Indices *save_heap_op = new Indices[n]; #ifdef TURN_ON_SAVE_HEAP double close_factor = (double)1.0 + (double)1.0e-16; #endif /*Force the write back to memory to avoid floating point issues*/ ValueType force_mem_write[3]; #ifndef NO_LOCAL_PROFILING CreateProfilingClocks(); #endif /*Core Profiling Clock*/ Clock *core_clk = CreateClock(); #ifdef USE_BIT_ARRAY BitArray_t *col_marker = CreateBitArray(n); BitArray_t *heap_marker = CreateBitArray(n); #else Indices *col_marker = new Indices[n]; unsigned int *heap_marker = NULL; col_marker--; for(i=1;i<=n;i++){ /*Do we need Initialization?*/ col_marker[i] = (Indices)0; } #endif #if BINARY_HEAP Heap *bin_heap = NewBinaryHeap(cmpFunc,n,GetDistKeyID); ValueType *dist_ptr = NULL; heap_marker = bin_heap->keyMap; #endif /*Algorithm Uses 1-Indexing to be consistent*/ C--;m--;dist--;u--;v--;p--;m_inv--; rowptr--;colind--;clabel--;save_heap_op--; update_stack--;aug_label--; assert(dist && u && v && p); ComputeInitialExtremeMatch<ValueType,Indices>(u,v,clabel,C,m,m_inv,colind, rowptr,n,dist); match_size=0; StartClock(core_clk); for(i=1;i<=n;i++){ if(m_inv[i]){ match_size++; continue; } /* *Aim is to find a value for jend such that the path *from i-->jend is the shortest */ i1 
= i; p[i1] = 0; jend=0; itrace=i; #ifdef USE_BIT_ARRAY ResetAllBits(col_marker); ResetAllBits(heap_marker); #endif #if BINARY_HEAP bin_heap->n = 0; dist_base = (unsigned long)&(dist[1]); #endif curr_shortest_path=(ValueType)0; curr_aug_path=GetMaxTypeValue<ValueType>(curr_aug_path); save_heap_index = (Indices)0; update_stack_index = (Indices)0; while(1){ for(k=rowptr[i1]+1;k<(rowptr[i1+1]+1);k++){ j = colind[k]+1; #ifdef USE_BIT_ARRAY if(CheckBit(col_marker,j)){ #else if(col_marker[j]==i){ #endif continue; } force_mem_write[k%3] = C[k]-(v[i1]+u[j]); dist1 = curr_shortest_path + force_mem_write[k%3]; /*Prune any dist1's > curr_aug_path, since *all the costs>0 */ if(dist1 < curr_aug_path){ if(!m[j]){ /*we need itrace because, the last i1 which *we explore may not actually give the shortest *augmenting path.*/ jend = j; itrace = i1; curr_aug_path = dist1; aug_label[j] = k; }else if(dist1 < dist[j]){ /*Update the dist*/ dist[j] = dist1; p[m[j]] = i1; aug_label[j] = k; #if SIMPLE_HEAP #ifdef USE_BIT_ARRAY SetBit(heap_marker,j); #else heap_marker[j] = i; #endif #elif BINARY_HEAP /*SIMPLE_HEAP*/ #ifdef USE_BIT_ARRAY if(CheckBit(heap_marker,j)){ #else if(heap_marker[j]){ #endif #ifndef NO_LOCAL_PROFILING StartClock(hupdate_clk); #endif /*Call the decrease Key Operation*/ DecreaseKey(bin_heap,j); #ifndef NO_LOCAL_PROFILING StopClock(hupdate_clk); hupdate_ticks += GetClockTicks(hupdate_clk); #endif } #ifdef TURN_ON_SAVE_HEAP else if(curr_shortest_path && dist[j] <= (curr_shortest_path)*(close_factor)){ /*If dist[j] is close to root push it in *save_heap_op*/ assert(save_heap_index < n); save_heap_op[++save_heap_index] = j; } #endif else{ #ifndef NO_LOCAL_PROFILING StartClock(hins_clk); #endif InsertHeap(bin_heap,&(dist[j])); #ifndef NO_LOCAL_PROFILING StopClock(hins_clk); hins_ticks += GetClockTicks(hins_clk); #endif #ifdef USE_BIT_ARRAY SetBit(heap_marker,j); #endif } #endif /*SIMPLE_HEAP*/ } } } if(curr_aug_path <= curr_shortest_path){ break; } /*We now have a heap of 
matched cols, so pick the min*/ #ifdef SIMPLE_HEAP j = SimplePickMin(heap_marker,dist,n); if(j){ curr_shortest_path = dist[j]; UnsetBit(heap_marker,j); #elif BINARY_HEAP #ifndef NO_LOCAL_PROFILING StartClock(hdel_clk); #endif if(save_heap_index){ j = save_heap_op[save_heap_index]; save_heap_index--; curr_shortest_path = dist[j]; #ifdef USE_BIT_ARRAY SetBit(col_marker,j); #else col_marker[j] = (Indices)i; update_stack[++update_stack_index]=j; #endif /*#ifdef USE_BIT_ARRAY*/ i1 = m[j]; }else if(dist_ptr = (ValueType *) HeapDelete(bin_heap)) { #ifndef NO_LOCAL_PROFILING StopClock(hdel_clk); hdel_ticks += GetClockTicks(hdel_clk); #endif assert((unsigned long)dist_ptr >= (unsigned long)&dist[1]); j = ((((unsigned long)dist_ptr - (unsigned long)&dist[1]))/sizeof(ValueType))+1; assert(j>=1 && j<=n); curr_shortest_path = dist[j]; heap_marker[j] = 0; /*Setting the keyMap in Heap to 0*/ #endif /*#ifdef SIMPLE_HEAP */ #ifdef USE_BIT_ARRAY SetBit(col_marker,j); update_stack[++update_stack_index]=j; #else col_marker[j] = (Indices)i; update_stack[++update_stack_index]=j; #endif /*#ifdef USE_BIT_ARRAY*/ i1 = m[j]; }else{ break; } } /*We found a shortest augmenting path*/ if(jend){ unsigned long **harray = bin_heap->heapArray; #ifndef NO_LOCAL_PROFILING StartClock(dual_clk); #endif /*NOTE1: We need a very fast way to update *the dual variables and also reset the dist[] *we avoid linear scan where ever we can to update *these dual variables*/ while(update_stack_index){ /*Update u[j]: while*/ j=update_stack[update_stack_index--]; u[j] = (u[j]+dist[j])-curr_aug_path; if(m[j]){ /*See NOTE1*/ i1 = m[j]; v[i1] = C[clabel[i1]] - u[j]; } dist[j] = MAX_DOUBLE_VALUE; if(bin_heap->n){ dist_ptr = (double *)harray[bin_heap->n]; j = ((((unsigned long)dist_ptr - (unsigned long)&dist[1]))/sizeof(ValueType))+1; heap_marker[j] = 0; *((double *)harray[bin_heap->n]) = MAX_DOUBLE_VALUE; bin_heap->n -= 1 ; } } /*Update u[j]: while*/ /*Uncomment if you need to print augmenting path*/ /*node_t 
itrace_prev;*/ /*printf("Shortest augmenting Path {");*/ j=jend; while(itrace){ m_inv_prev = m_inv[itrace]; m[j] = itrace; m_inv[itrace]=j; /*See NOTE1(above)*/ clabel[itrace] = aug_label[j]; v[itrace] = C[clabel[itrace]] - u[j]; /*printf("(%u,%u)",itrace,j);*/ j=m_inv_prev; /*itrace_prev = itrace;*/ itrace = p[itrace]; /* if(itrace){ printf("(%u,%u)",itrace_prev,m_inv_prev); }*/ } /*printf("}\n");*/ /*There may some dist[] still in the heap*/ while(bin_heap->n){ dist_ptr = (double *)harray[bin_heap->n]; j = ((((unsigned long)dist_ptr - (unsigned long)&dist[1]))/sizeof(ValueType))+1; heap_marker[j] = 0; *((double *)harray[bin_heap->n]) = MAX_DOUBLE_VALUE; bin_heap->n -= 1; } match_size++; /*End Dual Update*/ #ifndef NO_LOCAL_PROFILING StopClock(dual_clk); dual_ticks += GetClockTicks(dual_clk); #endif } /*if(jend) : Found Augmeting Path*/ } /*for(i=1;i<=n;i++): Main Outer Loop*/ StopClock(core_clk); WeightedMatchTicks = GetClockTicks(core_clk); #ifndef NO_LOCAL_PROFILING printf("Profile Summary\n"); printf("HINS=(%d) HDEL=(%d) HUPDATE=(%d)\n",(int)hins_ticks,(int)hdel_ticks, (int)hupdate_ticks); printf("DUAL=(%d) \n",(int)dual_ticks); #endif #ifdef USE_BIT_ARRAY FreeBitArray(col_marker); FreeBitArray(heap_marker); #else col_marker++; delete col_marker; #endif #ifdef SIMPLE_HEAP heap_marker++; delete heap_marker; #endif aug_label++; delete aug_label; save_heap_op++; delete save_heap_op; clabel++; delete clabel; #ifdef BINARY_HEAP FreeHeap(bin_heap); #endif return match_size; } /*O(n) time picking the maximum from the heap_marker */ node_t SimplePickMin(BitArray_t *bit_heap,double *dist,node_t n){ node_t min_j=0;node_t j; double curr_min = HUGE_VAL; for(j=1;j<=n;j++){ if(CheckBit(bit_heap,j) && dist[j] < curr_min){ min_j = j; curr_min = dist[j]; } } return min_j; } #ifdef BINARY_HEAP inline keyid_t GetDistKeyID(void *dist_ptr){ assert((unsigned long)dist_ptr >= dist_base); return (((((unsigned long)dist_ptr-dist_base))/sizeof(double))+1); } #endif BitArray_t* 
CreateBitArray(unsigned int size){ div_t d = div(size,SIZE_OF_BYTE_IN_BITS); BitArray_t *bits = (BitArray_t *)malloc(sizeof(BitArray_t)*1); assert(bits); bits->size = (d.rem > 0)?(d.quot+1):d.quot; bits->ba = (char *)malloc(sizeof(char)*(bits->size)); assert(bits->ba); memset(bits->ba,'\0',bits->size); return bits; }
void ComputeInitialExtremeMatch(ValueType *u,ValueType *v,Indices *clabel,ValueType *C, Indices *m,Indices *m_inv,Indices* colind,Indices* rowptr,Indices n,ValueType *dist){ Indices i,k,i1,k1; Indices k0,k10,j0,j,j1,j10; ValueType vmin; ValueType C1k; Clock *init_match_clk = CreateClock(); StartClock(init_match_clk); #if 0 /*Compute u[j]*/ for(j=1;j<=n;j++){ u[j] = GetMaxTypeValue<ValueType>(u[j]); dist[j] = u[j]; m[j]=0;m_inv[j]=0; } for(i=1;i<=n;i++){ for(k=rowptr[i];k<rowptr[i+1];k++){ if(C[k]<u[colind[k]]){ u[colind[k]] = C[k]; } } } #endif /*Compute v[i]*/ for(i=1;i<=n;i++){ v[i] = GetMaxTypeValue<ValueType>(v[i]); for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ j = colind[k]+1; vmin = (C[k]-u[j]); if(vmin < v[i]){ v[i] = vmin; } } } /*Update Cost and match.*/ for(i=1;i<=n;i++){ for(k=rowptr[i]+1;k<(rowptr[i+1]+1);k++){ j = colind[k]+1; C1k = C[k]-v[i]-u[j]; /*to handle -0.0*/ if(fabs(C1k-0.0) <= 1.0e-30 && (!m[j] && !m_inv[i])){ m[j] = i; m_inv[i] = j; clabel[i] = k; } } } /*1-Step Augmentation*/ for(i=1;i<=n;i++){ if(!m_inv[i]){ /*Unmatched Row*/ for(k=rowptr[i]+1;k<(rowptr[i+1]+1) && !(m_inv[i]);k++){ j = colind[k]+1; C1k = fabs(C[k]-v[i]-u[j]); if(C1k <= 1.0e-30){ /*assert(m[colind[k]]);*/ i1 = m[j]; /*assert(m_inv[i1] == j);*/ /*See if we can find any C1(i1,j1) == 0*/ for(k1=rowptr[i1]+1;k1<(rowptr[i1+1]+1);k1++){ j1 = colind[k1]+1; C1k = fabs(C[k1] - v[i1]-u[j1]); if(C1k <= 1.0e-30 && !(m[j1])){ /*augment the match.*/ m[j] = i; m_inv[i] = j; clabel[i] = k; m[j1] = i1; m_inv[i1] = j1; clabel[i1] = k1; break; } } } } } } StopClock(init_match_clk); InitialMatchTicks = GetClockTicks(init_match_clk); }