void SetPrint(int argcIn, const char* argvIn[]) {
    if ( argcIn == 1 || (argcIn == 2 && argvIn[1][0] == 'm') ) {
        cout << endl;
        PrintSteps(true);
        PrintSummary(true);
    } else if ( argcIn > 1 && argcIn < 4 ) {
        // Note: argvIn[1][9] reads the 10th character of the option string,
        // which may be past the end of short options such as "-s".
        if ( argvIn[1][0] == '-' && (argvIn[1][1] == 'n' || argvIn[1][9] == 'n') ) {
            PrintSteps(false);
            PrintSummary(false);
        } else if ( argvIn[1][0] == '-' && (argvIn[1][1] == 's' || argvIn[1][9] == 's') ) {
            PrintSteps(true);
            PrintSummary(false);
        } else if ( argvIn[1][0] == '-' && (argvIn[1][1] == 'a' || argvIn[1][9] == 'a') ) {
            PrintSteps(false);
            PrintSummary(true);
        }
        cout << endl;
    } else {
        PrintSteps(false);
        PrintSummary(false);
    }
}
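/*
 * Illustrative sketch, not from the original source: the -n / -s / -a flag
 * dispatch of SetPrint() reduced to a pure function so the mapping is easy
 * to see.  "steps"/"summary" mirror the PrintSteps()/PrintSummary() toggles
 * used above; all names in this sketch are hypothetical.
 */
#include <stdio.h>

struct PrintFlags { int steps; int summary; };

static struct PrintFlags ParsePrintFlags(int argc, const char *argv[])
{
    struct PrintFlags f = { 0, 0 };                   /* default: print nothing */

    if (argc == 1 || (argc == 2 && argv[1][0] == 'm')) {
        f.steps = f.summary = 1;                      /* no options: print everything */
    } else if (argc > 1 && argc < 4 && argv[1][0] == '-' && argv[1][1] != '\0') {
        switch (argv[1][1]) {
        case 'n': break;                              /* -n: print nothing     */
        case 's': f.steps = 1; break;                 /* -s: steps only        */
        case 'a': f.summary = 1; break;               /* -a: summary only      */
        default:  break;                              /* unknown flag: nothing */
        }
    }
    return f;
}

int main(int argc, const char *argv[])
{
    struct PrintFlags f = ParsePrintFlags(argc, argv);
    printf("steps=%d summary=%d\n", f.steps, f.summary);
    return 0;
}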
void SingleEvent () { real vvSum; real sp; int n; NextEvent (); if (evIdB < MOL_LIMIT) { ProcessCollision (); ++ collCount; } else if (evIdB < MOL_LIMIT + NDIM * 2 || evIdB >= MOL_LIMIT + 100) { ProcessCellCrossing (); ++ crossCount; } else if (evIdB == MOL_LIMIT + 6) { UpdateSystem (); nextSumTime += intervalSum; ScheduleEvent (0, MOL_LIMIT + 6, nextSumTime); VZero (vSum); vvSum = 0.; sp = 0.; DO_MOL { VVAdd (vSum, mol[n].rv); vvSum += VLenSq (mol[n].rv); sp += VDot (mol[n].r, gravField); } kinEnVal = vvSum * 0.5 / nMol; totEnVal = kinEnVal - sp / nMol; PrintSummary (stdout); } else if (evIdB == MOL_LIMIT + 7) {
void SingleStep ()
{
  ++ stepCount;
  timeNow = stepCount * deltaT;
  PredictorStep ();
  PredictorStepQ ();
  GenSiteCoords ();
  ComputeSiteForces ();
  ComputeTorqs ();
  ComputeAccelsQ ();
  ApplyThermostat ();
  CorrectorStep ();
  CorrectorStepQ ();
  AdjustQuat ();
  ApplyBoundaryCond ();
  EvalProps ();
  if (stepCount % stepAdjustTemp == 0) AdjustTemp ();
  AccumProps (1);
  if (stepCount % stepAvg == 0) {
    AccumProps (2);
    PrintSummary (stdout);
    AccumProps (0);
  }
  if (stepCount >= stepEquil && (stepCount - stepEquil) % stepRdf == 0)
     EvalRdf ();
}
int myGetState(void * hndl, UserState * ustate, unsigned int privdata)
{
    static int iter=0;

    log_verbose("Reached myGetState with privdata %x\n", privdata);

    /* Same state is being returned for both engines */
    ustate->LinkState = LINK_UP;
    ustate->Errors = 0;
    ustate->MinPktSize = RawMinPktSize;
    ustate->MaxPktSize = RawMaxPktSize;
    ustate->TestMode = RawTestMode;
    if(privdata == 0x54545454)
        ustate->Buffers = TxBufs.TotalNum;
    else
        ustate->Buffers = RxBufs.TotalNum;

    if(iter++ >= 4) {
        PrintSummary();
        iter = 0;
    }
    return 0;
}
int main (int argc, char **argv) { int i, nocase = 0; FILE *fd; char filename[20]; ACSM_STRUCT * acsm; if (argc < 3) { fprintf (stderr,"Usage: acsmx filename pattern1 pattern2 ... -nocase\n"); exit (0); } acsm = acsmNew (); strcpy (filename, argv[1]); fd = fopen(filename,"r"); if(fd == NULL) { fprintf(stderr,"Open file error!\n"); exit(1); } for (i = 1; i < argc; i++) if (strcmp (argv[i], "-nocase") == 0) nocase = 1; for (i = 2; i < argc; i++) { if (argv[i][0] == '-') continue; printf("%s,%d\n",argv[i],strlen (argv[i])); acsmAddPattern (acsm, argv[i], strlen (argv[i]), nocase,1); } fgets(text,MAXLEN,fd); /* Generate GtoTo Table and Fail Table */ acsmCompile (acsm); printf("--------------------------------\n"); NS_TIME(time); NS_TIME_START(time); /*Search Pattern*/ //while ( fgets(text,MAXLEN,fd) ) //{ acsmSearch (acsm, text, strlen (text), PrintMatch); // nline++; //} NS_TIME_END(time); PrintSummary(acsm->acsmPatterns); int a[10]={45,45,45,4,1}; #ifdef __HAVE__LOAD__ printf("-------%d\n", getSummary (acsm->acsmPatterns,a)); #endif acsmFree (acsm); printf ("\n### AC Match Finished ###\n"); // system("pause"); return (0); }
void SingleEvent ()
{
  real vvSum;
  int n;

  NextEvent ();
  if (evIdB < MOL_LIMIT) {
    ProcessCollision ();
    EvalFreePath ();
    ++ collCount;
  } else if (evIdB >= MOL_LIMIT + 100) {
    ProcessCellCrossing ();
    ++ crossCount;
  } else if (evIdB == MOL_LIMIT + 6) {
    UpdateSystem ();
    nextSumTime += intervalSum;
    ScheduleEvent (0, MOL_LIMIT + 6, nextSumTime);
    VZero (vSum);
    vvSum = 0.;
    DO_MOL {
      VVAdd (vSum, mol[n].rv);
      vvSum += VLenSq (mol[n].rv);
    }
    kinEnVal = vvSum * 0.5 / nMol;
    PrintSummary (stdout);
  }
}
void PrintQueryResult(int ego, char **caption, char **result, int row, int col)
{
    if (ego)
        VPrint(caption, result, row, col);
    else
        HPrint(caption, result, row, col);
    PrintSummary(row);
}
void SingleStep ()
{
  ++ stepCount;
  timeNow = stepCount * deltaT;
  PredictorStep ();
  PredictorStepS ();
  ComputeForces ();
  ComputeForcesDipoleR ();
  ComputeForcesDipoleF ();
  ComputeDipoleAccel ();
  ApplyThermostat ();
  CorrectorStep ();
  CorrectorStepS ();
  AdjustDipole ();
  ApplyBoundaryCond ();
  EvalProps ();
  if (stepCount % stepAdjustTemp == 0) AdjustTemp ();
  AccumProps (1);
  if (stepCount % stepAvg == 0) {
    AccumProps (2);
    PrintSummary (stdout);
    AccumProps (0);
  }
  if (stepCount >= stepEquil && (stepCount - stepEquil) % stepRdf == 0)
     EvalRdf ();
}
static void __exit rawdata_cleanup(void)
{
    int i;

    /* Stop the polling routine */
    del_timer_sync(&poll_timer);
    //DriverState = CLOSED;

    /* Stop any running tests, else the hardware's packet checker &
     * generator will continue to run. */
    XIo_Out32(TXbarbase+TX_CONFIG_ADDRESS, 0);
    XIo_Out32(TXbarbase+RX_CONFIG_ADDRESS, 0);

    printk(KERN_INFO "%s: Unregistering Xilinx driver from kernel.\n", MYNAME);

    if (TxBufCnt != RxBufCnt) {
        printk("%s: Buffers Transmitted %u Received %u\n", MYNAME, TxBufCnt, RxBufCnt);
        printk("TxSeqNo = %u, RxSeqNo = %u\n", TxSeqNo, RxSeqNo);
        mdelay(1);
    }

#ifdef FIFO_EMPTY_CHECK
    DmaFifoEmptyWait(MYHANDLE,DIR_TYPE_S2C);
    // wait for appropriate time to stabilize
    mdelay(STABILITY_WAIT_TIME);
#endif
    DmaUnregister(handle[0]);
#ifdef FIFO_EMPTY_CHECK
    DmaFifoEmptyWait(MYHANDLE,DIR_TYPE_C2S);
    // wait for appropriate time to stabilize
    mdelay(STABILITY_WAIT_TIME);
#endif
    DmaUnregister(handle[2]);

    PrintSummary();
    mdelay(1000);

    /* Not sure if free_page() sleeps or not. */
    spin_lock_bh(&RawLock);
    printk("Freeing user buffers\n");
    for(i=0; i<TxBufs.TotalNum; i++)
        //kfree(TxBufs.origVA[i]);
        free_page((unsigned long)(TxBufs.origVA[i]));
    for(i=0; i<RxBufs.TotalNum; i++)
        //kfree(RxBufs.origVA[i]);
        free_page((unsigned long)(RxBufs.origVA[i]));
    spin_unlock_bh(&RawLock);
}
void MCommandProfiler::Analysis()
{
	FILE* fp = fopen("CmdProfile.txt", "wt");
	if (fp == 0) return;

	PrintTitle(fp);
	PrintSummary(fp);
	PrintCmdItems(fp);

	fclose(fp);
	Reset();
}
int main (int argc, char **argv)
{
  GetNameList (argc, argv);
  //PrintNameList (stdout);
  SetParams ();
  SetupJob ();
  moreCycles = 1;
  PrintSummary (stdout);
  while (moreCycles) {
    SingleStep ();
    if (stepCount >= stepLimit) moreCycles = 0;
  }
}
int main(int argc, char **argv) { int i, nocase = 0; FILE *fd; char filename[20]; ACSM_STRUCT * acsm; // if (argc < 3) // { // fprintf(stderr, "Usage: acsmx filename pattern1 pattern2 ... -nocase\n"); // exit(0); // } acsm = acsmNew(); strcpy_s(filename, "test.txt"); fd = fopen(filename, "r"); if (fd == NULL) { fprintf(stderr, "Open file error!\n"); exit(1); } for (i = 1; i < argc; i++) if (strcmp(argv[i], "-nocase") == 0) nocase = 1; char a[] = { "test" }; acsmAddPattern(acsm, (unsigned char *)a, strlen(a), nocase); /* Generate GtoTo Table and Fail Table */ acsmCompile(acsm); /*Search Pattern*/ while (fgets(( char*)text, MAXLEN, fd)) { acsmSearch(acsm, text, strlen((char*)text), PrintMatch); nline++; } PrintSummary(acsm->acsmPatterns); acsmFree(acsm); printf("\n### AC Match Finished ###\n"); system("pause"); return (0); }
void SingleStep ()
{
  ++ stepCount;
  timeNow = stepCount * deltaT;
  LeapfrogStep (1);
  ApplyBoundaryCond ();
  ComputeForces ();
  LeapfrogStep (2);
  //EvalProps ();
  //AccumProps (1);
  if (stepCount % stepAvg == 0) {
    //AccumProps (2);
    PrintSummary (stdout);
    //PrintVels (stdout);
    //AccumProps (0);
  }
}
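/*
 * Illustrative sketch, not from the original source.  SingleStep() above
 * calls LeapfrogStep(1) before the force evaluation and LeapfrogStep(2)
 * after it; the routine itself is defined elsewhere in that code.  A common
 * two-part (velocity-Verlet style) split is shown here for a single 1-D
 * particle with a harmonic force; all names are local to this sketch.
 */
#include <stdio.h>

static double r, rv, ra;                 /* position, velocity, acceleration */
static const double deltaT = 0.01;

static void ComputeForces(void) { ra = -r; }       /* harmonic force: a = -x */

static void LeapfrogStep(int part)
{
    if (part == 1) {
        rv += 0.5 * deltaT * ra;         /* half kick with old acceleration */
        r  += deltaT * rv;               /* drift */
    } else {
        rv += 0.5 * deltaT * ra;         /* half kick with new acceleration */
    }
}

int main(void)
{
    r = 1.0; rv = 0.0;
    ComputeForces();
    for (int step = 0; step < 5; ++step) {
        LeapfrogStep(1);
        ComputeForces();
        LeapfrogStep(2);
        printf("step %d: r=%.5f rv=%.5f\n", step + 1, r, rv);
    }
    return 0;
}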
static void __exit rawdata_cleanup(void) { int i; /* Stop the polling routine */ del_timer_sync(&poll_timer); //DriverState = CLOSED; /* Stop any running tests, else the hardware's packet checker & * generator will continue to run. */ log_verbose("TXbarbase = %p\n", TXbarbase); XIo_Out32(TXbarbase+TX_CONFIG_ADDRESS, 0); #ifndef XAUI XIo_Out32(TXbarbase+RX_CONFIG_ADDRESS, 0); #endif printk(KERN_INFO "%s: Unregistering Xilinx driver from kernel.\n", MYNAME); if (TxBufCnt != RxBufCnt) { printk("%s: Buffers Transmitted %u Received %u\n", MYNAME, TxBufCnt, RxBufCnt); printk("TxSeqNo = %u, RxSeqNo = %u\n", TxSeqNo, RxSeqNo); mdelay(1); } DmaUnregister(handle[0]); DmaUnregister(handle[2]); PrintSummary(); mdelay(1000); /* Not sure if free_page() sleeps or not. */ spin_lock_bh(&RawLock); printk("Freeing user buffers\n"); for(i=0; i<TxBufs.TotalNum; i++) //kfree(TxBufs.origVA[i]); free_page((unsigned long)(TxBufs.origVA[i])); for(i=0; i<RxBufs.TotalNum; i++) //kfree(RxBufs.origVA[i]); free_page((unsigned long)(RxBufs.origVA[i])); spin_unlock_bh(&RawLock); }
int main(int argc, char* argv[])
{
    int attrib_ct, line_ct;
    int k;                  //loop counters
    ListP *Attributes;

    //open files
    getFileHandles(argc, argv);

    //get attributes and line counters
    attrib_ct = readValue();
    line_ct = readValue();
    getc(inF);              //eat newline

    //Make array of attributes
    Attributes = makeArrayofList(attrib_ct);

    //read file
    for (k = 0; k < line_ct; k++)
        readAttributes(Attributes, attrib_ct);

    PrintSummary(Attributes, attrib_ct);
    return 0;
}
void SingleStep ()
{
  ++ stepCount;
  timeNow = stepCount * deltaT;
  LeapfrogStep (1);
  GenSiteCoords ();
  ComputeSiteForces ();
  ComputeTorqs ();
  ApplyThermostat ();
  LeapfrogStep (2);
  ApplyBoundaryCond ();
  EvalProps ();
  if (stepCount % stepAdjustTemp == 0 ||
     (stepCount < stepEquil && stepCount % 100 == 0)) AdjustTemp ();
  AccumProps (1);
  if (stepCount % stepAvg == 0) {
    AccumProps (2);
    PrintSummary (stdout);
    AccumProps (0);
  }
}
STATUS LNPUBLIC ReadSummaryData (VOID far *optional_param, SEARCH_MATCH far *search_info, ITEM_TABLE far *summary_info) { SEARCH_MATCH SearchMatch; STATUS error; memcpy ((char*)(&SearchMatch), (char *)search_info, sizeof(SEARCH_MATCH)); if (!(SearchMatch.SERetFlags & SE_FMATCH)) return (NOERROR); /* Print the note ID. */ printf ("\nNote ID is: %#010lx.\n", SearchMatch.ID.NoteID); /* Print the summary data. */ if (error = PrintSummary( (char*)summary_info )) return (error); return (NOERROR); }
//////////////////////////////////////////////////////////////////////////////// // Program Main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { int Nx, Ny, Nz, max_iters; int blockX, blockY, blockZ; if (argc == 8) { Nx = atoi(argv[1]); Ny = atoi(argv[2]); Nz = atoi(argv[3]); max_iters = atoi(argv[4]); blockX = atoi(argv[5]); blockY = atoi(argv[6]); blockZ = atoi(argv[7]); } else { printf("Usage: %s nx ny nz i block_x block_y block_z number_of_threads\n", argv[0]); exit(1); } // Get the number of GPUS int number_of_devices; checkCuda(cudaGetDeviceCount(&number_of_devices)); if (number_of_devices < 2) { printf("Less than two devices were found.\n"); printf("Exiting...\n"); return -1; } // Decompose along the Z-axis int _Nz = Nz/number_of_devices; // Define constants const _DOUBLE_ L = 1.0; const _DOUBLE_ h = L/(Nx+1); const _DOUBLE_ dt = h*h/6.0; const _DOUBLE_ beta = dt/(h*h); const _DOUBLE_ c0 = beta; const _DOUBLE_ c1 = (1-6*beta); // Check if ECC is turned on ECCCheck(number_of_devices); // Set the number of OpenMP threads omp_set_num_threads(number_of_devices); #pragma omp parallel { unsigned int tid = omp_get_num_threads(); #pragma omp single { printf("Number of OpenMP threads: %d\n", tid); } } // CPU memory operations int dt_size = sizeof(_DOUBLE_); _DOUBLE_ *u_new, *u_old; u_new = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(Nz+2)); u_old = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(Nz+2)); init(u_old, u_new, h, Nx, Ny, Nz); // Allocate and generate arrays on the host size_t pitch_bytes; size_t pitch_gc_bytes; _DOUBLE_ *h_Unew, *h_Uold; _DOUBLE_ *h_s_Uolds[number_of_devices], *h_s_Unews[number_of_devices]; _DOUBLE_ *left_send_buffer[number_of_devices], *left_receive_buffer[number_of_devices]; _DOUBLE_ *right_send_buffer[number_of_devices], *right_receive_buffer[number_of_devices]; h_Unew = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(Nz+2)); h_Uold = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(Nz+2)); init(h_Uold, h_Unew, h, Nx, Ny, Nz); #pragma omp parallel { unsigned int tid = omp_get_thread_num(); h_s_Unews[tid] = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_Nz+2)); h_s_Uolds[tid] = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_Nz+2)); right_send_buffer[tid] = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); left_send_buffer[tid] = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); right_receive_buffer[tid] = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); left_receive_buffer[tid] = (_DOUBLE_ *)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); checkCuda(cudaHostAlloc((void**)&h_s_Unews[tid], dt_size*(Nx+2)*(Ny+2)*(_Nz+2), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&h_s_Uolds[tid], dt_size*(Nx+2)*(Ny+2)*(_Nz+2), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&right_send_buffer[tid], dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&left_send_buffer[tid], dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&right_receive_buffer[tid], dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&left_receive_buffer[tid], dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); init_subdomain(h_s_Uolds[tid], h_Uold, Nx, Ny, _Nz, tid); } // GPU memory operations _DOUBLE_ *d_s_Unews[number_of_devices], *d_s_Uolds[number_of_devices]; _DOUBLE_ *d_right_send_buffer[number_of_devices], 
*d_left_send_buffer[number_of_devices]; _DOUBLE_ *d_right_receive_buffer[number_of_devices], *d_left_receive_buffer[number_of_devices]; #pragma omp parallel { unsigned int tid = omp_get_thread_num(); checkCuda(cudaSetDevice(tid)); CopyToConstantMemory(c0, c1); checkCuda(cudaMallocPitch((void**)&d_s_Uolds[tid], &pitch_bytes, dt_size*(Nx+2), (Ny+2)*(_Nz+2))); checkCuda(cudaMallocPitch((void**)&d_s_Unews[tid], &pitch_bytes, dt_size*(Nx+2), (Ny+2)*(_Nz+2))); checkCuda(cudaMallocPitch((void**)&d_left_receive_buffer[tid], &pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); checkCuda(cudaMallocPitch((void**)&d_right_receive_buffer[tid], &pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); checkCuda(cudaMallocPitch((void**)&d_left_send_buffer[tid], &pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); checkCuda(cudaMallocPitch((void**)&d_right_send_buffer[tid], &pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); } // Copy data from host to the device double HtD_timer = 0.; HtD_timer -= omp_get_wtime(); #pragma omp parallel { unsigned int tid = omp_get_thread_num(); checkCuda(cudaSetDevice(tid)); checkCuda(cudaMemcpy2D(d_s_Uolds[tid], pitch_bytes, h_s_Uolds[tid], dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_Nz+2)), cudaMemcpyDefault)); checkCuda(cudaMemcpy2D(d_s_Unews[tid], pitch_bytes, h_s_Unews[tid], dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_Nz+2)), cudaMemcpyDefault)); } HtD_timer += omp_get_wtime(); int pitch = pitch_bytes/dt_size; int gc_pitch = pitch_gc_bytes/dt_size; // GPU kernel launch parameters dim3 threads_per_block(blockX, blockY, blockZ); unsigned int blocksInX = getBlock(Nx, blockX); unsigned int blocksInY = getBlock(Ny, blockY); unsigned int blocksInZ = getBlock(_Nz-2, k_loop); dim3 thread_blocks(blocksInX, blocksInY, blocksInZ); dim3 thread_blocks_halo(blocksInX, blocksInY); double compute_timer = 0.; compute_timer -= omp_get_wtime(); #pragma omp parallel { unsigned int tid = omp_get_thread_num(); for(int iterations = 0; iterations < max_iters; iterations++) { // Compute inner nodes checkCuda(cudaSetDevice(tid)); ComputeInnerPoints(thread_blocks, threads_per_block, d_s_Unews[tid], d_s_Uolds[tid], pitch, Nx, Ny, _Nz); // Copy right boundary data to host if (tid == 0) { checkCuda(cudaSetDevice(tid)); CopyBoundaryRegionToGhostCell(thread_blocks_halo, threads_per_block, d_s_Unews[tid], d_right_send_buffer[tid], Nx, Ny, _Nz, pitch, gc_pitch, 0); checkCuda(cudaMemcpy2D(right_send_buffer[tid], dt_size*(Nx+2), d_right_send_buffer[tid], pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH), cudaMemcpyDefault)); } // Copy left boundary data to host if (tid == 1) { checkCuda(cudaSetDevice(tid)); CopyBoundaryRegionToGhostCell(thread_blocks_halo, threads_per_block, d_s_Unews[tid], d_left_send_buffer[tid], Nx, Ny, _Nz, pitch, gc_pitch, 1); checkCuda(cudaMemcpy2D(left_send_buffer[tid], dt_size*(Nx+2), d_left_send_buffer[tid], pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH), cudaMemcpyDefault)); } #pragma omp barrier // Copy right boundary data to device 1 if (tid == 1) { checkCuda(cudaSetDevice(tid)); checkCuda(cudaMemcpy2D(d_left_receive_buffer[tid], pitch_gc_bytes, right_send_buffer[tid-1], dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_GC_DEPTH)), cudaMemcpyDefault)); CopyGhostCellToBoundaryRegion(thread_blocks_halo, threads_per_block, d_s_Unews[tid], d_left_receive_buffer[tid], Nx, Ny, _Nz, pitch, gc_pitch, 1); } // Copy left boundary data to device 0 if (tid == 0) { checkCuda(cudaSetDevice(tid)); checkCuda(cudaMemcpy2D(d_right_receive_buffer[tid], pitch_gc_bytes, left_send_buffer[tid+1], 
dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_GC_DEPTH)), cudaMemcpyDefault)); CopyGhostCellToBoundaryRegion(thread_blocks_halo, threads_per_block, d_s_Unews[tid], d_right_receive_buffer[tid], Nx, Ny, _Nz, pitch, gc_pitch, 0); } // Swap pointers on the host #pragma omp barrier checkCuda(cudaSetDevice(tid)); checkCuda(cudaDeviceSynchronize()); swap(_DOUBLE_*, d_s_Unews[tid], d_s_Uolds[tid]); } } compute_timer += omp_get_wtime(); // Copy data from device to host double DtH_timer = 0; DtH_timer -= omp_get_wtime(); #pragma omp parallel { unsigned int tid = omp_get_thread_num(); checkCuda(cudaSetDevice(tid)); checkCuda(cudaMemcpy2D(h_s_Uolds[tid], dt_size*(Nx+2), d_s_Uolds[tid], pitch_bytes, dt_size*(Nx+2), (Ny+2)*(_Nz+2), cudaMemcpyDeviceToHost)); } DtH_timer += omp_get_wtime(); // Merge sub-domains into a one big domain #pragma omp parallel { unsigned int tid = omp_get_thread_num(); merge_domains(h_s_Uolds[tid], h_Uold, Nx, Ny, _Nz, tid); } // Calculate on host #if defined(DEBUG) || defined(_DEBUG) cpu_heat3D(u_new, u_old, c0, c1, max_iters, Nx, Ny, Nz); #endif float gflops = CalcGflops(compute_timer, max_iters, Nx, Ny, Nz); PrintSummary("3D Heat (7-pt)", "Plane sweeping", compute_timer, HtD_timer, DtH_timer, gflops, max_iters, Nx); _DOUBLE_ t = max_iters * dt; CalcError(h_Uold, u_old, t, h, Nx, Ny, Nz); #if defined(DEBUG) || defined(_DEBUG) //exportToVTK(h_Uold, h, "heat3D.vtk", Nx, Ny, Nz); #endif #pragma omp parallel { unsigned int tid = omp_get_thread_num(); checkCuda(cudaSetDevice(tid)); checkCuda(cudaFree(d_s_Unews[tid])); checkCuda(cudaFree(d_s_Uolds[tid])); checkCuda(cudaFree(d_right_send_buffer[tid])); checkCuda(cudaFree(d_left_send_buffer[tid])); checkCuda(cudaFree(d_right_receive_buffer[tid])); checkCuda(cudaFree(d_left_receive_buffer[tid])); checkCuda(cudaFreeHost(h_s_Unews[tid])); checkCuda(cudaFreeHost(h_s_Uolds[tid])); checkCuda(cudaFreeHost(left_send_buffer[tid])); checkCuda(cudaFreeHost(right_send_buffer[tid])); checkCuda(cudaFreeHost(left_receive_buffer[tid])); checkCuda(cudaFreeHost(right_receive_buffer[tid])); checkCuda(cudaDeviceReset()); } free(u_old); free(u_new); return 0; }
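/*
 * Illustrative sketch, not from the original source.  The heat3D loops above
 * end each iteration with swap(_DOUBLE_*, d_s_Unews[tid], d_s_Uolds[tid]);
 * the macro itself is presumably defined in the project's headers.  A common
 * definition simply exchanges the two pointers through a temporary of the
 * given type, so no grid data is copied:
 */
#include <stdio.h>

#define swap(type, a, b) do { type _tmp = (a); (a) = (b); (b) = _tmp; } while (0)

int main(void)
{
    double bufA[1] = { 1.0 }, bufB[1] = { 2.0 };
    double *u_new = bufA, *u_old = bufB;

    swap(double *, u_new, u_old);        /* pointers exchanged, no data copied */
    printf("u_new -> %.1f, u_old -> %.1f\n", u_new[0], u_old[0]);
    return 0;
}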
/////////////////////// // Main program entry /////////////////////// int main(int argc, char** argv) { unsigned int max_iters, Nx, Ny, Nz, blockX, blockY, blockZ; int rank, numberOfProcesses; if (argc == 8) { Nx = atoi(argv[1]); Ny = atoi(argv[2]); Nz = atoi(argv[3]); max_iters = atoi(argv[4]); blockX = atoi(argv[5]); blockY = atoi(argv[6]); blockZ = atoi(argv[7]); } else { printf("Usage: %s nx ny nz i block_x block_y block_z\n", argv[0]); exit(1); } InitializeMPI(&argc, &argv, &rank, &numberOfProcesses); AssignDevices(rank); ECCCheck(rank); // Define constants const _DOUBLE_ L = 1.0; const _DOUBLE_ h = L/(Nx+1); const _DOUBLE_ dt = h*h/6.0; const _DOUBLE_ beta = dt/(h*h); const _DOUBLE_ c0 = beta; const _DOUBLE_ c1 = (1-6*beta); // Copy constants to Constant Memory on the GPUs CopyToConstantMemory(c0, c1); // Decompose along the z-axis const int _Nz = Nz/numberOfProcesses; const int dt_size = sizeof(_DOUBLE_); // Host memory allocations _DOUBLE_ *u_new, *u_old; _DOUBLE_ *h_Uold; u_new = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(Nz+2)); u_old = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(Nz+2)); if (rank == 0) { h_Uold = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(Nz+2)); } init(u_old, u_new, h, Nx, Ny, Nz); // Allocate and generate host subdomains _DOUBLE_ *h_s_Uolds, *h_s_Unews, *h_s_rbuf[numberOfProcesses]; _DOUBLE_ *left_send_buffer, *left_receive_buffer; _DOUBLE_ *right_send_buffer, *right_receive_buffer; h_s_Unews = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_Nz+2)); h_s_Uolds = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_Nz+2)); #if defined(DEBUG) || defined(_DEBUG) if (rank == 0) { for (int i = 0; i < numberOfProcesses; i++) { h_s_rbuf[i] = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_Nz+2)); checkCuda(cudaHostAlloc((void**)&h_s_rbuf[i], dt_size*(Nx+2)*(Ny+2)*(_Nz+2), cudaHostAllocPortable)); } } #endif right_send_buffer = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); left_send_buffer = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); right_receive_buffer = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); left_receive_buffer = (_DOUBLE_*)malloc(sizeof(_DOUBLE_)*(Nx+2)*(Ny+2)*(_GC_DEPTH)); checkCuda(cudaHostAlloc((void**)&h_s_Unews, dt_size*(Nx+2)*(Ny+2)*(_Nz+2), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&h_s_Uolds, dt_size*(Nx+2)*(Ny+2)*(_Nz+2), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&right_send_buffer, dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&left_send_buffer, dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&right_receive_buffer, dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); checkCuda(cudaHostAlloc((void**)&left_receive_buffer, dt_size*(Nx+2)*(Ny+2)*(_GC_DEPTH), cudaHostAllocPortable)); init_subdomain(h_s_Uolds, u_old, Nx, Ny, _Nz, rank); // GPU stream operations cudaStream_t compute_stream; cudaStream_t data_stream; checkCuda(cudaStreamCreate(&compute_stream)); checkCuda(cudaStreamCreate(&data_stream)); // GPU Memory Operations size_t pitch_bytes, pitch_gc_bytes; _DOUBLE_ *d_s_Unews, *d_s_Uolds; _DOUBLE_ *d_right_send_buffer, *d_left_send_buffer; _DOUBLE_ *d_right_receive_buffer, *d_left_receive_buffer; checkCuda(cudaMallocPitch((void**)&d_s_Uolds, &pitch_bytes, dt_size*(Nx+2), (Ny+2)*(_Nz+2))); checkCuda(cudaMallocPitch((void**)&d_s_Unews, &pitch_bytes, dt_size*(Nx+2), (Ny+2)*(_Nz+2))); checkCuda(cudaMallocPitch((void**)&d_left_send_buffer, 
&pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); checkCuda(cudaMallocPitch((void**)&d_left_receive_buffer, &pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); checkCuda(cudaMallocPitch((void**)&d_right_send_buffer, &pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); checkCuda(cudaMallocPitch((void**)&d_right_receive_buffer, &pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH))); // Copy subdomains from host to device and get walltime double HtD_timer = 0.; MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); HtD_timer -= MPI_Wtime(); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); checkCuda(cudaMemcpy2D(d_s_Uolds, pitch_bytes, h_s_Uolds, dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_Nz+2)), cudaMemcpyDefault)); checkCuda(cudaMemcpy2D(d_s_Unews, pitch_bytes, h_s_Unews, dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_Nz+2)), cudaMemcpyDefault)); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); HtD_timer += MPI_Wtime(); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); unsigned int ghost_width = 1; int pitch = pitch_bytes/dt_size; int gc_pitch = pitch_gc_bytes/dt_size; // GPU kernel launch parameters dim3 threads_per_block(blockX, blockY, blockZ); unsigned int blocksInX = getBlock(Nx, blockX); unsigned int blocksInY = getBlock(Ny, blockY); unsigned int blocksInZ = getBlock(_Nz-2, k_loop); dim3 thread_blocks(blocksInX, blocksInY, blocksInZ); dim3 thread_blocks_halo(blocksInX, blocksInY); //MPI_Status status; MPI_Status status[numberOfProcesses]; MPI_Request gather_send_request[numberOfProcesses]; MPI_Request right_send_request[numberOfProcesses], left_send_request[numberOfProcesses]; MPI_Request right_receive_request[numberOfProcesses], left_receive_request[numberOfProcesses]; double compute_timer = 0.; MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); compute_timer -= MPI_Wtime(); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); for(unsigned int iterations = 0; iterations < max_iters; iterations++) { // Compute right boundary data on device 0 if (rank == 0) { int kstart = (_Nz+1)-ghost_width; int kstop = _Nz+1; ComputeInnerPointsAsync(thread_blocks_halo, threads_per_block, data_stream, d_s_Unews, d_s_Uolds, pitch, Nx, Ny, _Nz, kstart, kstop); CopyBoundaryRegionToGhostCellAsync(thread_blocks_halo, threads_per_block, data_stream, d_s_Unews, d_right_send_buffer, Nx, Ny, _Nz, pitch, gc_pitch, 0); checkCuda(cudaMemcpy2DAsync(right_send_buffer, dt_size*(Nx+2), d_right_send_buffer, pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH), cudaMemcpyDefault, data_stream)); checkCuda(cudaStreamSynchronize(data_stream)); MPI_CHECK(MPI_Isend(right_send_buffer, (Nx+2)*(Ny+2)*(_GC_DEPTH), MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &right_send_request[rank])); } else { int kstart = 1; int kstop = 1+ghost_width; ComputeInnerPointsAsync(thread_blocks_halo, threads_per_block, data_stream, d_s_Unews, d_s_Uolds, pitch, Nx, Ny, _Nz, kstart, kstop); CopyBoundaryRegionToGhostCellAsync(thread_blocks_halo, threads_per_block, data_stream, d_s_Unews, d_left_send_buffer, Nx, Ny, _Nz, pitch, gc_pitch, 1); checkCuda(cudaMemcpy2DAsync(left_send_buffer, dt_size*(Nx+2), d_left_send_buffer, pitch_gc_bytes, dt_size*(Nx+2), (Ny+2)*(_GC_DEPTH), cudaMemcpyDefault, data_stream)); checkCuda(cudaStreamSynchronize(data_stream)); MPI_CHECK(MPI_Isend(left_send_buffer, (Nx+2)*(Ny+2)*(_GC_DEPTH), MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &left_send_request[rank])); } // Compute inner nodes for device 0 if (rank == 0) { int kstart = 1; int kstop = (_Nz+1)-ghost_width; ComputeInnerPointsAsync(thread_blocks, threads_per_block, compute_stream, d_s_Unews, d_s_Uolds, pitch, Nx, Ny, _Nz, kstart, kstop); } // Compute inner 
nodes for device 1 else { int kstart = 1+ghost_width; int kstop = _Nz+1; ComputeInnerPointsAsync(thread_blocks, threads_per_block, compute_stream, d_s_Unews, d_s_Uolds, pitch, Nx, Ny, _Nz, kstart, kstop); } // Receive data from device 1 if (rank == 0) { MPI_CHECK(MPI_Irecv(left_send_buffer, (Nx+2)*(Ny+2)*(_GC_DEPTH), MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, &right_receive_request[rank])); } else { MPI_CHECK(MPI_Irecv(right_send_buffer, (Nx+2)*(Ny+2)*(_GC_DEPTH), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &left_receive_request[rank])); } if (rank == 0) { MPI_CHECK(MPI_Wait(&right_receive_request[rank], &status[rank])); checkCuda(cudaMemcpy2DAsync(d_right_receive_buffer, pitch_gc_bytes, left_send_buffer, dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_GC_DEPTH)), cudaMemcpyDefault, data_stream)); CopyGhostCellToBoundaryRegionAsync(thread_blocks_halo, threads_per_block, data_stream, d_s_Unews, d_right_receive_buffer, Nx, Ny, _Nz, pitch, gc_pitch, 0); } else { MPI_CHECK(MPI_Wait(&left_receive_request[rank], &status[rank])); checkCuda(cudaMemcpy2DAsync(d_left_receive_buffer, pitch_gc_bytes, right_send_buffer, dt_size*(Nx+2), dt_size*(Nx+2), ((Ny+2)*(_GC_DEPTH)), cudaMemcpyDefault, data_stream)); CopyGhostCellToBoundaryRegionAsync(thread_blocks_halo, threads_per_block, data_stream, d_s_Unews, d_left_receive_buffer, Nx, Ny, _Nz, pitch, gc_pitch, 1); } if (rank == 0) { MPI_CHECK(MPI_Wait(&right_send_request[rank], MPI_STATUS_IGNORE)); } else { MPI_CHECK(MPI_Wait(&left_send_request[rank], MPI_STATUS_IGNORE)); } // Swap pointers on the host checkCuda(cudaDeviceSynchronize()); swap(_DOUBLE_*, d_s_Unews, d_s_Uolds); } MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); compute_timer += MPI_Wtime(); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); // Copy data from device to host double DtH_timer = 0; MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); DtH_timer -= MPI_Wtime(); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); checkCuda(cudaMemcpy2D(h_s_Uolds, dt_size*(Nx+2), d_s_Uolds, pitch_bytes, dt_size*(Nx+2), (Ny+2)*(_Nz+2), cudaMemcpyDefault)); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); DtH_timer += MPI_Wtime(); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); // Gather results from subdomains MPI_CHECK(MPI_Isend(h_s_Uolds, (Nx+2)*(Ny+2)*(_Nz+2), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &gather_send_request[rank])); if (rank == 0) { for (int i = 0; i < numberOfProcesses; i++) { MPI_CHECK(MPI_Recv(h_s_rbuf[i], (Nx+2)*(Ny+2)*(_Nz+2), MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status[rank])); merge_domains(h_s_rbuf[i], h_Uold, Nx, Ny, _Nz, i); } } // Calculate on host #if defined(DEBUG) || defined(_DEBUG) if (rank == 0) { cpu_heat3D(u_new, u_old, c0, c1, max_iters, Nx, Ny, Nz); } #endif if (rank == 0) { float gflops = CalcGflops(compute_timer, max_iters, Nx, Ny, Nz); PrintSummary("3D Heat (7-pt)", "Plane sweeping", compute_timer, HtD_timer, DtH_timer, gflops, max_iters, Nx); _DOUBLE_ t = max_iters * dt; CalcError(h_Uold, u_old, t, h, Nx, Ny, Nz); } Finalize(); // Free device memory checkCuda(cudaFree(d_s_Unews)); checkCuda(cudaFree(d_s_Uolds)); checkCuda(cudaFree(d_right_send_buffer)); checkCuda(cudaFree(d_left_send_buffer)); checkCuda(cudaFree(d_right_receive_buffer)); checkCuda(cudaFree(d_left_receive_buffer)); // Free host memory checkCuda(cudaFreeHost(h_s_Unews)); checkCuda(cudaFreeHost(h_s_Uolds)); #if defined(DEBUG) || defined(_DEBUG) if (rank == 0) { for (int i = 0; i < numberOfProcesses; i++) { checkCuda(cudaFreeHost(h_s_rbuf[i])); } free(h_Uold); } #endif checkCuda(cudaFreeHost(left_send_buffer)); checkCuda(cudaFreeHost(left_receive_buffer)); 
checkCuda(cudaFreeHost(right_send_buffer)); checkCuda(cudaFreeHost(right_receive_buffer)); checkCuda(cudaDeviceReset()); free(u_old); free(u_new); return 0; }
void CG3::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b) { // Check to see that we're not trying to invert on a zero-field source const double b2 = norm2(b); if(b2 == 0){ profile.TPSTOP(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x=b; param.true_res = 0.0; param.true_res_hq = 0.0; return; } ColorSpinorParam csParam(x); csParam.create = QUDA_ZERO_FIELD_CREATE; cudaColorSpinorField x_prev(b, csParam); cudaColorSpinorField r_prev(b, csParam); cudaColorSpinorField temp(b, csParam); cudaColorSpinorField r(b); cudaColorSpinorField w(b); mat(r, x, temp); // r = Mx double r2 = xmyNormCuda(b,r); // r = b - Mx PrintStats("CG3", 0, r2, b2, 0.0); double stop = stopping(param.tol, b2, param.residual_type); if(convergence(r2, 0.0, stop, 0.0)) return; // First iteration mat(w, r, temp); double rAr = reDotProductCuda(r,w); double rho = 1.0; double gamma_prev = 0.0; double gamma = r2/rAr; cudaColorSpinorField x_new(x); cudaColorSpinorField r_new(r); axpyCuda(gamma, r, x_new); // x_new += gamma*r axpyCuda(-gamma, w, r_new); // r_new -= gamma*w // end of first iteration // axpbyCuda(a,b,x,y) => y = a*x + b*y int k = 1; // First iteration performed above double r2_prev; while(!convergence(r2, 0.0, stop, 0.0) && k<param.maxiter){ x_prev = x; x = x_new; r_prev = r; r = r_new; mat(w, r, temp); rAr = reDotProductCuda(r,w); r2_prev = r2; r2 = norm2(r); // Need to rearrange this! PrintStats("CG3", k, r2, b2, 0.0); gamma_prev = gamma; gamma = r2/rAr; rho = 1.0/(1. - (gamma/gamma_prev)*(r2/r2_prev)*(1.0/rho)); x_new = x; axCuda(rho,x_new); axpyCuda(rho*gamma,r,x_new); axpyCuda((1. - rho),x_prev,x_new); r_new = r; axCuda(rho,r_new); axpyCuda(-rho*gamma,w,r_new); axpyCuda((1.-rho),r_prev,r_new); double rr_old = reDotProductCuda(r_new,r); printfQuda("rr_old = %1.14lf\n", rr_old); k++; } if(k == param.maxiter) warningQuda("Exceeded maximum iterations %d", param.maxiter); // compute the true residual mat(r, x, temp); param.true_res = sqrt(xmyNormCuda(b, r)/b2); PrintSummary("CG3", k, r2, b2); return; }
void CG::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b) { profile.Start(QUDA_PROFILE_INIT); // Check to see that we're not trying to invert on a zero-field source const double b2 = norm2(b); if(b2 == 0){ profile.Stop(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x=b; param.true_res = 0.0; param.true_res_hq = 0.0; return; } cudaColorSpinorField r(b); ColorSpinorParam csParam(x); csParam.create = QUDA_ZERO_FIELD_CREATE; cudaColorSpinorField y(b, csParam); mat(r, x, y); // zeroCuda(y); double r2 = xmyNormCuda(b, r); csParam.setPrecision(param.precision_sloppy); cudaColorSpinorField Ap(x, csParam); cudaColorSpinorField tmp(x, csParam); cudaColorSpinorField *tmp2_p = &tmp; // tmp only needed for multi-gpu Wilson-like kernels if (mat.Type() != typeid(DiracStaggeredPC).name() && mat.Type() != typeid(DiracStaggered).name()) { tmp2_p = new cudaColorSpinorField(x, csParam); } cudaColorSpinorField &tmp2 = *tmp2_p; cudaColorSpinorField *x_sloppy, *r_sloppy; if (param.precision_sloppy == x.Precision()) { csParam.create = QUDA_REFERENCE_FIELD_CREATE; x_sloppy = &x; r_sloppy = &r; } else { csParam.create = QUDA_COPY_FIELD_CREATE; x_sloppy = new cudaColorSpinorField(x, csParam); r_sloppy = new cudaColorSpinorField(r, csParam); } cudaColorSpinorField &xSloppy = *x_sloppy; cudaColorSpinorField &rSloppy = *r_sloppy; cudaColorSpinorField p(rSloppy); if(&x != &xSloppy){ copyCuda(y,x); zeroCuda(xSloppy); }else{ zeroCuda(y); } const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false; profile.Stop(QUDA_PROFILE_INIT); profile.Start(QUDA_PROFILE_PREAMBLE); double r2_old; double stop = b2*param.tol*param.tol; // stopping condition of solver double heavy_quark_res = 0.0; // heavy quark residual if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z); int heavy_quark_check = 10; // how often to check the heavy quark residual double alpha=0.0, beta=0.0; double pAp; int rUpdate = 0; double rNorm = sqrt(r2); double r0Norm = rNorm; double maxrx = rNorm; double maxrr = rNorm; double delta = param.delta; // this parameter determines how many consective reliable update // reisudal increases we tolerate before terminating the solver, // i.e., how long do we want to keep trying to converge int maxResIncrease = 0; // 0 means we have no tolerance profile.Stop(QUDA_PROFILE_PREAMBLE); profile.Start(QUDA_PROFILE_COMPUTE); blas_flops = 0; int k=0; PrintStats("CG", k, r2, b2, heavy_quark_res); int steps_since_reliable = 1; while ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) && k < param.maxiter) { matSloppy(Ap, p, tmp, tmp2); // tmp as tmp double sigma; bool breakdown = false; if (param.pipeline) { double3 triplet = tripleCGReductionCuda(rSloppy, Ap, p); r2 = triplet.x; double Ap2 = triplet.y; pAp = triplet.z; r2_old = r2; alpha = r2 / pAp; sigma = alpha*(alpha * Ap2 - pAp); if (sigma < 0.0 || steps_since_reliable==0) { // sigma condition has broken down r2 = axpyNormCuda(-alpha, Ap, rSloppy); sigma = r2; breakdown = true; } r2 = sigma; } else { r2_old = r2; pAp = reDotProductCuda(p, Ap); alpha = r2 / pAp; // here we are deploying the alternative beta computation Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy); r2 = real(cg_norm); // (r_new, r_new) sigma = imag(cg_norm) >= 0.0 ? 
imag(cg_norm) : r2; // use r2 if (r_k+1, r_k+1-r_k) breaks } // reliable update conditions rNorm = sqrt(r2); if (rNorm > maxrx) maxrx = rNorm; if (rNorm > maxrr) maxrr = rNorm; int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0; int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0; // force a reliable update if we are within target tolerance (only if doing reliable updates) if ( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1; if ( !(updateR || updateX)) { //beta = r2 / r2_old; beta = sigma / r2_old; // use the alternative beta computation if (param.pipeline && !breakdown) tripleCGUpdateCuda(alpha, beta, Ap, rSloppy, xSloppy, p); else axpyZpbxCuda(alpha, p, xSloppy, rSloppy, beta); if (use_heavy_quark_res && k%heavy_quark_check==0) { copyCuda(tmp,y); heavy_quark_res = sqrt(xpyHeavyQuarkResidualNormCuda(xSloppy, tmp, rSloppy).z); } steps_since_reliable++; } else { axpyCuda(alpha, p, xSloppy); if (x.Precision() != xSloppy.Precision()) copyCuda(x, xSloppy); xpyCuda(x, y); // swap these around? mat(r, y, x); // here we can use x as tmp r2 = xmyNormCuda(b, r); if (x.Precision() != rSloppy.Precision()) copyCuda(rSloppy, r); zeroCuda(xSloppy); // break-out check if we have reached the limit of the precision static int resIncrease = 0; if (sqrt(r2) > r0Norm && updateX) { // reuse r0Norm for this warningQuda("CG: new reliable residual norm %e is greater than previous reliable residual norm %e", sqrt(r2), r0Norm); k++; rUpdate++; if (++resIncrease > maxResIncrease) break; } else { resIncrease = 0; } rNorm = sqrt(r2); maxrr = rNorm; maxrx = rNorm; r0Norm = rNorm; rUpdate++; // explicitly restore the orthogonality of the gradient vector double rp = reDotProductCuda(rSloppy, p) / (r2); axpyCuda(-rp, rSloppy, p); beta = r2 / r2_old; xpayCuda(rSloppy, beta, p); if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(y,r).z); steps_since_reliable = 0; } breakdown = false; k++; PrintStats("CG", k, r2, b2, heavy_quark_res); } if (x.Precision() != xSloppy.Precision()) copyCuda(x, xSloppy); xpyCuda(y, x); profile.Stop(QUDA_PROFILE_COMPUTE); profile.Start(QUDA_PROFILE_EPILOGUE); param.secs = profile.Last(QUDA_PROFILE_COMPUTE); double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops())*1e-9; reduceDouble(gflops); param.gflops = gflops; param.iter += k; if (k==param.maxiter) warningQuda("Exceeded maximum iterations %d", param.maxiter); if (getVerbosity() >= QUDA_VERBOSE) printfQuda("CG: Reliable updates = %d\n", rUpdate); // compute the true residuals mat(r, x, y); param.true_res = sqrt(xmyNormCuda(b, r) / b2); #if (__COMPUTE_CAPABILITY__ >= 200) param.true_res_hq = sqrt(HeavyQuarkResidualNormCuda(x,r).z); #else param.true_res_hq = 0.0; #endif PrintSummary("CG", k, r2, b2); // reset the flops counters quda::blas_flops = 0; mat.flops(); matSloppy.flops(); profile.Stop(QUDA_PROFILE_EPILOGUE); profile.Start(QUDA_PROFILE_FREE); if (&tmp2 != &tmp) delete tmp2_p; if (param.precision_sloppy != x.Precision()) { delete r_sloppy; delete x_sloppy; } profile.Stop(QUDA_PROFILE_FREE); return; }
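/*
 * Illustrative sketch, not from the original source.  The CG solver above
 * sets stop = b2*param.tol*param.tol and convergence() compares the current
 * residual norm-squared r2 against it, i.e. it enforces a relative residual
 * ||r||/||b|| <= tol (the heavy-quark residual is ignored here).  Function
 * names below are local to this sketch.
 */
#include <math.h>
#include <stdio.h>

/* squared stopping threshold derived from ||b||^2 and the tolerance */
static double stop_threshold(double tol, double b2) { return b2 * tol * tol; }

/* converged once ||r||^2 has fallen below the threshold */
static int converged(double r2, double stop) { return r2 <= stop; }

int main(void)
{
    double b2   = 1.0e4;                 /* ||b||^2 of the source vector */
    double tol  = 1.0e-6;                /* requested relative tolerance */
    double stop = stop_threshold(tol, b2);
    double r2   = 1.0e-9;                /* example residual norm-squared */

    printf("relative residual %.3e, converged: %s\n",
           sqrt(r2 / b2), converged(r2, stop) ? "yes" : "no");
    return 0;
}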
int mySetState(void * hndl, UserState * ustate, unsigned int privdata) { int val; static unsigned int testmode; log_verbose(KERN_INFO "Reached mySetState with privdata %x\n", privdata); /* Check driver state */ if(DriverState != REGISTERED) { printk("Driver does not seem to be ready\n"); return EFAULT; } /* Check handle value */ if((hndl != handle[0]) && (hndl != handle[2])) { printk("Came with wrong handle\n"); return EBADF; } /* Valid only for TX engine */ if(privdata == 0x54545454) { spin_lock_bh(&RawLock); /* Set up the value to be written into the register */ RawTestMode = ustate->TestMode; if(RawTestMode & TEST_START) { testmode = 0; if(RawTestMode & ENABLE_LOOPBACK) testmode |= LOOPBACK; if(RawTestMode & ENABLE_PKTCHK) testmode |= PKTCHKR; if(RawTestMode & ENABLE_PKTGEN) testmode |= PKTGENR; } else { /* Deliberately not clearing the loopback bit, incase a * loopback test was going on - allows the loopback path * to drain off packets. Just stopping the source of packets. */ if(RawTestMode & ENABLE_PKTCHK) testmode &= ~PKTCHKR; if(RawTestMode & ENABLE_PKTGEN) testmode &= ~PKTGENR; } printk("SetState TX with RawTestMode %x, reg value %x\n", RawTestMode, testmode); /* Now write the registers */ if(RawTestMode & TEST_START) { if(!(RawTestMode & (ENABLE_PKTCHK|ENABLE_PKTGEN|ENABLE_LOOPBACK))) { printk("%s Driver: TX Test Start with wrong mode %x\n", MYNAME, testmode); RawTestMode = 0; spin_unlock_bh(&RawLock); return EBADRQC; } printk("%s Driver: Starting the test - mode %x, reg %x\n", MYNAME, RawTestMode, testmode); /* Next, set packet sizes. Ensure they don't exceed PKTSIZEs */ RawMinPktSize = ustate->MinPktSize; RawMaxPktSize = ustate->MaxPktSize; /* Set RX packet size for memory path */ val = RawMaxPktSize; if(val % BYTEMULTIPLE) { printk("********** ODD PACKET SIZE **********\n"); //val -= (val % BYTEMULTIPLE); } printk("Reg %x = %x\n", PKT_SIZE_ADDRESS, val); RawMinPktSize = RawMaxPktSize = val; /* Now ensure the sizes remain within bounds */ if(RawMaxPktSize > MAXPKTSIZE) RawMinPktSize = RawMaxPktSize = MAXPKTSIZE; if(RawMinPktSize < MINPKTSIZE) RawMinPktSize = RawMaxPktSize = MINPKTSIZE; if(RawMinPktSize > RawMaxPktSize) RawMinPktSize = RawMaxPktSize; val = RawMaxPktSize; printk("========Reg %x = %d\n", PKT_SIZE_ADDRESS, val); XIo_Out32(TXbarbase+PKT_SIZE_ADDRESS, val); printk("RxPktSize %d\n", val); /* #ifdef XRAWDATA0 XIo_Out32(TXbarbase+START_ADRS_0, 0x00000000); XIo_Out32(TXbarbase+START_ADRS_1, 0x04000000); XIo_Out32(TXbarbase+END_ADRS_0, 0x03000000); XIo_Out32(TXbarbase+END_ADRS_1, 0x07000000); XIo_Out32(TXbarbase+WRBURST_0, BURST_SIZE ); XIo_Out32(TXbarbase+RDBURST_0, BURST_SIZE ); XIo_Out32(TXbarbase+WRBURST_1, BURST_SIZE ); XIo_Out32(TXbarbase+RDBURST_1, BURST_SIZE ); #else XIo_Out32(TXbarbase+START_ADRS_2, 0x08000000); XIo_Out32(TXbarbase+START_ADRS_3, 0x0C000000); XIo_Out32(TXbarbase+END_ADRS_2, 0x0B000000); XIo_Out32(TXbarbase+END_ADRS_3, 0x0F000000); XIo_Out32(TXbarbase+WRBURST_2, BURST_SIZE ); XIo_Out32(TXbarbase+RDBURST_2, BURST_SIZE ); XIo_Out32(TXbarbase+WRBURST_3, BURST_SIZE ); XIo_Out32(TXbarbase+RDBURST_3, BURST_SIZE ); #endif */ /* Incase the last test was a loopback test, that bit may not be cleared. 
*/ XIo_Out32(TXbarbase+TX_CONFIG_ADDRESS, 0); if(RawTestMode & (ENABLE_PKTCHK|ENABLE_LOOPBACK)) { TxSeqNo = 0; if(RawTestMode & ENABLE_LOOPBACK) RxSeqNo = 0; printk("========Reg %x = %x\n", TX_CONFIG_ADDRESS, testmode); XIo_Out32(TXbarbase+TX_CONFIG_ADDRESS, testmode); } if(RawTestMode & ENABLE_PKTGEN) { RxSeqNo = 0; printk("========Reg %x = %x\n", RX_CONFIG_ADDRESS, testmode); XIo_Out32(TXbarbase+RX_CONFIG_ADDRESS, testmode); } } /* Else, stop the test. Do not remove any loopback here because * the DMA queues and hardware FIFOs must drain first. */ else { printk("%s Driver: Stopping the test, mode %x\n", MYNAME, testmode); printk("========Reg %x = %x\n", TX_CONFIG_ADDRESS, testmode); XIo_Out32(TXbarbase+TX_CONFIG_ADDRESS, testmode); printk("========Reg %x = %x\n", RX_CONFIG_ADDRESS, testmode); XIo_Out32(TXbarbase+RX_CONFIG_ADDRESS, testmode); /* Not resetting sequence numbers here - causes problems * in debugging. Instead, reset the sequence numbers when * starting a test. */ } PrintSummary(); spin_unlock_bh(&RawLock); } return 0; }
int main (int argc, char **argv)
{
    int i;
    unsigned int total_len = 0;
    struct timeval begtime, endtime;
    FILE *sfd, *pfd;
    char sfilename[20] = "string";
    char pfilename[20] = "pattern";

    //===============================================
    if (argc < 4) {
        fprintf (stderr, "Usage: acsmx stringfile patternfile ... -nocase\n");
        exit (0);
    }

    strcpy (sfilename, argv[1]);
    sfd = fopen(sfilename, "r");
    if (sfd == NULL) {
        fprintf(stderr, "Open file error!\n");
        exit(1);
    }

    strcpy(pfilename, argv[2]);
    pfd = fopen(pfilename, "r");
    if (pfd == NULL) {
        fprintf(stderr, "Open file error!\n");
        exit(1);
    }

    thread_num = atoi(argv[3]);
    acsm = acsmNew (thread_num);

    //read patterns
    i = 0;
    while (fgets(pattern, MAXPATTERNLEN, pfd)) {
        int len = strlen(pattern);
        acsmAddPattern (acsm, pattern, len-1);
        i++;
    }
    fclose(pfd);
    printf("\n\nread %d patterns\n\n===============================", i);

    /* Generate GtoTo Table and Fail Table */
    acsmCompile (acsm);

    //=========================================================
    /*read string*/
    for (i = 0; i < MAXLINE; i++) {
        if (!fgets(text[i], MAXLEN, sfd)) break;
        total_len += strlen(text[i]) - 1; //ignore the last char '\n'
    }
    line = i;
    fclose(sfd);
    printf("\n\nreading finished...\n=============================\n\n");
    printf("%d lines\t%d bytes", line, total_len);
    printf("\n\n=============================\n");

    gettimeofday(&begtime, 0);

    //create multi_thread
    thread_array = calloc(thread_num, sizeof(pthread_t));
    valid_len_array = calloc(thread_num, sizeof(unsigned int));
    pthread_barrier_init(&barrier_thread, NULL, thread_num);
    pthread_barrier_init(&barrier_validation, NULL, thread_num);
    for (i = 0; i < thread_num; i++) {
        pthread_create(&thread_array[i], NULL, SearchThread, (void*)i);
    }

    //===========================================================
    int err;
    for (i = 0; i < thread_num; i++) {
        err = pthread_join(thread_array[i], NULL);
        if (err != 0) {
            printf("can not join with thread %d:%s\n", i, strerror(err));
        }
    }
    gettimeofday(&endtime, 0);

    PrintSummary(acsm);
    acsmFree (acsm);
    printf ("\n### AC Match Finished ###\n");
    printf("\nTime Cost: %lu us\n\n", (endtime.tv_sec - begtime.tv_sec)*1000000 + (endtime.tv_usec - begtime.tv_usec));
    printf ("\n====================================\n\n");
    printf ("Validation Stage Len:\n\n");
    for (i = 0; i < thread_num; i++)
        printf("rank%d\t%u\n", i, valid_len_array[i]);
    printf ("\n====================================\n\n");

    free(thread_array);
    free(valid_len_array);
    pthread_barrier_destroy(&barrier_thread);
    pthread_barrier_destroy(&barrier_validation);
    return 0;
}
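/*
 * Illustrative sketch, not from the original source.  main() above hands
 * each SearchThread its rank by casting the loop index to void*; an
 * alternative that avoids the integer-to-pointer cast is to give every
 * thread a pointer to its own rank slot.  All names here are hypothetical.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static void *worker(void *arg)
{
    int rank = *(int *)arg;                      /* read this thread's rank */
    printf("thread rank %d running\n", rank);
    return NULL;
}

int main(void)
{
    int thread_num = 4;
    pthread_t *threads = calloc(thread_num, sizeof(pthread_t));
    int *ranks = calloc(thread_num, sizeof(int));

    for (int i = 0; i < thread_num; i++) {
        ranks[i] = i;                            /* stable storage per thread */
        pthread_create(&threads[i], NULL, worker, &ranks[i]);
    }
    for (int i = 0; i < thread_num; i++)
        pthread_join(threads[i], NULL);

    free(threads);
    free(ranks);
    return 0;
}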
int main( int argc, char **argv ) { struct stat stat_buff; stat_record_t sum_stat; printer_t print_header, print_record; nfprof_t profile_data; char *rfile, *Rfile, *Mdirs, *wfile, *ffile, *filter, *tstring, *stat_type; char *byte_limit_string, *packet_limit_string, *print_format, *record_header; char *print_order, *query_file, *UnCompress_file, *nameserver, *aggr_fmt; int c, ffd, ret, element_stat, fdump; int i, user_format, quiet, flow_stat, topN, aggregate, aggregate_mask, bidir; int print_stat, syntax_only, date_sorted, do_tag, compress, do_xstat; int plain_numbers, GuessDir, pipe_output, csv_output; time_t t_start, t_end; uint32_t limitflows; char Ident[IDENTLEN]; rfile = Rfile = Mdirs = wfile = ffile = filter = tstring = stat_type = NULL; byte_limit_string = packet_limit_string = NULL; fdump = aggregate = 0; aggregate_mask = 0; bidir = 0; t_start = t_end = 0; syntax_only = 0; topN = -1; flow_stat = 0; print_stat = 0; element_stat = 0; do_xstat = 0; limitflows = 0; date_sorted = 0; total_bytes = 0; total_flows = 0; skipped_blocks = 0; do_tag = 0; quiet = 0; user_format = 0; compress = 0; plain_numbers = 0; pipe_output = 0; csv_output = 0; is_anonymized = 0; GuessDir = 0; nameserver = NULL; print_format = NULL; print_header = NULL; print_record = NULL; print_order = NULL; query_file = NULL; UnCompress_file = NULL; aggr_fmt = NULL; record_header = NULL; Ident[0] = '\0'; while ((c = getopt(argc, argv, "6aA:Bbc:D:E:s:hHn:i:j:f:qzr:v:w:K:M:NImO:R:XZt:TVv:x:l:L:o:")) != EOF) { switch (c) { case 'h': usage(argv[0]); exit(0); break; case 'a': aggregate = 1; break; case 'A': if ( !ParseAggregateMask(optarg, &aggr_fmt ) ) { exit(255); } aggregate_mask = 1; break; case 'B': GuessDir = 1; case 'b': if ( !SetBidirAggregation() ) { exit(255); } bidir = 1; // implies aggregate = 1; break; case 'D': nameserver = optarg; if ( !set_nameserver(nameserver) ) { exit(255); } break; case 'E': query_file = optarg; if ( !InitExporterList() ) { exit(255); } PrintExporters(query_file); exit(0); break; case 'X': fdump = 1; break; case 'Z': syntax_only = 1; break; case 'q': quiet = 1; break; case 'z': compress = 1; break; case 'c': limitflows = atoi(optarg); if ( !limitflows ) { LogError("Option -c needs a number > 0\n"); exit(255); } break; case 's': stat_type = optarg; if ( !SetStat(stat_type, &element_stat, &flow_stat) ) { exit(255); } break; case 'V': { char *e1, *e2; e1 = ""; e2 = ""; #ifdef NSEL e1 = "NSEL-NEL"; #endif printf("%s: Version: %s%s%s\n",argv[0], e1, e2, nfdump_version); exit(0); } break; case 'l': packet_limit_string = optarg; break; case 'K': LogError("*** Anonymisation moved! Use nfanon to anonymise flows!\n"); exit(255); break; case 'H': do_xstat = 1; break; case 'L': byte_limit_string = optarg; break; case 'N': plain_numbers = 1; break; case 'f': ffile = optarg; break; case 't': tstring = optarg; break; case 'r': rfile = optarg; if ( strcmp(rfile, "-") == 0 ) rfile = NULL; break; case 'm': print_order = "tstart"; Parse_PrintOrder(print_order); date_sorted = 1; LogError("Option -m depricated. 
Use '-O tstart' instead\n"); break; case 'M': Mdirs = optarg; break; case 'I': print_stat++; break; case 'o': // output mode print_format = optarg; break; case 'O': { // stat order by int ret; print_order = optarg; ret = Parse_PrintOrder(print_order); if ( ret < 0 ) { LogError("Unknown print order '%s'\n", print_order); exit(255); } date_sorted = ret == 6; // index into order_mode } break; case 'R': Rfile = optarg; break; case 'w': wfile = optarg; break; case 'n': topN = atoi(optarg); if ( topN < 0 ) { LogError("TopnN number %i out of range\n", topN); exit(255); } break; case 'T': do_tag = 1; break; case 'i': strncpy(Ident, optarg, IDENT_SIZE); Ident[IDENT_SIZE - 1] = 0; if ( strchr(Ident, ' ') ) { LogError("Ident must not contain spaces\n"); exit(255); } break; case 'j': UnCompress_file = optarg; UnCompressFile(UnCompress_file); exit(0); break; case 'x': query_file = optarg; InitExtensionMaps(NO_EXTENSION_LIST); DumpExMaps(query_file); exit(0); break; case 'v': query_file = optarg; QueryFile(query_file); exit(0); break; case '6': // print long IPv6 addr Setv6Mode(1); break; default: usage(argv[0]); exit(0); } } if (argc - optind > 1) { usage(argv[0]); exit(255); } else { /* user specified a pcap filter */ filter = argv[optind]; FilterFilename = NULL; } // Change Ident only if ( rfile && strlen(Ident) > 0 ) { ChangeIdent(rfile, Ident); exit(0); } if ( (element_stat || flow_stat) && (topN == -1) ) topN = 10; if ( topN < 0 ) topN = 0; if ( (element_stat && !flow_stat) && aggregate_mask ) { LogError("Warning: Aggregation ignored for element statistics\n"); aggregate_mask = 0; } if ( !flow_stat && aggregate_mask ) { aggregate = 1; } if ( rfile && Rfile ) { LogError("-r and -R are mutually exclusive. Plase specify either -r or -R\n"); exit(255); } if ( Mdirs && !(rfile || Rfile) ) { LogError("-M needs either -r or -R to specify the file or file list. Add '-R .' 
for all files in the directories.\n"); exit(255); } extension_map_list = InitExtensionMaps(NEEDS_EXTENSION_LIST); if ( !InitExporterList() ) { exit(255); } SetupInputFileSequence(Mdirs, rfile, Rfile); if ( print_stat ) { nffile_t *nffile; if ( !rfile && !Rfile && !Mdirs) { LogError("Expect data file(s).\n"); exit(255); } memset((void *)&sum_stat, 0, sizeof(stat_record_t)); sum_stat.first_seen = 0x7fffffff; sum_stat.msec_first = 999; nffile = GetNextFile(NULL, 0, 0); if ( !nffile ) { LogError("Error open file: %s\n", strerror(errno)); exit(250); } while ( nffile && nffile != EMPTY_LIST ) { SumStatRecords(&sum_stat, nffile->stat_record); nffile = GetNextFile(nffile, 0, 0); } PrintStat(&sum_stat); exit(0); } // handle print mode if ( !print_format ) { // automatically select an appropriate output format for custom aggregation // aggr_fmt is compiled by ParseAggregateMask if ( aggr_fmt ) { int len = strlen(AggrPrependFmt) + strlen(aggr_fmt) + strlen(AggrAppendFmt) + 7; // +7 for 'fmt:', 2 spaces and '\0' print_format = malloc(len); if ( !print_format ) { LogError("malloc() error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) ); exit(255); } snprintf(print_format, len, "fmt:%s %s %s",AggrPrependFmt, aggr_fmt, AggrAppendFmt ); print_format[len-1] = '\0'; } else if ( bidir ) { print_format = "biline"; } else print_format = DefaultMode; } if ( strncasecmp(print_format, "fmt:", 4) == 0 ) { // special user defined output format char *format = &print_format[4]; if ( strlen(format) ) { if ( !ParseOutputFormat(format, plain_numbers, printmap) ) exit(255); print_record = format_special; record_header = get_record_header(); user_format = 1; } else { LogError("Missing format description for user defined output format!\n"); exit(255); } } else { // predefined output format // Check for long_v6 mode i = strlen(print_format); if ( i > 2 ) { if ( print_format[i-1] == '6' ) { Setv6Mode(1); print_format[i-1] = '\0'; } else Setv6Mode(0); } i = 0; while ( printmap[i].printmode ) { if ( strncasecmp(print_format, printmap[i].printmode, MAXMODELEN) == 0 ) { if ( printmap[i].Format ) { if ( !ParseOutputFormat(printmap[i].Format, plain_numbers, printmap) ) exit(255); // predefined custom format print_record = printmap[i].func; record_header = get_record_header(); user_format = 1; } else { // To support the pipe output format for element stats - check for pipe, and remember this if ( strncasecmp(print_format, "pipe", MAXMODELEN) == 0 ) { pipe_output = 1; } if ( strncasecmp(print_format, "csv", MAXMODELEN) == 0 ) { csv_output = 1; set_record_header(); record_header = get_record_header(); } // predefined static format print_record = printmap[i].func; user_format = 0; } break; } i++; } } if ( !print_record ) { LogError("Unknown output mode '%s'\n", print_format); exit(255); } // this is the only case, where headers are printed. 
if ( strncasecmp(print_format, "raw", 16) == 0 ) print_header = format_file_block_header; if ( aggregate && (flow_stat || element_stat) ) { aggregate = 0; LogError("Command line switch -s overwrites -a\n"); } if ( !filter && ffile ) { if ( stat(ffile, &stat_buff) ) { LogError("Can't stat filter file '%s': %s\n", ffile, strerror(errno)); exit(255); } filter = (char *)malloc(stat_buff.st_size+1); if ( !filter ) { LogError("malloc() error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) ); exit(255); } ffd = open(ffile, O_RDONLY); if ( ffd < 0 ) { LogError("Can't open filter file '%s': %s\n", ffile, strerror(errno)); exit(255); } ret = read(ffd, (void *)filter, stat_buff.st_size); if ( ret < 0 ) { perror("Error reading filter file"); close(ffd); exit(255); } total_bytes += ret; filter[stat_buff.st_size] = 0; close(ffd); FilterFilename = ffile; } // if no filter is given, set the default ip filter which passes through every flow if ( !filter || strlen(filter) == 0 ) filter = "any"; Engine = CompileFilter(filter); if ( !Engine ) exit(254); if ( fdump ) { printf("StartNode: %i Engine: %s\n", Engine->StartNode, Engine->Extended ? "Extended" : "Fast"); DumpList(Engine); exit(0); } if ( syntax_only ) exit(0); if ( print_order && flow_stat ) { printf("-s record and -O (-m) are mutually exclusive options\n"); exit(255); } if ((aggregate || flow_stat || print_order) && !Init_FlowTable() ) exit(250); if (element_stat && !Init_StatTable(HashBits, NumPrealloc) ) exit(250); SetLimits(element_stat || aggregate || flow_stat, packet_limit_string, byte_limit_string); if ( tstring ) { if ( !ScanTimeFrame(tstring, &t_start, &t_end) ) exit(255); } if ( !(flow_stat || element_stat || wfile || quiet ) && record_header ) { if ( user_format ) { printf("%s\n", record_header); } else { // static format - no static format with header any more, but keep code anyway if ( Getv6Mode() ) { printf("%s\n", record_header); } else printf("%s\n", record_header); } } nfprof_start(&profile_data); sum_stat = process_data(wfile, element_stat, aggregate || flow_stat, print_order != NULL, print_header, print_record, t_start, t_end, limitflows, do_tag, compress, do_xstat); nfprof_end(&profile_data, total_flows); if ( total_bytes == 0 ) { printf("No matched flows\n"); exit(0); } if (aggregate || print_order) { if ( wfile ) { nffile_t *nffile = OpenNewFile(wfile, NULL, compress, is_anonymized, NULL); if ( !nffile ) exit(255); if ( ExportFlowTable(nffile, aggregate, bidir, date_sorted, extension_map_list) ) { CloseUpdateFile(nffile, Ident ); } else { CloseFile(nffile); unlink(wfile); } DisposeFile(nffile); } else { PrintFlowTable(print_record, topN, do_tag, GuessDir, extension_map_list); } } if (flow_stat) { PrintFlowStat(record_header, print_record, topN, do_tag, quiet, csv_output, extension_map_list); #ifdef DEVEL printf("Loopcnt: %u\n", loopcnt); #endif } if (element_stat) { PrintElementStat(&sum_stat, plain_numbers, record_header, print_record, topN, do_tag, quiet, pipe_output, csv_output); } if ( !quiet ) { if ( csv_output ) { PrintSummary(&sum_stat, plain_numbers, csv_output); } else if ( !wfile ) { if (is_anonymized) printf("IP addresses anonymised\n"); PrintSummary(&sum_stat, plain_numbers, csv_output); if ( t_last_flow == 0 ) { // in case of a pre 1.6.6 collected and empty flow file printf("Time window: <unknown>\n"); } else { printf("Time window: %s\n", TimeString(t_first_flow, t_last_flow)); } printf("Total flows processed: %u, Blocks skipped: %u, Bytes read: %llu\n", total_flows, skipped_blocks, (unsigned long 
long)total_bytes); nfprof_print(&profile_data, stdout); } } Dispose_FlowTable(); Dispose_StatTable(); FreeExtensionMaps(extension_map_list); #ifdef DEVEL if ( hash_hit || hash_miss ) printf("Hash hit: %i, miss: %i, skip: %i, ratio: %5.3f\n", hash_hit, hash_miss, hash_skip, (float)hash_hit/((float)(hash_hit+hash_miss))); #endif return 0; }
//--------------------------------------------- //mining frequent itemsets from database //--------------------------------------------- void MineAllPats() { ITEM_COUNTER* pfreqitems; int *pitem_order_map; double ntotal_occurrences; clock_t start, start_mining;; int i; if(goparameters.bresult_name_given) gpfsout = new FSout(goparameters.pat_filename); start_mining = clock(); gntotal_patterns = 0; gnmax_pattern_len = 0; gntotal_singlepath = 0; gndepth = 0; gntotal_call = 0; gpdata = new Data(goparameters.data_filename); //count frequent items in original database pfreqitems = NULL; start = clock(); goDBMiner.ScanDBCountFreqItems(&pfreqitems); //goTimeTracer.mdInitial_count_time = (double)(clock()-start)/CLOCKS_PER_SEC; gppat_counters = new int[gnmax_trans_len]; //goMemTracer.IncBuffer(gnmax_trans_len*sizeof(int)); for(i=0;i<gnmax_trans_len;i++) gppat_counters[i] = 0; if(gntotal_freqitems==0) delete gpdata; else if(gntotal_freqitems==1) { gnmax_pattern_len = 1; OutputOnePattern(pfreqitems[0].item, pfreqitems[0].support); delete gpdata; } else if(gntotal_freqitems>1) { gpheader_itemset = new int[gntotal_freqitems]; //goMemTracer.IncBuffer(gntotal_freqitems*sizeof(int)); //an array which maps an item to its frequency order; the most infrequent item has order 0 pitem_order_map = new int[gnmax_item_id]; for(i=0;i<gnmax_item_id;i++) pitem_order_map[i] = -1; for(i=0;i<gntotal_freqitems;i++) pitem_order_map[pfreqitems[i].item] = i; //if the number of frequent items in a conditonal database is smaller than maximal bucket size, use bucket counting technique if(gntotal_freqitems <= MAX_BUCKET_SIZE) { gpbuckets = new int[(1<<gntotal_freqitems)]; memset(gpbuckets, 0, sizeof(int)*(1<<gntotal_freqitems)); goDBMiner.ScanDBBucketCount(pitem_order_map, gpbuckets); bucket_count(gpbuckets, gntotal_freqitems, pfreqitems); delete []gpbuckets; delete gpdata; delete []pitem_order_map; } else { HEADER_TABLE pheader_table; goAFOPTMiner.Init(MIN(gnmax_trans_len, gntotal_freqitems+1)); gpbuckets = new int[(1<<MAX_BUCKET_SIZE)]; //goMemTracer.IncBuffer(sizeof(int)*(1<<MAX_BUCKET_SIZE)); //initialize header table start = clock(); pheader_table = new HEADER_NODE[gntotal_freqitems]; //goMemTracer.IncBuffer(sizeof(HEADER_NODE)*gntotal_freqitems); ntotal_occurrences = 0; for(i=0;i<gntotal_freqitems;i++) { pheader_table[i].item = pfreqitems[i].item; pheader_table[i].nsupport = pfreqitems[i].support; pheader_table[i].parray_conddb = NULL; pheader_table[i].order = i; ntotal_occurrences += pfreqitems[i].support; } //to choose a proper representation format for each conditional database if((double)goparameters.nmin_sup/gndb_size>=BUILD_TREE_MINSUP_THRESHOLD || ntotal_occurrences/(gndb_size*gntotal_freqitems)>=BUILD_TREE_AVGSUP_THRESHOLD || gntotal_freqitems<=BUILD_TREE_ITEM_THRESHOLD ) { for(i=0;i<gntotal_freqitems;i++) pheader_table[i].flag = AFOPT_FLAG; } else { for(i=0;i<gntotal_freqitems-BUILD_TREE_ITEM_THRESHOLD;i++) pheader_table[i].flag = 0; for(i=MAX(0, gntotal_freqitems-BUILD_TREE_ITEM_THRESHOLD);i<gntotal_freqitems;i++) pheader_table[i].flag = AFOPT_FLAG; } //scan database to construct conditional databases and do bucket counting memset(gpbuckets, 0, sizeof(int)*(1<<MAX_BUCKET_SIZE)); goDBMiner.ScanDBBuildCondDB(pheader_table, pitem_order_map, gntotal_freqitems, gpbuckets); //goTimeTracer.mdInitial_construct_time = (double)(clock()-start)/CLOCKS_PER_SEC; //goMemTracer.mninitial_struct_size = //goMemTracer.mnArrayDB_size+//goMemTracer.mnAFOPTree_size+sizeof(int)*(1<<MAX_BUCKET_SIZE); bucket_count(gpbuckets, 
MAX_BUCKET_SIZE, &(pfreqitems[gntotal_freqitems-MAX_BUCKET_SIZE])); delete []pitem_order_map; delete gpdata; //mining frequent itemsets in depth first order if((double)goparameters.nmin_sup/gndb_size>=BUILD_TREE_MINSUP_THRESHOLD || ntotal_occurrences/(gndb_size*gntotal_freqitems)>=BUILD_TREE_AVGSUP_THRESHOLD || gntotal_freqitems<=BUILD_TREE_ITEM_THRESHOLD) goAFOPTMiner.DepthAFOPTGrowth(pheader_table, gntotal_freqitems, 0); else { goArrayMiner.DepthArrayGrowth(pheader_table, gntotal_freqitems); goAFOPTMiner.DepthAFOPTGrowth(pheader_table, gntotal_freqitems, gntotal_freqitems-BUILD_TREE_ITEM_THRESHOLD); } delete []pheader_table; //goMemTracer.DecBuffer(gntotal_freqitems*sizeof(HEADER_NODE)); delete []gpbuckets; //goMemTracer.DecBuffer(sizeof(int)*(1<<MAX_BUCKET_SIZE)); } delete []gpheader_itemset; //goMemTracer.DecBuffer(gntotal_freqitems*sizeof(int)); } delete []pfreqitems; //goMemTracer.DecBuffer(gntotal_freqitems*sizeof(ITEM_COUNTER)); if(goparameters.bresult_name_given) delete gpfsout; //goTimeTracer.mdtotal_running_time = (double)(clock()-start_mining)/CLOCKS_PER_SEC; PrintSummary(); delete []gppat_counters; //goMemTracer.DecBuffer(gnmax_trans_len*sizeof(int)); }