// Create a device buffer of `size` bytes and fill it from h_mem_source,
// staging the transfer through a pinned (CL_MEM_ALLOC_HOST_PTR) host buffer.
// Returns the populated device buffer; the temporary pinned staging buffer is
// unmapped and released before returning.
// NOTE(review): `size` is a byte count even though h_mem_source is float* --
// callers must pass bytes, not element counts.  TODO: confirm at call sites.
cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source)
{
    cl_mem d_mem, d_mem_pinned;
    unsigned char* h_mem_pinned = NULL;

    // Pinned staging buffer: ALLOC_HOST_PTR asks the runtime for host memory
    // it can transfer from directly.
    d_mem_pinned = clCreateBuffer(oclHandles.context,
                                  CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
                                  size, NULL, &oclHandles.cl_status);
    if (oclHandles.cl_status != CL_SUCCESS)
        printf("excpetion in _clMalloc. CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR in _clCreateAndCpyPinnedMem\n");

    // BUG FIX: the map must be blocking (CL_TRUE); the original passed
    // CL_FALSE and immediately memcpy'd through the returned pointer,
    // racing the asynchronous map operation.
    h_mem_pinned = (unsigned char*) clEnqueueMapBuffer(oclHandles.queue, d_mem_pinned,
                                                       CL_TRUE, CL_MAP_WRITE, 0, size,
                                                       0, NULL, NULL, &oclHandles.cl_status);
    if (oclHandles.cl_status != CL_SUCCESS)
        printf("excpetion in _clCreateAndCpyPinnedMem -> clEnqueueMapBuffer\n");
    memcpy(h_mem_pinned, h_mem_source, size);

    // Destination device buffer.
    d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_ONLY, size, NULL,
                           &oclHandles.cl_status);
    if (oclHandles.cl_status != CL_SUCCESS)
        printf("excpetion in _clMalloc. CL_MEM_READ_ONLY in _clCreateAndCpyPinnedMem\n");

    // Hand the staged bytes back to the runtime, then remap (for reading this
    // time, also blocking) and push them into d_mem with an API-level write.
    clEnqueueUnmapMemObject(oclHandles.queue, d_mem_pinned, (void*) h_mem_pinned, 0, NULL, NULL);
    h_mem_pinned = (unsigned char*) clEnqueueMapBuffer(oclHandles.queue, d_mem_pinned,
                                                       CL_TRUE, CL_MAP_READ, 0, size,
                                                       0, NULL, NULL, &oclHandles.cl_status);
    oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, d_mem, CL_FALSE, 0,
                                                size, h_mem_pinned, 0, NULL, NULL);
    _clFinish();
#ifdef ERRMSG
    if (oclHandles.cl_status != CL_SUCCESS)
        printf("excpetion in _clCreateAndCpyMem() -> clEnqueueWriteBuffer.\n");
#endif

    // BUG FIX: the original leaked d_mem_pinned (left mapped, never
    // released).  Clean it up now that the transfer is complete.
    clEnqueueUnmapMemObject(oclHandles.queue, d_mem_pinned, (void*) h_mem_pinned, 0, NULL, NULL);
    _clFinish();
    clReleaseMemObject(d_mem_pinned);
    return d_mem;
}
// Create a device buffer of `size` bytes and fill it from h_mem_source,
// staging the transfer through ordinary pageable host memory (the pageable
// counterpart of _clCreateAndCpyPinnedMem).  Returns the populated buffer.
cl_mem _clCreateAndCpyPagedMem(int size, unsigned char* h_mem_source)
{
    cl_mem d_mem;
    float* h_mem_pageable = NULL;  // pageable staging area (typo "pagealbe" fixed)

    d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL,
                           &oclHandles.cl_status);
    if (oclHandles.cl_status != CL_SUCCESS)
        printf("excpetion in _clCreateAndCpyPagedMem -> clCreateBuffer\n");

    // BUG FIX: the original malloc'd a host buffer and then immediately
    // overwrote the pointer with clEnqueueMapBuffer(d_mem, ...), leaking the
    // allocation and copying through a mapping of the destination buffer
    // itself.  A pageable-memory transfer should stage through plain
    // malloc'd memory only.
    h_mem_pageable = (float*) malloc(size);
    memcpy(h_mem_pageable, h_mem_source, size);
    oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, d_mem, CL_FALSE, 0,
                                                size, h_mem_pageable, 0, NULL, NULL);
    _clFinish();  // the write must complete before the staging buffer is freed
    free(h_mem_pageable);
    return d_mem;
}
//---------------------------------------------------------- //--breadth first search on GPUs //---------------------------------------------------------- void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \ int *h_graph_edges, bool *h_graph_mask, bool *h_updating_graph_mask, \ bool *h_graph_visited, int *h_cost) throw(std::string){ //int number_elements = height*width; bool h_over; cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \ d_graph_visited, d_cost, d_over; // try{ //--1 transfer data from host to device //printf("initializing\n"); _clInit(); //printf("allocating\n"); d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes); d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges); d_graph_mask = _clMalloc(no_of_nodes*sizeof(bool), h_graph_mask); d_updating_graph_mask = _clMalloc(no_of_nodes*sizeof(bool), h_updating_graph_mask); d_graph_visited = _clMalloc(no_of_nodes*sizeof(bool), h_graph_visited); d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost); d_over = _clMallocRW(sizeof(bool), &h_over); //printf("copyin\n"); _clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes); _clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges); _clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(bool), h_graph_mask); _clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(bool), h_updating_graph_mask); _clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(bool), h_graph_visited); _clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost); //--2 invoke kernel #ifdef PROFILING timer kernel_timer; double kernel_time = 0.0; kernel_timer.reset(); kernel_timer.start(); #endif int kerId=0; // printf("launching kernel\n"); do{ // printf("copy in\n"); h_over = false; _clMemcpyH2D(d_over, sizeof(bool), &h_over); //--kernel 0 int kernel_id = 0; int kernel_idx = 0; // printf("set arg 1\n"); _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes); _clSetArgs(kernel_id, kernel_idx++, d_graph_edges); 
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_cost); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); //int work_items = no_of_nodes; // printf("invoke 1\n"); _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); //--kernel 1 kernel_id = 1; kernel_idx = 0; // printf("set arg 2\n"); _clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_over); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); //work_items = no_of_nodes; // printf("invoke 2\n"); _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); // printf("copy back\n"); _clMemcpyD2H(d_over,sizeof(bool), &h_over); // printf("done\n"); // printf("K%d\n",kerId++); }while(h_over); // printf("done!"); _clFinish(); #ifdef PROFILING kernel_timer.stop(); kernel_time = kernel_timer.getTimeInSeconds(); #endif //--3 transfer data from device to host _clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost); //--statistics #ifdef PROFILING std::cout<<"kernel time(s):"<<kernel_time<<std::endl; #endif //--4 release cl resources. _clFree(d_graph_nodes); _clFree(d_graph_edges); _clFree(d_graph_mask); _clFree(d_updating_graph_mask); _clFree(d_graph_visited); _clFree(d_cost); _clFree(d_over); _clRelease(); // } // catch(std::string msg){ // _clFree(d_graph_nodes); // _clFree(d_graph_edges); // _clFree(d_graph_mask); // _clFree(d_updating_graph_mask); // _clFree(d_graph_visited); // _clFree(d_cost); // _clFree(d_over); // _clRelease(); // std::string e_str = "in run_transpose_gpu -> "; // e_str += msg; // throw(e_str); // } return ; }
//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
// BFS driver (char-mask variant): uploads the graph, then repeatedly launches
// kernel 0 and kernel 1 until a pass leaves the device-side d_over flag
// false, and finally copies the cost array back to the host.  char is used
// for the device-side boolean arrays.  Throws std::string on helper failure;
// the catch path frees all device buffers before rethrowing with context.
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
char *h_graph_visited, int *h_cost) throw(std::string){
 //int number_elements = height*width;
 char h_over;  // host mirror of the device d_over loop flag
 cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
  d_graph_visited, d_cost, d_over;
 try{
  //--1 transfer data from host to device
  _clInit();
  d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
  d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
  // mask/visited buffers are allocated read-write: the kernels update them
  d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
  d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
  d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);
  d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
  d_over = _clMallocRW(sizeof(char), &h_over);
  _clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
  _clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
  _clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);
  _clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
  _clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);
  _clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);
  //--2 invoke kernel
#ifdef PROFILING
  timer kernel_timer;
  double kernel_time = 0.0;
  kernel_timer.reset();
  kernel_timer.start();
#endif
  do{
   // reset the flag; kernel 1 raises it on the device when another pass is needed
   h_over = false;
   _clMemcpyH2D(d_over, sizeof(char), &h_over);
   //--kernel 0
   int kernel_id = 0;
   int kernel_idx = 0;
   _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
   _clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
   _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
   _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
   _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
   _clSetArgs(kernel_id, kernel_idx++, d_cost);
   _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
   //int work_items = no_of_nodes;
   _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
   //--kernel 1
   kernel_id = 1;
   kernel_idx = 0;
   _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
   _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
   _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
   _clSetArgs(kernel_id, kernel_idx++, d_over);
   _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
   //work_items = no_of_nodes;
   _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
   // pull the flag back; loop again while the device reported more work
   _clMemcpyD2H(d_over,sizeof(char), &h_over);
  }while(h_over);
  _clFinish();
#ifdef PROFILING
  kernel_timer.stop();
  kernel_time = kernel_timer.getTimeInSeconds();
#endif
  //--3 transfer data from device to host
  _clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
  //--statistics
#ifdef PROFILING
  std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
#endif
  //--4 release cl resources.
  _clFree(d_graph_nodes);
  _clFree(d_graph_edges);
  _clFree(d_graph_mask);
  _clFree(d_updating_graph_mask);
  _clFree(d_graph_visited);
  _clFree(d_cost);
  _clFree(d_over);
  _clRelease();
 }
 catch(std::string msg){
  // free everything before rethrowing with added context
  // NOTE(review): the message says "run_transpose_gpu" but this is
  // run_bfs_gpu -- looks like a copy-paste slip; confirm before changing.
  _clFree(d_graph_nodes);
  _clFree(d_graph_edges);
  _clFree(d_graph_mask);
  _clFree(d_updating_graph_mask);
  _clFree(d_graph_visited);
  _clFree(d_cost);
  _clFree(d_over);
  _clRelease();
  std::string e_str = "in run_transpose_gpu -> ";
  e_str += msg;
  throw(e_str);
 }
 return ;
}
/* Crack callback */ static void ocl_lastpass_crack_callback(char *line, int self) { int a; int *found; int err; struct hash_list_s *mylist, *addlist; char plain[MAX]; char hex1[16]; cl_uint16 addline; cl_uint16 salt; cl_uint16 singlehash; unsigned char mhash[64]; size_t gws,gws1; mylist = hash_list; while (mylist) { if (mylist->salt2[0]==1) {mylist=mylist->next;continue;} /* setup addline */ addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0; addline.sF=strlen(line); addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24); addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24); addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24); addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24); memcpy(mhash,mylist->hash,16); unsigned int A,B,C,D; memcpy(hex1,mhash,4); memcpy(&A, hex1, 4); memcpy(hex1,mhash+4,4); memcpy(&B, hex1, 4); memcpy(hex1,mhash+8,4); memcpy(&C, hex1, 4); memcpy(hex1,mhash+12,4); memcpy(&D, hex1, 4); singlehash.s0=A; singlehash.s1=B; singlehash.s2=C; singlehash.s3=D; if (attack_over!=0) pthread_exit(NULL); pthread_mutex_lock(&wthreads[self].tempmutex); pthread_mutex_unlock(&wthreads[self].tempmutex); /* setup salt */ char mysalt[512]; char myuser[512]; char *tok; char *save; int iterations; int len; strcpy(mysalt,mylist->salt); tok = strtok_r(mysalt,":",&save); tok = strtok_r(NULL,":",&save); if (!tok) return; strcpy(myuser,tok); tok = strtok_r(NULL,":",&save); if (!tok) return; tok = strtok_r(NULL,":",&save); if (!tok) return; iterations = atoi(tok); len = strlen(myuser); bzero(mhash,64); strcpy((char*)mhash,myuser); memcpy(mhash+len,"\x00\x00\x00\x01\x80",5); unsigned int tmp,tmp1,tmp2; salt.s0=(mhash[0])|(mhash[1]<<8)|(mhash[2]<<16)|(mhash[3]<<24); salt.s1=(mhash[4])|(mhash[5]<<8)|(mhash[6]<<16)|(mhash[7]<<24); salt.s2=(mhash[8])|(mhash[9]<<8)|(mhash[10]<<16)|(mhash[11]<<24); salt.s3=(mhash[12])|(mhash[13]<<8)|(mhash[14]<<16)|(mhash[15]<<24); 
salt.s4=(mhash[16])|(mhash[17]<<8)|(mhash[18]<<16)|(mhash[19]<<24); salt.s5=(mhash[20])|(mhash[21]<<8)|(mhash[22]<<16)|(mhash[23]<<24); salt.s6=(mhash[24])|(mhash[25]<<8)|(mhash[26]<<16)|(mhash[27]<<24); salt.s7=(mhash[28])|(mhash[29]<<8)|(mhash[30]<<16)|(mhash[31]<<24); salt.s8=(mhash[32])|(mhash[33]<<8)|(mhash[34]<<16)|(mhash[35]<<24); salt.s9=(mhash[36])|(mhash[37]<<8)|(mhash[38]<<16)|(mhash[39]<<24); salt.sA=(mhash[40])|(mhash[41]<<8)|(mhash[42]<<16)|(mhash[43]<<24); salt.sB=(mhash[44])|(mhash[45]<<8)|(mhash[46]<<16)|(mhash[47]<<24); salt.sC=(mhash[48])|(mhash[49]<<8)|(mhash[50]<<16)|(mhash[51]<<24); salt.sD=(mhash[52])|(mhash[53]<<8)|(mhash[54]<<16)|(mhash[55]<<24); REV(salt.s0); REV(salt.s1); REV(salt.s2); REV(salt.s3); REV(salt.s4); REV(salt.s5); REV(salt.s6); REV(salt.s7); REV(salt.s8); REV(salt.s9); REV(salt.sA); REV(salt.sB); REV(salt.sC); REV(salt.sD); salt.sF=len+4; _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]); _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]); _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline); _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &singlehash); _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) 
&rule_images3_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &singlehash); _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]); _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]); _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]); _clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]); _clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint16), (void*) &singlehash); _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt); gws = (rule_counts[self][0] / wthreads[self].vectorsize); while ((gws%64)!=0) gws++; gws1 = gws*wthreads[self].vectorsize; if (gws1==0) gws1=64; if (gws==0) gws=64; _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); for (a=1;a<iterations;a+=100) { salt.sA=a; salt.sB=a+100; if (salt.sB>iterations) salt.sB=iterations; _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); 
wthreads[self].tries+=(gws1)/(get_hashes_num()*(iterations/100)); } bzero(mhash,64); strcpy((char*)mhash,"lastpass rocks\x02\x02"); salt.s0=(mhash[0])|(mhash[1]<<8)|(mhash[2]<<16)|(mhash[3]<<24); salt.s1=(mhash[4])|(mhash[5]<<8)|(mhash[6]<<16)|(mhash[7]<<24); salt.s2=(mhash[8])|(mhash[9]<<8)|(mhash[10]<<16)|(mhash[11]<<24); salt.s3=(mhash[12])|(mhash[13]<<8)|(mhash[14]<<16)|(mhash[15]<<24); _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err); if (err!=CL_SUCCESS) continue; if (*found>0) { _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint)*wthreads[self].vectorsize, rule_found_ind[self], 0, NULL, NULL); for (a=0;a<gws1;a++) if (rule_found_ind[self][a]==1) { _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, a*hash_ret_len1, hash_ret_len1, rule_ptr[self]+a*hash_ret_len1, 0, NULL, NULL); if (memcmp("lastpass rocks\x02\x02", (char *)rule_ptr[self]+(a)*hash_ret_len1, 16) == 0) { int flag = 0; strcpy(plain,&rule_images[self][0]+(a*MAX)); strcat(plain,line); pthread_mutex_lock(&crackedmutex); addlist = cracked_list; while (addlist) { if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0)) flag = 1; addlist = addlist->next; } pthread_mutex_unlock(&crackedmutex); if (flag == 0) { add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain); mylist->salt2[0]=1; } } } bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint)); _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, 
NULL); *found = 0; _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL); } _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL); mylist = mylist->next; } }
/* Crack callback */
/* PDF crack callback: packs the candidate suffix `line`, the document's
   O/ID strings and permission flags (from the global `cs` state) into
   cl_uint16 kernel arguments, runs the mod/pre1/bl1/last pipeline on worker
   `self`'s queue, and records any candidate flagged in rule_found_ind.
   The kernel argument layout differs between revision 6 and older
   revisions (cs.R). */
static void ocl_pdf_crack_callback(char *line, int self)
{
    int a;
    int *found;
    int err;
    char plain[MAX];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 singlehash;
    size_t gws,gws1;

    /* setup addline: first 16 bytes of `line` as LE words, length in sF.
       NOTE(review): reads line[0..15] regardless of strlen(line) -- assumes
       candidate buffers are at least 16 bytes; confirm with the caller. */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);
    singlehash = get_singlehash();
    /* setup salt: cs.o (40 bytes) in s0..s9, cs.id (16 bytes) in sA..sD,
       permissions in sF, encrypt_metadata flag in sE */
    salt.s0=(cs.o[0])|(cs.o[1]<<8)|(cs.o[2]<<16)|(cs.o[3]<<24);
    salt.s1=(cs.o[4])|(cs.o[5]<<8)|(cs.o[6]<<16)|(cs.o[7]<<24);
    salt.s2=(cs.o[8])|(cs.o[9]<<8)|(cs.o[10]<<16)|(cs.o[11]<<24);
    salt.s3=(cs.o[12])|(cs.o[13]<<8)|(cs.o[14]<<16)|(cs.o[15]<<24);
    salt.s4=(cs.o[16])|(cs.o[17]<<8)|(cs.o[18]<<16)|(cs.o[19]<<24);
    salt.s5=(cs.o[20])|(cs.o[21]<<8)|(cs.o[22]<<16)|(cs.o[23]<<24);
    salt.s6=(cs.o[24])|(cs.o[25]<<8)|(cs.o[26]<<16)|(cs.o[27]<<24);
    salt.s7=(cs.o[28])|(cs.o[29]<<8)|(cs.o[30]<<16)|(cs.o[31]<<24);
    salt.s8=(cs.o[32])|(cs.o[33]<<8)|(cs.o[34]<<16)|(cs.o[35]<<24);
    salt.s9=(cs.o[36])|(cs.o[37]<<8)|(cs.o[38]<<16)|(cs.o[39]<<24);
    salt.sA=(cs.id[0])|(cs.id[1]<<8)|(cs.id[2]<<16)|(cs.id[3]<<24);
    salt.sB=(cs.id[4])|(cs.id[5]<<8)|(cs.id[6]<<16)|(cs.id[7]<<24);
    salt.sC=(cs.id[8])|(cs.id[9]<<8)|(cs.id[10]<<16)|(cs.id[11]<<24);
    salt.sD=(cs.id[12])|(cs.id[13]<<8)|(cs.id[14]<<16)|(cs.id[15]<<24);
    salt.sF=cs.P;
    salt.sE=cs.encrypt_metadata;
    if (attack_over!=0) pthread_exit(NULL);
    /* rendezvous with the worker's pause/reconfigure mutex */
    pthread_mutex_lock(&wthreads[self].tempmutex);
    pthread_mutex_unlock(&wthreads[self].tempmutex);
    /* wire up the pipeline kernels */
    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt);
    if (cs.R==6)
    {
        /* revision 6 kernel takes an extra buffer, shifting args up by one */
        _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &singlehash);
        _clSetKernelArg(rule_kernelbl1[self], 7, sizeof(cl_uint16), (void*) &salt);
    }
    else
    {
        _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
        _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);
    }
    _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);
    if (rule_counts[self][0]==-1) return;
    /* round the work size up to a multiple of 64 work-items */
    gws = (rule_counts[self][0] / wthreads[self].vectorsize);
    while ((gws%64)!=0) gws++;
    gws1 = gws*wthreads[self].vectorsize;
    if (gws1==0) gws1=64;
    if (gws==0) gws=64;
    /* NOTE(review): kernelmod is launched with gws here, while the lastpass
       callback launches it with gws1 -- confirm which is intended. */
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    if (cs.R==6)
    {
        /* revision 6: iterate the block kernel 256+32 times, passing the
           round number in singlehash.sE */
        for (a=0;a<256+32;a++)
        {
            if (attack_over!=0) return;
            singlehash.sE = a;
            _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &singlehash);
            _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
            _clFinish(rule_oclqueue[self]);
            wthreads[self].tries+=(gws)/(256+32);
        }
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
    }
    else if (cs.R<5)
    {
        /* NOTE(review): R==5 runs neither bl1 nor last but still checks the
           found buffer below -- presumably R5 is unsupported; confirm. */
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
    }
    /* NOTE(review): `err` is never checked and *found is dereferenced
       unconditionally -- a failed map would crash here; consider checking. */
    found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
    if (cs.R!=6) wthreads[self].tries+=(gws);
    if (*found>0)
    {
        _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        for (a=0;a<ocl_rule_workset[self];a++)
        if (rule_found_ind[self][a]==1)
        {
            {
                strcpy(plain,&rule_images[self][0]+(a*MAX));
                strcat(plain,line);
                /* single-hash mode: record only the first crack */
                pthread_mutex_lock(&crackedmutex);
                if (!cracked_list)
                {
                    pthread_mutex_unlock(&crackedmutex);
                    add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
                }
                else pthread_mutex_unlock(&crackedmutex);
            }
        }
        /* clear the device-side found markers for the next batch */
        bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
        _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        *found = 0;
        _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
    }
    _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
}
/* Crack callback */
/* Android FDE crack callback: packs the candidate suffix `line` and the two
   salts into cl_uint16 kernel arguments, runs mod/pre1/bl1/pre2/bl2/end on
   worker `self`'s queue, reads back the derived blocks and records any
   candidate that check_androidfde() accepts. */
static void ocl_androidfde_crack_callback(char *line, int self)
{
    int a,b,c,e;
    char plain[MAX];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 salt2;
    size_t nws1;
    size_t nws;

    /* setup addline: first 16 bytes of `line` as LE words, length in sF.
       NOTE(review): reads line[0..15] regardless of strlen(line) -- assumes
       candidate buffers are at least 16 bytes; confirm with the caller. */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);
    /* setup salt */
    salt=ocl_get_salt();
    salt2=ocl_get_salt2();
    if (attack_over!=0) pthread_exit(NULL);
    if (rule_counts[self][0]==-1) return;
    /* round the work size up to a multiple of 64 work-items */
    nws = (rule_counts[self][0] / wthreads[self].vectorsize);
    while ((nws%64)!=0) nws++;
    nws1 = nws*wthreads[self].vectorsize;
    if (nws1==0) nws1=64;
    if (nws==0) nws=64;
    /* Wire up the pipeline kernels.  NOTE(review): argument index 5 is
       skipped for every kernel below, and rule_kernelend2 is configured but
       never enqueued in this function -- verify against the kernel sources
       and any caller that launches kernelend2. */
    _clSetKernelArg(rule_kernelend[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernelend[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 2, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelend[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelend[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelend[self], 7, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 8, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernelend2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 2, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelend2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelend2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelend2[self], 7, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 8, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelmod[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelmod[self], 7, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelpre2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &nws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    /* b = number of 1000-round chunks across both loops (progress divisor) */
    b=(2*2000)/1000;
    /* first 2000 rounds in 1000-round chunks; bounds go in addline.sA/sB */
    for (a=0;a<2000;a+=1000)
    {
        if (attack_over!=0) pthread_exit(NULL);
        addline.sA=a;
        addline.sB=a+1000;
        if (a==0) addline.sA=1;
        _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(nws1)/b;
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);
    }
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    /* second 2000 rounds, same chunking, through kernelbl2 */
    for (a=0;a<2000;a+=1000)
    {
        if (attack_over!=0) pthread_exit(NULL);
        addline.sA=a;
        addline.sB=a+1000;
        if (addline.sB>2000) addline.sB=2000;
        if (a==0) addline.sA=1;
        _clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(nws1)/b;
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);
    }
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelend[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    /* blocking read of all derived blocks, then test each candidate on the host */
    _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, 0, hash_ret_len1*wthreads[self].vectorsize*nws, rule_ptr[self], 0, NULL, NULL);
    for (a=0;a<nws;a++)
    for (c=0;c<wthreads[self].vectorsize;c++)
    {
        e=(a)*wthreads[self].vectorsize+c;
        /* NOTE(review): stride 32 here vs hash_ret_len1 in the read above --
           presumably hash_ret_len1==32 for this format; confirm. */
        if (check_androidfde(rule_ptr[self]+e*32)==hash_ok)
        {
            strcpy(plain,&rule_images[self][0]+(e*MAX));
            strcat(plain,line);
            add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
        }
    }
}
/* Crack callback */ static void ocl_kwallet_crack_callback(char *line, int self) { int a,c,d,e; char plainimg[MAX*2]; cl_uint16 addline; cl_uint16 salt; char key[20]; size_t gws,gws1; /* setup addline */ addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0; addline.sF=strlen(line); addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24); addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24); addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24); addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24); if (attack_over!=0) pthread_exit(NULL); pthread_mutex_lock(&wthreads[self].tempmutex); pthread_mutex_unlock(&wthreads[self].tempmutex); _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]); _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]); _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline); _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]); _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]); _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]); _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_uint16), (void*) &salt); if (rule_counts[self][0]==-1) return; gws 
= (rule_counts[self][0] / wthreads[self].vectorsize); while ((gws%64)!=0) gws++; gws1 = gws*wthreads[self].vectorsize; if (gws1==0) gws1=64; if (gws==0) gws=64; _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); for (a=0;a<((cs.iterations)/300);a++) { _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); wthreads[self].tries+=(gws1)/(cs.iterations/300); } salt.sA=((cs.iterations)%300); _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_uint16), (void*) &salt); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL); _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, 0, hash_ret_len1*wthreads[self].vectorsize*ocl_rule_workset[self], rule_ptr[self], 0, NULL, NULL); for (a=0;a<gws;a++) { for (c=0;c<wthreads[self].vectorsize;c++) { e=(a)*wthreads[self].vectorsize+c; memcpy(key,(char *)rule_ptr[self]+(e)*hash_ret_len1,hash_ret_len1); if (check_kwallet(key)==hash_ok) { for (d=0;d<MAX;d++) plainimg[d] = rule_images[self][e*MAX+d]; strncat(plainimg,line,32); plainimg[31]=0; if (!cracked_list) add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, (char *)plainimg); } } } }
//---------------------------------------------------------- //--breadth first search on GPUs //---------------------------------------------------------- void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \ int *h_graph_edges, int *h_graph_mask, int *h_updating_graph_mask, \ int *h_graph_visited, int *h_cost) throw(std::string){ //int number_elements = height*width; int h_over; cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \ d_graph_visited, d_cost, d_over; try{ //--1 transfer data from host to device _clInit(); d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes); d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges); d_graph_mask = _clMallocRW(no_of_nodes*sizeof(int), h_graph_mask); d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(int), h_updating_graph_mask); d_graph_visited = _clMallocRW(no_of_nodes*sizeof(int), h_graph_visited); d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost); d_over = _clMallocRW(sizeof(int), &h_over); _clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes); _clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges); _clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(int), h_graph_mask); _clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(int), h_updating_graph_mask); _clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(int), h_graph_visited); _clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost); //--2 invoke kernel #ifdef PROFILING timer kernel_timer; double kernel_time = 0.0; kernel_timer.reset(); kernel_timer.start(); #endif struct timespec startT, endT; clock_gettime(CLOCK_MONOTONIC, &startT); do{ h_over = false; _clMemcpyH2D(d_over, sizeof(int), &h_over); //--kernel 0 int kernel_id = 0; int kernel_idx = 0; _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes); _clSetArgs(kernel_id, kernel_idx++, d_graph_edges); _clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); 
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_cost); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); //int work_items = no_of_nodes; _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); //--kernel 1 kernel_id = 1; kernel_idx = 0; _clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_over); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); //work_items = no_of_nodes; _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); _clMemcpyD2H(d_over,sizeof(int), &h_over); }while(h_over); _clFinish(); clock_gettime(CLOCK_MONOTONIC, &endT); uint64_t diff = 1000000000 * (endT.tv_sec - startT.tv_sec); uint64_t nanodiff = endT.tv_nsec - startT.tv_nsec; //printf("elapsed accelerator time = %llu nanoseconds\n", (long long unsigned int) diff); //printf("start time seconds%u \n", startT.tv_sec); //printf("end time seconds %u \n", endT.tv_sec); //printf("difference %u \n", diff); //printf("start time nanoseconds %u \n", startT.tv_nsec); //printf("end time nanoseconds %u \n", endT.tv_nsec); printf(" accelerator time %u \n", nanodiff + diff); #ifdef PROFILING kernel_timer.stop(); kernel_time = kernel_timer.getTimeInSeconds(); #endif //--3 transfer data from device to host _clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost); //--statistics #ifdef PROFILING std::cout<<"kernel time(s):"<<kernel_time<<std::endl; #endif //--4 release cl resources. 
_clFree(d_graph_nodes); _clFree(d_graph_edges); _clFree(d_graph_mask); _clFree(d_updating_graph_mask); _clFree(d_graph_visited); _clFree(d_cost); _clFree(d_over); _clRelease(); } catch(std::string msg){ _clFree(d_graph_nodes); _clFree(d_graph_edges); _clFree(d_graph_mask); _clFree(d_updating_graph_mask); _clFree(d_graph_visited); _clFree(d_cost); _clFree(d_over); _clRelease(); std::string e_str = "in run_transpose_gpu -> "; e_str += msg; throw(e_str); } return ; }
/* Crack callback */ static void ocl_sha512unix_crack_callback(char *line, int self) { int a,b,c,e; int *found; int err; struct hash_list_s *mylist, *addlist; char plain[MAX]; char hex1[16]; cl_uint16 salt; cl_ulong8 singlehash; unsigned char base64[89]; int cc,cc1; size_t gws,gws1; cc = self_kernel16[self]; cc1 = self_kernel16[self]+strlen(line); if (cc1>15) cc1=15; mylist = hash_list; while (mylist) { if (mylist->salt2[0]==1) {mylist=mylist->next;continue;} salt.sC=cc1; /* setup_psalt */ unsigned char mhash[89]; memcpy(base64,mylist->hash,88); b64_pton_crypt(base64,mhash); uint64_t A1,A2,A3,A4,A5,A6,A7,A8; memcpy(hex1,mhash,8); memcpy(&A1, hex1, 8); memcpy(hex1,mhash+8,8); memcpy(&A2, hex1, 8); memcpy(hex1,mhash+16,8); memcpy(&A3, hex1, 8); memcpy(hex1,mhash+24,8); memcpy(&A4, hex1, 8); memcpy(hex1,mhash+32,8); memcpy(&A5, hex1, 8); memcpy(hex1,mhash+40,8); memcpy(&A6, hex1, 8); memcpy(hex1,mhash+48,8); memcpy(&A7, hex1, 8); memcpy(hex1,mhash+56,8); memcpy(&A8, hex1, 8); singlehash.s0=A1;singlehash.s1=A2;singlehash.s2=A3;singlehash.s3=A4; singlehash.s4=A5;singlehash.s5=A6;singlehash.s6=A7;singlehash.s7=A8; if (rule_counts[self][cc]==-1) return; gws = (rule_counts[self][cc] / wthreads[self].vectorsize); while ((gws%64)!=0) gws++; gws1 = gws*wthreads[self].vectorsize; if (gws1==0) gws1=64; if (gws==0) gws=64; for (a=0;a<gws;a++) { char candidate[32]; bzero(candidate,32); bzero(hex1,16); memcpy(hex1,mylist->salt+3,strlen(mylist->salt)-4); salt.sD=strlen(hex1); strcpy(candidate,rule_images162[cc][self]+(a*16)); strcat(candidate,line); setup_spint0(candidate,hex1,&rule_images16[cc1][self][0]+(a*96)); if (attack_over!=0) pthread_exit(NULL); } _clEnqueueWriteBuffer(rule_oclqueue[self], rule_images16_buf[cc1][self], CL_FALSE, 0, ocl_rule_workset[self]*wthreads[self].vectorsize*96, rule_images16[cc1][self], 0, NULL, NULL); if (attack_over!=0) pthread_exit(NULL); pthread_mutex_lock(&wthreads[self].tempmutex); pthread_mutex_unlock(&wthreads[self].tempmutex); /* Set 
sha512unixm, sha512unixe then the transform kernels */ _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]); _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images16_buf[cc1][self]); _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]); _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_mem), (void*) &rule_sizes16_buf[cc1][self]); _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernelmod[self], 5, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]); _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]); _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]); _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]); _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_ulong8), (void*) &singlehash); _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]); _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]); _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]); _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]); _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_ulong8), (void*) &singlehash); _clSetKernelArg(rule_kernelend[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]); _clSetKernelArg(rule_kernelend[self], 1, 
sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]); _clSetKernelArg(rule_kernelend[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]); _clSetKernelArg(rule_kernelend[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]); _clSetKernelArg(rule_kernelend[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]); _clSetKernelArg(rule_kernelend[self], 5, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernelend[self], 6, sizeof(cl_ulong8), (void*) &singlehash); /* Now call first transform00+4999*(transformX+sha512unixm+sha512unixe) */ _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); for (a=0;a<5000;a+=200) { salt.sA=a; salt.sB=a+200; if (salt.sB>5000) salt.sB=5000; _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &salt); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); wthreads[self].tries+=(wthreads[self].vectorsize*ocl_rule_workset[self])/(get_hashes_num()*25); if (attack_over!=0) pthread_exit(NULL); } _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelend[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err); if (err!=CL_SUCCESS) continue; if (*found>0) { _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL); for (a=0;a<gws;a++) if (rule_found_ind[self]!=0) { b=a*wthreads[self].vectorsize; _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, 
hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL); for (c=0;c<wthreads[self].vectorsize;c++) { e=(a)*wthreads[self].vectorsize+c; unsigned char mhash[89]; memcpy(base64,mylist->hash,88); b64_pton_crypt(base64,mhash); if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-2) == 0) { int flag = 0; strcpy(plain,&rule_images162[cc][self][0]+(e*16)); strcat(plain,line); pthread_mutex_lock(&crackedmutex); addlist = cracked_list; while (addlist) { if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0)) flag = 1; addlist = addlist->next; } pthread_mutex_unlock(&crackedmutex); if (flag == 0) { add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain); mylist->salt2[0]=1; } } } } bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint)); _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL); *found = 0; _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL); } _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL); mylist = mylist->next; } }
/* Crack callback */
/* Tests rule-mangled candidates derived from dictionary word `line` against
 * the loaded WPA handshake on worker `self`.  PBKDF2-SHA1 with 4096
 * iterations is split as: mod -> pre1 -> 7x bl1 (first HMAC half, 1170-round
 * windows via addline.sA) -> pre2 -> 7x bl2 (second half) -> end kernel
 * (PMK->PTK->MIC against block/eapol data). */
static void ocl_wpa_crack_callback(char *line, int self) {
    int a,b,c,e;
    int *found;
    int err;
    char plain[MAX];
    cl_uint16 addline;   /* packed dictionary word; .sA reused as round offset */
    cl_uint16 salt;
    cl_uint16 salt2;
    /* setup addline */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    /* little-endian pack of line[0..15]; assumes at least 16 readable bytes —
       TODO confirm the dictionary buffers guarantee this */
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);
    /* setup salt */
    salt=ocl_get_salt();
    salt2=ocl_get_salt2();
    if (attack_over!=0) pthread_exit(NULL);
    /* lock+unlock acts as a pause barrier */
    pthread_mutex_lock(&wthreads[self].tempmutex);
    pthread_mutex_unlock(&wthreads[self].tempmutex);
    size_t nws1=ocl_rule_workset[self]*wthreads[self].vectorsize;  /* vectorized gws */
    size_t nws=ocl_rule_workset[self];                             /* scalar gws */
    /* NOTE(review): every kernel below skips argument index 5 — presumably it
       is bound once at setup time elsewhere; verify. */
    _clSetKernelArg(rule_kernelend[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernelend[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 2, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelend[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelend[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelend[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 9, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 10, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelmod[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelmod[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 9, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelpre1[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl1[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelpre2[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl2[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &nws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    /* first PBKDF2 half: 7 launches of 1170 rounds each (offset in .sA) */
    for (a=0;a<7;a++) {
        if (attack_over==1) pthread_exit(NULL);
        addline.sA=a*1170;
        _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(ocl_rule_workset[self]*wthreads[self].vectorsize)/14;
    }
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    /* second PBKDF2 half, same windowing */
    for (a=0;a<7;a++) {
        if (attack_over==1) pthread_exit(NULL);
        addline.sA=a*1170;
        _clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(ocl_rule_workset[self]*wthreads[self].vectorsize)/14;
    }
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelend[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    /* blocking map of the device-side "found" flag */
    found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
    if (err!=CL_SUCCESS) return;
    if (*found>0) {
        _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        for (a=0;a<ocl_rule_workset[self];a++) if (rule_found_ind[self][a]==1) {
            b=a*wthreads[self].vectorsize;
            _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
            for (c=0;c<wthreads[self].vectorsize;c++) {
                e=(a)*wthreads[self].vectorsize+c;
                /* compare candidate MIC against the handshake's keymic */
                if (memcmp(hccap.keymic, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-1) == 0) {
                    strcpy(plain,&rule_images[self][0]+(e*MAX));
                    strcat(plain,line);
                    add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
                }
            }
        }
        /* reset device-side indicator array and found flag */
        bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
        _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        *found = 0;
        _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
    }
    _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
}
double run_gpu(datatype *h_i_vector, datatype *h_o_vector, datatype *h_o_vector_ref,\ int w, int h, int kernel_id, bool verify)throw(std::string){ cl_mem d_i_vector, d_o_vector; int number_elements_out = w * h; int number_elements_in = w * h; try{ //--1 transfer data from host to device d_i_vector = _clMalloc(number_elements_in * sizeof(datatype)); d_o_vector = _clMalloc(number_elements_out * sizeof(datatype)); _clMemcpyH2D(d_i_vector, h_i_vector, number_elements_in*sizeof(datatype)); _clFinish(); //--2 invoke kernel int args_idx = 0; _clSetArgs(kernel_id, args_idx++, d_i_vector); _clSetArgs(kernel_id, args_idx++, d_o_vector); _clSetArgs(kernel_id, args_idx++, &w, sizeof(int)); _clSetArgs(kernel_id, args_idx++, &h, sizeof(int)); int work_group_unit = 16; int range_x = -1; int range_y = -1; switch(kernel_id){ case 0: range_x = w, range_y = h; break; case 1: range_x = w/2, range_y = h; break; case 2: range_x = w/4, range_y = h; break; case 3: range_x = w/8, range_y = h; break; case 4: range_x = w/16, range_y = h; break; default: throw(string("Unknown kernel id!!!")); break; } int group_x = work_group_unit * 4; int group_y = 1; int number_iterations = 1; unsigned long deltaT = 0.0f; unsigned long kernel_exe_time = 0.0f; std::cout<<"--testing..."<<std::endl; for(int i=-1; i<number_iterations; i++){ _clInvokeKernel2D(kernel_id, range_x, range_y, group_x, group_y, &kernel_exe_time); if(i==0) deltaT += kernel_exe_time; } deltaT = deltaT/number_iterations; std::cout<<"--done."<<std::endl; _clMemcpyD2H(h_o_vector, d_o_vector, number_elements_out*sizeof(datatype)); if(verify){ verify_array<datatype>(h_o_vector, h_o_vector_ref, number_elements_out); } //--4 release cl resources. _clFree(d_i_vector); _clFree(d_o_vector); return (double)(((double)w*(double)h*(double)(h+1))*sizeof(datatype))/(double)deltaT; } catch(std::string msg){ std::string e_str = "in run_gpu -> "; e_str += msg; throw(e_str); } return 0.0; }
/* Crack callback */
/* Tests rule-mangled candidates built from word `line` against every
 * uncracked WordPress (phpass $P$) hash in hash_list on worker `self`.
 * The iteration count is 2^(index of salt[3] in cov_2char); the MD5 loop is
 * run on-device in chunks of 1024 iterations per launch. */
static void ocl_wordpress_crack_callback(char *line, int self) {
    int a,b,c,e,iter;
    int *found;
    int err;
    struct hash_list_s *mylist, *addlist;
    char plain[MAX];
    char hex1[16];
    cl_uint16 addline;    /* packed dictionary word */
    cl_uint16 salt;       /* NOTE(review): only .sE/.sF set — other lanes reach
                             the kernel uninitialized; confirm kernel ignores
                             them. */
    cl_uint16 singlehash; /* NOTE(review): declared cl_uint16 but passed with
                             sizeof(cl_uint4) below — only .x/.y/.z/.w are
                             used; works, but the type should likely be
                             cl_uint4. */
    char mhash[20];
    char base64[64];
    mylist = hash_list;
    while (mylist) {
        if (mylist->salt2[0]==1) {mylist=mylist->next;continue;}  /* cracked */
        /* setup addline: little-endian pack of line[0..15] */
        addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
        addline.sF=strlen(line);
        addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
        addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
        addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
        addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);
        /* setup salt: 8 salt bytes packed into two lanes */
        salt.sE=(mylist->salt[4])|(mylist->salt[5]<<8)|(mylist->salt[6]<<16)|(mylist->salt[7]<<24);
        salt.sF=(mylist->salt[8])|(mylist->salt[9]<<8)|(mylist->salt[10]<<16)|(mylist->salt[11]<<24);
        /* phpass: salt[3] encodes log2(iterations) via cov_2char position */
        char *p = strchr((char *)cov_2char, mylist->salt[3]);
        if (!p) return;
        iter = 1 << (p - (char *)cov_2char);
        /* decode the 16-byte target digest from the hash's base64 portion */
        memcpy(base64,mylist->hash,34);
        b64_pton(base64+12,mhash);
        unsigned int A,B,C,D;
        memcpy(hex1,mhash,4);    memcpy(&A, hex1, 4);
        memcpy(hex1,mhash+4,4);  memcpy(&B, hex1, 4);
        memcpy(hex1,mhash+8,4);  memcpy(&C, hex1, 4);
        memcpy(hex1,mhash+12,4); memcpy(&D, hex1, 4);
        singlehash.x=A;
        singlehash.y=B;
        singlehash.z=C;
        singlehash.w=D;
        if (attack_over!=0) pthread_exit(NULL);
        /* lock+unlock acts as a pause barrier */
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);
        _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
        _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
        _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
        _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &addline);
        _clSetKernelArg(rule_kernelmod[self], 5, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint4), (void*) &singlehash);
        _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint4), (void*) &singlehash);
        _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
        _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint4), (void*) &singlehash);
        _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);
        size_t nws=ocl_rule_workset[self]*wthreads[self].vectorsize;
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        /* NOTE(review): &ocl_rule_workset[self] is passed as the size_t*
           global-work-size pointer — this is only correct if
           ocl_rule_workset is an array of size_t; verify its declaration. */
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &ocl_rule_workset[self], rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        /* each bl1 launch performs 1024 of the phpass MD5 iterations */
        for (a=0;a<(iter/1024);a++) {
            if (attack_over!=0) pthread_exit(NULL);
            wthreads[self].tries+=(ocl_rule_workset[self]*wthreads[self].vectorsize)/((get_hashes_num()*(iter/1024)));
            _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &ocl_rule_workset[self], rule_local_work_size, 0, NULL, NULL);
            _clFinish(rule_oclqueue[self]);
        }
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &ocl_rule_workset[self], rule_local_work_size, 0, NULL, NULL);
        /* blocking map of the device-side "found" flag */
        found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
        if (err!=CL_SUCCESS) continue;   /* NOTE: buffer stays unmapped for this hash */
        if (*found>0) {
            _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
            for (a=0;a<ocl_rule_workset[self];a++) if (rule_found_ind[self][a]==1) {
                b=a*wthreads[self].vectorsize;
                _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
                for (c=0;c<wthreads[self].vectorsize;c++) {
                    e=(a)*wthreads[self].vectorsize+c;
                    memcpy(base64,mylist->hash,34);
                    b64_pton(base64+12,mhash);
                    if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-1) == 0) {
                        int flag = 0;
                        /* plaintext = rule image + dictionary word */
                        strcpy(plain,&rule_images[self][0]+(e*MAX));
                        strcat(plain,line);
                        /* skip duplicates already on the cracked list */
                        pthread_mutex_lock(&crackedmutex);
                        addlist = cracked_list;
                        while (addlist) {
                            if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0)) flag = 1;
                            addlist = addlist->next;
                        }
                        pthread_mutex_unlock(&crackedmutex);
                        if (flag == 0) {
                            add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain);
                            mylist->salt2[0]=1;  /* mark hash as cracked */
                        }
                    }
                }
            }
            /* reset device-side indicator array and found flag */
            bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
            _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
            *found = 0;
            _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
        }
        _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
        mylist = mylist->next;
    }
}
/* Crack callback */ static void ocl_apr1_crack_callback(char *line, int self) { int a,b,c,e; int *found; int err; struct hash_list_s *mylist, *addlist; char plain[MAX]; char hex1[16]; cl_uint16 addline; cl_uint16 salt; cl_uint16 singlehash; unsigned char base64[64]; int cc,cc1; size_t gws,gws1; cc = self_kernel16[self]; cc1 = self_kernel16[self]+strlen(line); if (cc1>15) cc1=15; mylist = hash_list; while (mylist) { if (attack_over!=0) pthread_exit(NULL); if (mylist->salt2[0]==1) {mylist=mylist->next;continue;} /* setup addline */ addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0; addline.sF=strlen(line); addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24); addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24); addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24); addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24); _clSetKernelArg(rule_kernel162[cc][self], 4, sizeof(cl_uint16), (void*) &addline); /* setup salt */ char tempsalt[16]; bzero(tempsalt,16); strcpy(tempsalt,mylist->salt); a=strlen(tempsalt)-1; tempsalt[a]=0;tempsalt[a+1]=0;tempsalt[a+2]=0;tempsalt[a+3]=0;tempsalt[a+4]=0; tempsalt[a+5]=0;tempsalt[a+6]=0;tempsalt[a+7]=0;tempsalt[a+8]=0; salt.s0=salt.s4=salt.s8=salt.sB=0; salt.sC=strlen(tempsalt)-6; salt.sE=(tempsalt[6])|(tempsalt[7]<<8)|(tempsalt[8]<<16)|(tempsalt[9]<<24); salt.sF=(tempsalt[10])|(tempsalt[11]<<8)|(tempsalt[12]<<16)|(tempsalt[13]<<24); salt.sD=cc1; salt.sB=cc; salt.s9=4; salt.sA=('a')|('p'<<8)|('r'<<16)|('1'<<24); _clSetKernelArg(rule_kernel162[cc][self], 5, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernel16[cc1][self], 5, sizeof(cl_uint16), (void*) &salt); _clSetKernelArg(rule_kernel162[cc][self], 0, sizeof(cl_mem), (void*) &rule_images16_buf[cc1][self]); unsigned char mhash[22]; memcpy(base64,mylist->hash,22); b64_pton_crypt(base64,mhash); unsigned int A,B,C,D; memcpy(hex1,mhash,4); memcpy(&A, hex1, 4); memcpy(hex1,mhash+4,4); memcpy(&B, 
hex1, 4); memcpy(hex1,mhash+8,4); memcpy(&C, hex1, 4); memcpy(hex1,mhash+12,4); memcpy(&D, hex1, 4); singlehash.x=A;singlehash.y=B;singlehash.z=C;singlehash.w=D; _clSetKernelArg(rule_kernel16[cc1][self], 4, sizeof(cl_uint4), (void*) &singlehash); pthread_mutex_lock(&wthreads[self].tempmutex); pthread_mutex_unlock(&wthreads[self].tempmutex); gws = (rule_counts[self][cc] / wthreads[self].vectorsize); while ((gws%64)!=0) gws++; gws1 = gws*wthreads[self].vectorsize; if (gws1==0) gws1=64; if (gws==0) gws=64; wthreads[self].tries+=(gws1)/get_hashes_num(); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernel162[cc][self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL); _clFinish(rule_oclqueue[self]); _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernel16[cc1][self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL); found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err); if (err!=CL_SUCCESS) continue; if (*found>0) { _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL); for (a=0;a<ocl_rule_workset[self];a++) if (rule_found_ind[self]!=0) { b=a*wthreads[self].vectorsize; _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL); for (c=0;c<wthreads[self].vectorsize;c++) { e=(a)*wthreads[self].vectorsize+c; unsigned char mhash[20]; memcpy(base64,mylist->hash,22); b64_pton_crypt(base64,mhash); if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-1) == 0) { int flag = 0; strcpy(plain,&rule_images162[cc][self][0]+(e*16)); strcat(plain,line); pthread_mutex_lock(&crackedmutex); addlist = cracked_list; while (addlist) { if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0)) flag = 1; addlist = 
addlist->next; } pthread_mutex_unlock(&crackedmutex); if (flag == 0) { add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain); mylist->salt2[0]=1; } } } } bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint)); _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL); *found = 0; _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL); } _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL); mylist = mylist->next; } }
// Benchmark driver TEMPLATE: tokens beginning with '@' (e.g. @hIn, @dIn,
// @elems, @oclArgs) are placeholders a code generator substitutes before
// compilation, so this function is not compilable as-is.
// Usage (from the argv reads below): argv[1] = square grid dimension
// (used for BOTH cdim and rdim), argv[2] = output-file suffix,
// argv[3] = stencil radius.
// NOTE(review): returns 1 unconditionally on success — nonzero normally
// signals failure to the shell; confirm callers expect this.
int main(int argc, char ** argv) {
    //float *hIn1, *hIn2;
    //cl_mem dIn1, dIn2;
    @hIn;   // placeholder: host input buffer declarations
    @dIn;   // placeholder: device input buffer declarations
    float *hOut, *rOut;   // hOut: device result copied back; rOut: CPU reference result
    cl_mem dOut;
    try{
        _clParseCommandLine(argc, argv);
        string strSubfix = string(argv[2]);
        _clInit(platform_id, device_type, device_id);
        // cdim and rdim both come from argv[1]: the grid is square.
        int cdim = atoi(argv[1]);
        int rdim = atoi(argv[1]);
        int r = atoi(argv[3]);
        @cdimIn @rdimIn // different between iMAP1 and iMAP2
        printf("cdim=%d, rdim=%d, radius=%d\n", cdim, rdim, r);
        int iIter = 10;      // number of timed repetitions
        int elems = @elems;  // placeholder: per-cell element count for the bandwidth figure
        // Data volume in MB moved per run (for the throughput report below).
        double dataAmount = (double)cdim * (double)rdim * (double)(elems) * (double)sizeof(float) * 1e-6;
#if defined TIME
        double start_time = 0;
        double end_time = 0;
        double delta_time = 0;
        int cnt = 0;
        // Timing results are appended to "data.<suffix>.dat".
        string dat_name= string("data.") + strSubfix + string(".dat");
        FILE * fp = fopen(dat_name.c_str(), "a+");
        if(fp==NULL) {
            printf("failed to open file!!!\n");
            exit(-1);
        }
#endif
        //hIn1 = (float *)malloc(cdim * rdim * sizeof(float));
        //hIn2 = (float *)malloc(cdim * rdim * sizeof(float));
        @hAlc   // placeholder: host input allocations
        hOut = (float *)malloc(cdim * rdim * sizeof(float));
        rOut = (float *)malloc(cdim * rdim * sizeof(float));
        //fill<float>(hIn1, cdim * rdim, 5);
        //fill<float>(hIn2, cdim * rdim, 5);
        @hFill  // placeholder: host input initialization
        //dIn1 = _clMalloc(cdim * rdim * sizeof(float));
        //dIn2 = _clMalloc(cdim * rdim * sizeof(float));
        @dAlc   // placeholder: device input allocations
        dOut = _clMalloc(cdim * rdim * sizeof(float));
        //_clMemcpyH2D(dIn1, hIn1, cdim * rdim * sizeof(float));
        //_clMemcpyH2D(dIn2, hIn2, cdim * rdim * sizeof(float));
        @h2dTrans  // placeholder: host-to-device transfers
        _clFinish();
        // warmup
        //OCLRun(dIn1, dIn2, dOut, cdim, rdim);
        OCLRun(@oclArgs, dOut, cdim, rdim, cdimIn, rdimIn);
#ifdef VARIFY
        // CPU reference run used for verification after the timed loop.
        // ("VARIFY" is the project's existing macro spelling; kept as-is.)
        //OMPRun(hIn1, hIn2, rOut, cdim, rdim);
        OMPRun(@ompArgs, rOut, cdim, rdim, cdimIn, rdimIn);
#endif //VARIFY
#ifdef TIME
        delta_time = 0;
        cnt = 0;
#endif
        // Timed loop: re-run the kernel iIter times, accumulating wall time.
        for(int i=0; i<iIter; i++) {
#ifdef TIME
            cnt++;
            start_time = gettime();
#endif
            OCLRun(@oclArgs, dOut, cdim, rdim, cdimIn, rdimIn);
#ifdef TIME
            end_time = gettime();
            delta_time += end_time - start_time;
            // NOTE(review): this breaks whenever the accumulated time is NOT
            // within 0.1 of exactly 600000.0 — i.e. it almost always exits
            // after the first iteration, defeating iIter. The original author
            // flagged it too ("????"); confirm the intended condition
            // (perhaps a "> 600000.0" timeout was meant).
            if(fabs(delta_time-600000.0)>0.1) break; // ????
#endif
        }
#ifdef TIME
        // Throughput figure: MB-per-run * runs / total seconds.
        fprintf(fp, "%lf\t", dataAmount * (double)cnt/delta_time);
#endif
#ifdef VARIFY
        // Copy device output back and compare against the CPU reference.
        _clMemcpyD2H(hOut, dOut, cdim * rdim * sizeof(float));
        verify_array<float>(rOut, hOut, cdim * rdim);
#endif //VARIFY
#ifdef TIME
        fprintf(fp, "\n");
        fclose(fp);
#endif
    }
    catch(string msg){
        // Helper layer reports OpenCL failures by throwing a string.
        printf("ERR:%s\n", msg.c_str());
        printf("Error catched\n");
        exit(-1);
    }
    // Cleanup: device buffers first, then the OpenCL context, then host memory.
    //_clFree(dIn1);
    //_clFree(dIn2);
    @clFree   // placeholder: device input frees
    _clFree(dOut);
    _clRelease();
    //if(hIn1!=NULL) free(hIn1);
    //if(hIn2!=NULL) free(hIn2);
    @hFree    // placeholder: host input frees
    if(hOut!=NULL) free(hOut);
    if(rOut!=NULL) free(rOut);
    return 1;
}
/* Crack callback: tests candidate word `line` against the keyring target
 * (`cs`: salt, ciphertext, hash, iteration count) on OpenCL device `self`.
 * All rule_* arrays are per-device state indexed by `self`. Kernel pipeline:
 *   kernelmod  -> combines `line` with the rule-image wordlist buffers;
 *   kernelpre1 -> first hashing pass over word+salt into rule_images3_buf;
 *   kernelbl1  -> repeated 200-iteration blocks (loop below);
 *   kernellast -> remaining iterations + comparison with the target hash.
 * Match flags are read back via rule_found_buf / rule_found_ind_buf and any
 * recovered plaintext is appended to the global cracked list. */
static void ocl_keyring_crack_callback(char *line, int self) {
    int a,b,c,e;
    int *found;           // host pointer mapped onto rule_found_buf (match flag)
    int err;
    char plain[MAX];      // recovered plaintext: rule image + appended `line`
    char hex1[16];        // scratch for 4-byte hash-word extraction
    cl_uint16 addline;    // candidate word packed as little-endian 32-bit words
    cl_uint16 salt;       // salt (s0,s1) + ciphertext (s4..s7) for the kernels
    cl_uint16 singlehash; // target hash split into four 32-bit words
    char mhash[32];
    size_t gws,gws1;      // global work sizes: per-vector and per-element
    /* setup addline */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);  // length travels in lane sF
    // NOTE(review): the packing below always reads line[0..15] regardless of
    // strlen(line); bytes past the NUL are whatever follows in memory --
    // confirm callers provide a padded 16-byte buffer.
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);
    /* setup salt */
    // NOTE(review): salt.s2/s3 (and most other lanes besides sA set later) are
    // never initialized here -- presumably the kernels ignore them; verify
    // against the kernel source.
    salt.s0=(cs.salt[0])|(cs.salt[1]<<8)|(cs.salt[2]<<16)|(cs.salt[3]<<24);
    salt.s1=(cs.salt[4])|(cs.salt[5]<<8)|(cs.salt[6]<<16)|(cs.salt[7]<<24);
    salt.s4=(cs.ct[0])|(cs.ct[1]<<8)|(cs.ct[2]<<16)|(cs.ct[3]<<24);
    salt.s5=(cs.ct[4])|(cs.ct[5]<<8)|(cs.ct[6]<<16)|(cs.ct[7]<<24);
    salt.s6=(cs.ct[8])|(cs.ct[9]<<8)|(cs.ct[10]<<16)|(cs.ct[11]<<24);
    salt.s7=(cs.ct[12])|(cs.ct[13]<<8)|(cs.ct[14]<<16)|(cs.ct[15]<<24);
    // Split the 16-byte target hash into four host-endian 32-bit words.
    memcpy(mhash,cs.hash,16);
    unsigned int A,B,C,D;
    memcpy(hex1,mhash,4); memcpy(&A, hex1, 4);
    memcpy(hex1,mhash+4,4); memcpy(&B, hex1, 4);
    memcpy(hex1,mhash+8,4); memcpy(&C, hex1, 4);
    memcpy(hex1,mhash+12,4); memcpy(&D, hex1, 4);
    singlehash.s0=A; singlehash.s1=B; singlehash.s2=C; singlehash.s3=D;
    if (attack_over!=0) pthread_exit(NULL);  // global shutdown flag
    // Lock/unlock pair with no critical section: acts as a pause point --
    // blocks here while another thread holds tempmutex.
    pthread_mutex_lock(&wthreads[self].tempmutex);
    pthread_mutex_unlock(&wthreads[self].tempmutex);
    // kernelmod: images -> images2, parameterized by the candidate word.
    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    // kernelpre1: images2 -> images3 (first hashing pass).
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt);
    // kernelbl1: args 0 and 1 are BOTH images3 -- presumably an in-place
    // iteration step. NOTE(review): compare with kernelpre1's images3/images2
    // pairing and confirm against the kernel source that this is intentional.
    _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);
    // kernellast: final iterations, writes candidate digests into rule_buffer.
    _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);
    if (rule_counts[self][0]==-1) return;  // no work loaded for this device
    // Round the work size up to a multiple of 64 work-items (minimum 64);
    // gws counts vectors, gws1 counts scalar elements.
    gws = (rule_counts[self][0] / wthreads[self].vectorsize);
    while ((gws%64)!=0) gws++;
    gws1 = gws*wthreads[self].vectorsize;
    if (gws1==0) gws1=64;
    if (gws==0) gws=64;
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    // Iterated hashing: one kernelbl1 launch per 200-iteration block.
    for (a=0;a<((cs.iterations-1)/200);a++) {
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
        _clFinish(rule_oclqueue[self]);
        // Progress accounting spread across the blocks of this candidate.
        wthreads[self].tries+=(gws1)/(cs.iterations/200);
    }
    // Lane sA carries the leftover iteration count for the final kernel.
    salt.sA=((cs.iterations-1)%200);
    _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    // Blocking map of the 4-byte match flag.
    // NOTE(review): `err` is not checked here (the sibling callback above does
    // `if (err!=CL_SUCCESS) continue;`); on failure `found` may be NULL and
    // the dereference below would crash -- confirm/add a check.
    found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
    if (*found>0) {
        // Pull per-work-item hit flags, then fetch digests only for hit vectors.
        _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        for (a=0;a<ocl_rule_workset[self];a++)
        if (rule_found_ind[self][a]==1) {
            b=a*wthreads[self].vectorsize;
            _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
            // Re-check each vector lane on the host against the target hash.
            for (c=0;c<wthreads[self].vectorsize;c++) {
                e=(a)*wthreads[self].vectorsize+c;
                memcpy(mhash,cs.hash,hash_ret_len1);
                if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1) == 0) {
                    // Reconstruct the plaintext: mangled image + candidate word.
                    strcpy(plain,&rule_images[self][0]+(e*MAX));
                    strcat(plain,line);
                    pthread_mutex_lock(&crackedmutex);
                    // Only record the first crack (empty list check under lock;
                    // add_cracked_list itself is called after unlocking).
                    if (!cracked_list) {
                        pthread_mutex_unlock(&crackedmutex);
                        add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
                    }
                    else pthread_mutex_unlock(&crackedmutex);
                }
            }
        }
        // Reset device-side flags for the next candidate.
        bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
        _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        *found = 0;
        _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
    }
    _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
}