コード例 #1
0
ファイル: cl_bfs.c プロジェクト: Quadra-H/QH-GPU
cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source) {

	cl_mem d_mem, d_mem_pinned;
	unsigned char* h_mem_pinned = NULL;
	d_mem_pinned = clCreateBuffer(oclHandles.context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status);

	if (oclHandles.cl_status != CL_SUCCESS)
		printf("excpetion in _clMalloc. CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR in _clCreateAndCpyPinnedMem\n");

	h_mem_pinned = (unsigned char*) clEnqueueMapBuffer(oclHandles.queue, d_mem_pinned, CL_FALSE, CL_MAP_WRITE, 0, size, NULL, NULL, NULL, &oclHandles.cl_status);

	memcpy(h_mem_pinned, h_mem_source, size);

	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_ONLY, size, NULL, &oclHandles.cl_status);

	if (oclHandles.cl_status != CL_SUCCESS)
		printf("excpetion in _clMalloc. CL_MEM_READ_ONLY in _clCreateAndCpyPinnedMem\n");

	clEnqueueUnmapMemObject(oclHandles.queue, d_mem_pinned, (void*) h_mem_pinned, 0, NULL, NULL);

//	if(accMode == DIRECT)
//	{
	h_mem_pinned = (unsigned char*) clEnqueueMapBuffer(oclHandles.queue, d_mem_pinned, CL_FALSE, CL_MAP_WRITE, 0, size, NULL, NULL, NULL, &oclHandles.cl_status);
	// DIRECT:  API access to device buffer
	//for(int i = 0; i < MEMCOPY_ITERATIONS; i++)
	oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, d_mem, CL_FALSE, 0, size, h_mem_pinned, 0, NULL, NULL);

	_clFinish();

#ifdef ERRMSG
	if(oclHandles.cl_status != CL_SUCCESS)
	printf("excpetion in _clCreateAndCpyMem() -> clEnqueueWriteBuffer.\n");
#endif
//	}
//	else
//	{

	// MAPPED: mapped pointers to device buffer and conventional pointer access
//		void* dm_idata = clEnqueueMapBuffer(oclHandles.queue, d_mem, CL_TRUE, CL_MAP_WRITE, 0, size, 0, NULL, NULL, &oclHandles.cl_status);
//
//
//		memcpy(dm_idata, h_mem_pinned, size);
//
//		clEnqueueUnmapMemObject(oclHandles.queue, d_mem, dm_idata, 0, NULL, NULL);
	//}

	return d_mem;
}
コード例 #2
0
ファイル: cl_bfs.c プロジェクト: Quadra-H/QH-GPU
cl_mem _clCreateAndCpyPagedMem(int size, unsigned char* h_mem_source) {
	cl_mem d_mem;
	float * h_mem_pagealbe = NULL;

	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL, &oclHandles.cl_status);

	h_mem_pagealbe = (float*) malloc(size);

	h_mem_pagealbe = (float *) clEnqueueMapBuffer(oclHandles.queue, d_mem, CL_TRUE, CL_MAP_WRITE, 0, size, NULL, NULL, NULL, &oclHandles.cl_status);

	memcpy(h_mem_pagealbe, h_mem_source, size);

	oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, d_mem, CL_FALSE, 0, size, h_mem_pagealbe, 0, NULL, NULL);
	_clFinish();

	return d_mem;
}
コード例 #3
0
ファイル: bfs.cpp プロジェクト: ebads67/ipmacc
//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
		int *h_graph_edges, bool *h_graph_mask, bool *h_updating_graph_mask, \
		bool *h_graph_visited, int *h_cost) 
					throw(std::string){

	//int number_elements = height*width;
	bool h_over;
	cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
			d_graph_visited, d_cost, d_over;
//	try{
		//--1 transfer data from host to device
		//printf("initializing\n");
		_clInit();			
		//printf("allocating\n");
		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
		d_graph_mask = _clMalloc(no_of_nodes*sizeof(bool), h_graph_mask);
		d_updating_graph_mask = _clMalloc(no_of_nodes*sizeof(bool), h_updating_graph_mask);
		d_graph_visited = _clMalloc(no_of_nodes*sizeof(bool), h_graph_visited);
		d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
		d_over = _clMallocRW(sizeof(bool), &h_over);
		
		//printf("copyin\n");
		_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);	
		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(bool), h_graph_mask);	
		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(bool), h_updating_graph_mask);	
		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(bool), h_graph_visited);	
		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);	
			
		//--2 invoke kernel
#ifdef	PROFILING
		timer kernel_timer;
		double kernel_time = 0.0;		
		kernel_timer.reset();
		kernel_timer.start();
#endif
		int kerId=0;
	//	printf("launching kernel\n");
		do{
	//		printf("copy in\n");
			h_over = false;
			_clMemcpyH2D(d_over, sizeof(bool), &h_over);
			//--kernel 0
			int kernel_id = 0;
			int kernel_idx = 0;
	//		printf("set arg 1\n");
			_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_cost);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			
			//int work_items = no_of_nodes;
	//		printf("invoke 1\n");
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
			
			//--kernel 1
			kernel_id = 1;
			kernel_idx = 0;			
	//		printf("set arg 2\n");
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_over);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			
			//work_items = no_of_nodes;
	//		printf("invoke 2\n");
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);			
			
	//		printf("copy back\n");
			_clMemcpyD2H(d_over,sizeof(bool), &h_over);
	//		printf("done\n");
	//		printf("K%d\n",kerId++);
			}while(h_over);
	//	printf("done!");
		_clFinish();
#ifdef	PROFILING
		kernel_timer.stop();
		kernel_time = kernel_timer.getTimeInSeconds();
#endif
		//--3 transfer data from device to host
		_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
		//--statistics
#ifdef	PROFILING
		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;		
#endif
		//--4 release cl resources.
		_clFree(d_graph_nodes);
		_clFree(d_graph_edges);
		_clFree(d_graph_mask);
		_clFree(d_updating_graph_mask);
		_clFree(d_graph_visited);
		_clFree(d_cost);
		_clFree(d_over);
		_clRelease();
//	}
//	catch(std::string msg){		
//		_clFree(d_graph_nodes);
//		_clFree(d_graph_edges);
//		_clFree(d_graph_mask);
//		_clFree(d_updating_graph_mask);
//		_clFree(d_graph_visited);
//		_clFree(d_cost);
//		_clFree(d_over);
//		_clRelease();
//		std::string e_str = "in run_transpose_gpu -> ";
//		e_str += msg;
//		throw(e_str);
//	}
	return ;
}
コード例 #4
0
ファイル: bfs.cpp プロジェクト: dylanzika/rodinia-1
//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
		int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
		char *h_graph_visited, int *h_cost) 
					throw(std::string){

	//int number_elements = height*width;
	char h_over;
	cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
			d_graph_visited, d_cost, d_over;
	try{
		//--1 transfer data from host to device
		_clInit();			
		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
		d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
		d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
		d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);


		d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
		d_over = _clMallocRW(sizeof(char), &h_over);
		
		_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);	
		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);	
		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);	
		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);	
		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);	
			
		//--2 invoke kernel
#ifdef	PROFILING
		timer kernel_timer;
		double kernel_time = 0.0;		
		kernel_timer.reset();
		kernel_timer.start();
#endif
		do{
			h_over = false;
			_clMemcpyH2D(d_over, sizeof(char), &h_over);
			//--kernel 0
			int kernel_id = 0;
			int kernel_idx = 0;
			_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_cost);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			
			//int work_items = no_of_nodes;
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
			
			//--kernel 1
			kernel_id = 1;
			kernel_idx = 0;			
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_over);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			
			//work_items = no_of_nodes;
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);			
			
			_clMemcpyD2H(d_over,sizeof(char), &h_over);
			}while(h_over);
			
		_clFinish();
#ifdef	PROFILING
		kernel_timer.stop();
		kernel_time = kernel_timer.getTimeInSeconds();
#endif
		//--3 transfer data from device to host
		_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
		//--statistics
#ifdef	PROFILING
		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;		
#endif
		//--4 release cl resources.
		_clFree(d_graph_nodes);
		_clFree(d_graph_edges);
		_clFree(d_graph_mask);
		_clFree(d_updating_graph_mask);
		_clFree(d_graph_visited);
		_clFree(d_cost);
		_clFree(d_over);
		_clRelease();
	}
	catch(std::string msg){		
		_clFree(d_graph_nodes);
		_clFree(d_graph_edges);
		_clFree(d_graph_mask);
		_clFree(d_updating_graph_mask);
		_clFree(d_graph_visited);
		_clFree(d_cost);
		_clFree(d_over);
		_clRelease();
		std::string e_str = "in run_transpose_gpu -> ";
		e_str += msg;
		throw(e_str);
	}
	return ;
}
コード例 #5
0
ファイル: ocl_lastpass.c プロジェクト: 6e6f36/hashkill
/* Crack callback */
static void ocl_lastpass_crack_callback(char *line, int self)
{
    int a;
    int *found;
    int err;
    struct  hash_list_s  *mylist, *addlist;
    char plain[MAX];
    char hex1[16];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 singlehash;
    unsigned char mhash[64];
    size_t gws,gws1;

    mylist = hash_list;
    while (mylist)
    {
        if (mylist->salt2[0]==1) {mylist=mylist->next;continue;}
	/* setup addline */
	addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
	addline.sF=strlen(line);
	addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
	addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
	addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
	addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);

        memcpy(mhash,mylist->hash,16);
        unsigned int A,B,C,D;
        memcpy(hex1,mhash,4);
        memcpy(&A, hex1, 4);
        memcpy(hex1,mhash+4,4);
        memcpy(&B, hex1, 4);
        memcpy(hex1,mhash+8,4);
        memcpy(&C, hex1, 4);
        memcpy(hex1,mhash+12,4);
        memcpy(&D, hex1, 4);
	singlehash.s0=A;
	singlehash.s1=B;
	singlehash.s2=C;
	singlehash.s3=D;

        if (attack_over!=0) pthread_exit(NULL);
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);

	/* setup salt */
	char mysalt[512];
        char myuser[512];
        char *tok;
        char *save;
        int iterations;
        int len;
	strcpy(mysalt,mylist->salt);
        tok = strtok_r(mysalt,":",&save);
        tok = strtok_r(NULL,":",&save);
        if (!tok) return;
        strcpy(myuser,tok);
        tok = strtok_r(NULL,":",&save);
        if (!tok) return;
        tok = strtok_r(NULL,":",&save);
        if (!tok) return;
        iterations = atoi(tok);
        len = strlen(myuser);
	bzero(mhash,64);
	strcpy((char*)mhash,myuser);
	memcpy(mhash+len,"\x00\x00\x00\x01\x80",5);
	unsigned int tmp,tmp1,tmp2;
        salt.s0=(mhash[0])|(mhash[1]<<8)|(mhash[2]<<16)|(mhash[3]<<24);
        salt.s1=(mhash[4])|(mhash[5]<<8)|(mhash[6]<<16)|(mhash[7]<<24);
        salt.s2=(mhash[8])|(mhash[9]<<8)|(mhash[10]<<16)|(mhash[11]<<24);
        salt.s3=(mhash[12])|(mhash[13]<<8)|(mhash[14]<<16)|(mhash[15]<<24);
        salt.s4=(mhash[16])|(mhash[17]<<8)|(mhash[18]<<16)|(mhash[19]<<24);
        salt.s5=(mhash[20])|(mhash[21]<<8)|(mhash[22]<<16)|(mhash[23]<<24);
        salt.s6=(mhash[24])|(mhash[25]<<8)|(mhash[26]<<16)|(mhash[27]<<24);
        salt.s7=(mhash[28])|(mhash[29]<<8)|(mhash[30]<<16)|(mhash[31]<<24);
        salt.s8=(mhash[32])|(mhash[33]<<8)|(mhash[34]<<16)|(mhash[35]<<24);
        salt.s9=(mhash[36])|(mhash[37]<<8)|(mhash[38]<<16)|(mhash[39]<<24);
        salt.sA=(mhash[40])|(mhash[41]<<8)|(mhash[42]<<16)|(mhash[43]<<24);
        salt.sB=(mhash[44])|(mhash[45]<<8)|(mhash[46]<<16)|(mhash[47]<<24);
        salt.sC=(mhash[48])|(mhash[49]<<8)|(mhash[50]<<16)|(mhash[51]<<24);
        salt.sD=(mhash[52])|(mhash[53]<<8)|(mhash[54]<<16)|(mhash[55]<<24);
        REV(salt.s0);
        REV(salt.s1);
        REV(salt.s2);
        REV(salt.s3);
        REV(salt.s4);
        REV(salt.s5);
        REV(salt.s6);
        REV(salt.s7);
        REV(salt.s8);
        REV(salt.s9);
        REV(salt.sA);
        REV(salt.sB);
        REV(salt.sC);
        REV(salt.sD);
	salt.sF=len+4;

	_clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
	_clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
	_clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
	_clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
	_clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
	_clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
	_clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
	_clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
	_clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
	_clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
	_clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt);
	_clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
	_clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);
	_clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
	_clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
	_clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
	_clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
	_clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint16), (void*) &singlehash);
	_clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);

        gws = (rule_counts[self][0] / wthreads[self].vectorsize);
        while ((gws%64)!=0) gws++;
        gws1 = gws*wthreads[self].vectorsize;
        if (gws1==0) gws1=64;
        if (gws==0) gws=64;

        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);

	for (a=1;a<iterations;a+=100)
	{
	    salt.sA=a;
	    salt.sB=a+100;
	    if (salt.sB>iterations) salt.sB=iterations;
	    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);
    	    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	    _clFinish(rule_oclqueue[self]);
    	    wthreads[self].tries+=(gws1)/(get_hashes_num()*(iterations/100));
	}
	
        bzero(mhash,64);
        strcpy((char*)mhash,"lastpass rocks\x02\x02");
        salt.s0=(mhash[0])|(mhash[1]<<8)|(mhash[2]<<16)|(mhash[3]<<24);
        salt.s1=(mhash[4])|(mhash[5]<<8)|(mhash[6]<<16)|(mhash[7]<<24);
        salt.s2=(mhash[8])|(mhash[9]<<8)|(mhash[10]<<16)|(mhash[11]<<24);
        salt.s3=(mhash[12])|(mhash[13]<<8)|(mhash[14]<<16)|(mhash[15]<<24);
	_clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);
	_clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);
    	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);

        found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
        if (err!=CL_SUCCESS) continue;
        if (*found>0) 
        {
            _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint)*wthreads[self].vectorsize, rule_found_ind[self], 0, NULL, NULL);
    	    for (a=0;a<gws1;a++)
	    if (rule_found_ind[self][a]==1)
	    {
    		_clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, a*hash_ret_len1, hash_ret_len1, rule_ptr[self]+a*hash_ret_len1, 0, NULL, NULL);
    		if (memcmp("lastpass rocks\x02\x02", (char *)rule_ptr[self]+(a)*hash_ret_len1, 16) == 0)
    		{
            	    int flag = 0;
            	    strcpy(plain,&rule_images[self][0]+(a*MAX));
            	    strcat(plain,line);
            	    pthread_mutex_lock(&crackedmutex);
            	    addlist = cracked_list;
            	    while (addlist)
            	    {
            	        if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0))
                            flag = 1;
                        addlist = addlist->next;
            	    }
            	    pthread_mutex_unlock(&crackedmutex);
            	    if (flag == 0)
            	    {
            	        add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain);
            	        mylist->salt2[0]=1;
            	    }
    		}
	    }
	    bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	    *found = 0;
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
	}
	_clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
	mylist = mylist->next;
    }
}
コード例 #6
0
ファイル: ocl_pdf.c プロジェクト: Forage/hashkill
/* Crack callback */
static void ocl_pdf_crack_callback(char *line, int self)
{
    int a;
    int *found;
    int err;
    char plain[MAX];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 singlehash;
    size_t gws,gws1;

    /* setup addline */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);

    singlehash = get_singlehash();

    /* setup salt */
    salt.s0=(cs.o[0])|(cs.o[1]<<8)|(cs.o[2]<<16)|(cs.o[3]<<24);
    salt.s1=(cs.o[4])|(cs.o[5]<<8)|(cs.o[6]<<16)|(cs.o[7]<<24);
    salt.s2=(cs.o[8])|(cs.o[9]<<8)|(cs.o[10]<<16)|(cs.o[11]<<24);
    salt.s3=(cs.o[12])|(cs.o[13]<<8)|(cs.o[14]<<16)|(cs.o[15]<<24);
    salt.s4=(cs.o[16])|(cs.o[17]<<8)|(cs.o[18]<<16)|(cs.o[19]<<24);
    salt.s5=(cs.o[20])|(cs.o[21]<<8)|(cs.o[22]<<16)|(cs.o[23]<<24);
    salt.s6=(cs.o[24])|(cs.o[25]<<8)|(cs.o[26]<<16)|(cs.o[27]<<24);
    salt.s7=(cs.o[28])|(cs.o[29]<<8)|(cs.o[30]<<16)|(cs.o[31]<<24);
    salt.s8=(cs.o[32])|(cs.o[33]<<8)|(cs.o[34]<<16)|(cs.o[35]<<24);
    salt.s9=(cs.o[36])|(cs.o[37]<<8)|(cs.o[38]<<16)|(cs.o[39]<<24);

    salt.sA=(cs.id[0])|(cs.id[1]<<8)|(cs.id[2]<<16)|(cs.id[3]<<24);
    salt.sB=(cs.id[4])|(cs.id[5]<<8)|(cs.id[6]<<16)|(cs.id[7]<<24);
    salt.sC=(cs.id[8])|(cs.id[9]<<8)|(cs.id[10]<<16)|(cs.id[11]<<24);
    salt.sD=(cs.id[12])|(cs.id[13]<<8)|(cs.id[14]<<16)|(cs.id[15]<<24);
    salt.sF=cs.P;
    salt.sE=cs.encrypt_metadata;

    if (attack_over!=0) pthread_exit(NULL);
    pthread_mutex_lock(&wthreads[self].tempmutex);
    pthread_mutex_unlock(&wthreads[self].tempmutex);

    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt);
    if (cs.R==6)
    {
	_clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &singlehash);
	_clSetKernelArg(rule_kernelbl1[self], 7, sizeof(cl_uint16), (void*) &salt);
    }
    else
    {
	_clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
	_clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);
    }
    _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);


    if (rule_counts[self][0]==-1) return;
    gws = (rule_counts[self][0] / wthreads[self].vectorsize);
    while ((gws%64)!=0) gws++;
    gws1 = gws*wthreads[self].vectorsize;
    if (gws1==0) gws1=64;
    if (gws==0) gws=64;

    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);

    if (cs.R==6)
    {
	for (a=0;a<256+32;a++)
	{
	    if (attack_over!=0) return;
	    singlehash.sE = a;
	    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &singlehash);
	    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	    _clFinish(rule_oclqueue[self]);
	    wthreads[self].tries+=(gws)/(256+32);
	}
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);

    }
    else if (cs.R<5)
    {
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
    }

    found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
    if (cs.R!=6) wthreads[self].tries+=(gws);
    if (*found>0) 
    {
        _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	for (a=0;a<ocl_rule_workset[self];a++)
	if (rule_found_ind[self][a]==1)
	{
    	    {
                strcpy(plain,&rule_images[self][0]+(a*MAX));
                strcat(plain,line);
                pthread_mutex_lock(&crackedmutex);
                if (!cracked_list)
                {
        	    pthread_mutex_unlock(&crackedmutex);
            	    add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
        	}
        	else pthread_mutex_unlock(&crackedmutex);
    	    }
	}
	bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
    	_clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        *found = 0;
	_clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
    }
    _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
}
コード例 #7
0
ファイル: ocl_androidfde.c プロジェクト: greatyao/hashkill
/* Crack callback */
static void ocl_androidfde_crack_callback(char *line, int self)
{
    int a,b,c,e;
    char plain[MAX];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 salt2;
    size_t nws1;
    size_t nws;


    /* setup addline */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);

    /* setup salt */
    salt=ocl_get_salt();
    salt2=ocl_get_salt2();

    if (attack_over!=0) pthread_exit(NULL);

    if (rule_counts[self][0]==-1) return;
    nws = (rule_counts[self][0] / wthreads[self].vectorsize);
    while ((nws%64)!=0) nws++;
    nws1 = nws*wthreads[self].vectorsize;
    if (nws1==0) nws1=64;
    if (nws==0) nws=64;


    _clSetKernelArg(rule_kernelend[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernelend[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 2, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelend[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelend[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelend[self], 7, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 8, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernelend2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 2, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelend2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelend2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelend2[self], 7, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelend2[self], 8, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelmod[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelmod[self], 7, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelpre2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl2[self], 6, sizeof(cl_uint16), (void*) &salt2);

    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &nws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);

    b=(2*2000)/1000;
    for (a=0;a<2000;a+=1000)
    {
	if (attack_over!=0) pthread_exit(NULL);
	addline.sA=a;
	addline.sB=a+1000;
	if (a==0) addline.sA=1;
	_clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(nws1)/b;
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);
    }
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);

    for (a=0;a<2000;a+=1000)
    {
	if (attack_over!=0) pthread_exit(NULL);
	addline.sA=a;
	addline.sB=a+1000;
	if (addline.sB>2000) addline.sB=2000;
	if (a==0) addline.sA=1;
	_clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(nws1)/b;
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);
    }
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelend[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);

    _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, 0, hash_ret_len1*wthreads[self].vectorsize*nws, rule_ptr[self], 0, NULL, NULL);
    for (a=0;a<nws;a++)
    for (c=0;c<wthreads[self].vectorsize;c++)
    {
        e=(a)*wthreads[self].vectorsize+c;
        if (check_androidfde(rule_ptr[self]+e*32)==hash_ok)
        {
    	    strcpy(plain,&rule_images[self][0]+(e*MAX));
    	    strcat(plain,line);
    	    add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
	}
    }
}
コード例 #8
0
ファイル: ocl_kwallet.c プロジェクト: greatyao/hashkill
/* Crack callback */
static void ocl_kwallet_crack_callback(char *line, int self)
{
    int a,c,d,e;
    char plainimg[MAX*2];
    cl_uint16 addline;
    cl_uint16 salt;
    char key[20];
    size_t gws,gws1;

    /* setup addline */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);

    if (attack_over!=0) pthread_exit(NULL);
    pthread_mutex_lock(&wthreads[self].tempmutex);
    pthread_mutex_unlock(&wthreads[self].tempmutex);

    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_uint16), (void*) &salt);


    if (rule_counts[self][0]==-1) return;
    gws = (rule_counts[self][0] / wthreads[self].vectorsize);
    while ((gws%64)!=0) gws++;
    gws1 = gws*wthreads[self].vectorsize;
    if (gws1==0) gws1=64;
    if (gws==0) gws=64;

    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);

    for (a=0;a<((cs.iterations)/300);a++)
    {
    	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
	wthreads[self].tries+=(gws1)/(cs.iterations/300);
    }
    salt.sA=((cs.iterations)%300);
    _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_uint16), (void*) &salt);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);

    _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, 0, hash_ret_len1*wthreads[self].vectorsize*ocl_rule_workset[self], rule_ptr[self], 0, NULL, NULL);
    for (a=0;a<gws;a++)
    {
        for (c=0;c<wthreads[self].vectorsize;c++)
        {
            e=(a)*wthreads[self].vectorsize+c;
            memcpy(key,(char *)rule_ptr[self]+(e)*hash_ret_len1,hash_ret_len1);
            if (check_kwallet(key)==hash_ok)
            {
                for (d=0;d<MAX;d++) plainimg[d] = rule_images[self][e*MAX+d];
                strncat(plainimg,line,32);
                plainimg[31]=0;
                if (!cracked_list) add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, (char *)plainimg);
            }
        }
    }
}
コード例 #9
0
ファイル: bfs.cpp プロジェクト: bs5ht/Parallella
//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
		int *h_graph_edges, int *h_graph_mask, int *h_updating_graph_mask, \
		int *h_graph_visited, int *h_cost)
					throw(std::string){

	//int number_elements = height*width;
	int h_over;
	cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
			d_graph_visited, d_cost, d_over;
	try{
		//--1 transfer data from host to device
		_clInit();
		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
		d_graph_mask = _clMallocRW(no_of_nodes*sizeof(int), h_graph_mask);
		d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(int), h_updating_graph_mask);
		d_graph_visited = _clMallocRW(no_of_nodes*sizeof(int), h_graph_visited);


		d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
		d_over = _clMallocRW(sizeof(int), &h_over);

		_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(int), h_graph_mask);
		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(int), h_updating_graph_mask);
		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(int), h_graph_visited);
		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);

		//--2 invoke kernel
#ifdef	PROFILING
		timer kernel_timer;
		double kernel_time = 0.0;
		kernel_timer.reset();
		kernel_timer.start();


#endif
		struct timespec startT, endT;
		clock_gettime(CLOCK_MONOTONIC, &startT);
		do{
			h_over = false;
			_clMemcpyH2D(d_over, sizeof(int), &h_over);
			//--kernel 0
			int kernel_id = 0;
			int kernel_idx = 0;
			_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_cost);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));

			//int work_items = no_of_nodes;
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

			//--kernel 1
			kernel_id = 1;
			kernel_idx = 0;
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_over);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));

			//work_items = no_of_nodes;
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

			_clMemcpyD2H(d_over,sizeof(int), &h_over);
			}while(h_over);

		_clFinish();
		clock_gettime(CLOCK_MONOTONIC, &endT);
		uint64_t diff = 1000000000 * (endT.tv_sec - startT.tv_sec);
		uint64_t nanodiff = endT.tv_nsec - startT.tv_nsec;
		//printf("elapsed accelerator time = %llu nanoseconds\n", (long long unsigned int) diff);
		//printf("start time seconds%u \n", startT.tv_sec);
		//printf("end time seconds %u \n", endT.tv_sec);
        //printf("difference %u \n", diff);
		//printf("start time nanoseconds %u \n", startT.tv_nsec);
		//printf("end time nanoseconds %u \n", endT.tv_nsec);
        printf(" accelerator time %u \n", nanodiff + diff);
#ifdef	PROFILING
		kernel_timer.stop();
		kernel_time = kernel_timer.getTimeInSeconds();
#endif
		//--3 transfer data from device to host
		_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
		//--statistics
#ifdef	PROFILING
		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
#endif
		//--4 release cl resources.
		_clFree(d_graph_nodes);
		_clFree(d_graph_edges);
		_clFree(d_graph_mask);
		_clFree(d_updating_graph_mask);
		_clFree(d_graph_visited);
		_clFree(d_cost);
		_clFree(d_over);
		_clRelease();
	}
	catch(std::string msg){
		_clFree(d_graph_nodes);
		_clFree(d_graph_edges);
		_clFree(d_graph_mask);
		_clFree(d_updating_graph_mask);
		_clFree(d_graph_visited);
		_clFree(d_cost);
		_clFree(d_over);
		_clRelease();
		std::string e_str = "in run_transpose_gpu -> ";
		e_str += msg;
		throw(e_str);
	}
	return ;
}
コード例 #10
0
ファイル: ocl_sha512unix.c プロジェクト: 6e6f36/hashkill
/* Crack callback */
static void ocl_sha512unix_crack_callback(char *line, int self)
{
    int a,b,c,e;
    int *found;
    int err;
    struct  hash_list_s  *mylist, *addlist;
    char plain[MAX];
    char hex1[16];
    cl_uint16 salt;
    cl_ulong8 singlehash;
    unsigned char base64[89];
    int cc,cc1;
    size_t gws,gws1;

    cc = self_kernel16[self];
    cc1 = self_kernel16[self]+strlen(line);
    if (cc1>15) cc1=15;
    mylist = hash_list;
    while (mylist)
    {
        if (mylist->salt2[0]==1) {mylist=mylist->next;continue;}
	salt.sC=cc1;
	/* setup_psalt */
        unsigned char mhash[89];
        memcpy(base64,mylist->hash,88);
        b64_pton_crypt(base64,mhash);
        uint64_t A1,A2,A3,A4,A5,A6,A7,A8;
        memcpy(hex1,mhash,8);
        memcpy(&A1, hex1, 8);
        memcpy(hex1,mhash+8,8);
        memcpy(&A2, hex1, 8);
        memcpy(hex1,mhash+16,8);
        memcpy(&A3, hex1, 8);
        memcpy(hex1,mhash+24,8);
        memcpy(&A4, hex1, 8);
        memcpy(hex1,mhash+32,8);
        memcpy(&A5, hex1, 8);
        memcpy(hex1,mhash+40,8);
        memcpy(&A6, hex1, 8);
        memcpy(hex1,mhash+48,8);
        memcpy(&A7, hex1, 8);
        memcpy(hex1,mhash+56,8);
        memcpy(&A8, hex1, 8);
        singlehash.s0=A1;singlehash.s1=A2;singlehash.s2=A3;singlehash.s3=A4;
        singlehash.s4=A5;singlehash.s5=A6;singlehash.s6=A7;singlehash.s7=A8;

	if (rule_counts[self][cc]==-1) return;
        gws = (rule_counts[self][cc] / wthreads[self].vectorsize);
        while ((gws%64)!=0) gws++;
        gws1 = gws*wthreads[self].vectorsize;
        if (gws1==0) gws1=64;
        if (gws==0) gws=64;

        for (a=0;a<gws;a++)
        {
	    char candidate[32];
	    bzero(candidate,32);
	    bzero(hex1,16);
            memcpy(hex1,mylist->salt+3,strlen(mylist->salt)-4);
	    salt.sD=strlen(hex1);
            strcpy(candidate,rule_images162[cc][self]+(a*16));
            strcat(candidate,line);
            setup_spint0(candidate,hex1,&rule_images16[cc1][self][0]+(a*96));
            if (attack_over!=0) pthread_exit(NULL);
        }

	_clEnqueueWriteBuffer(rule_oclqueue[self], rule_images16_buf[cc1][self], CL_FALSE, 0, ocl_rule_workset[self]*wthreads[self].vectorsize*96, rule_images16[cc1][self], 0, NULL, NULL);
        if (attack_over!=0) pthread_exit(NULL);
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);


	/* Set sha512unixm, sha512unixe then the transform kernels */
        _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]);
        _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images16_buf[cc1][self]);
        _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]);
        _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_mem), (void*) &rule_sizes16_buf[cc1][self]);
        _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernelmod[self], 5, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]);
        _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]);
        _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]);
        _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_ulong8), (void*) &singlehash);
        _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]);
        _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]);
        _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]);
        _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_ulong8), (void*) &singlehash);
        _clSetKernelArg(rule_kernelend[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
        _clSetKernelArg(rule_kernelend[self], 1, sizeof(cl_mem), (void*) &rule_images163_buf[cc1][self]);
        _clSetKernelArg(rule_kernelend[self], 2, sizeof(cl_mem), (void*) &rule_sizes162_buf[cc1][self]);
        _clSetKernelArg(rule_kernelend[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelend[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
        _clSetKernelArg(rule_kernelend[self], 5, sizeof(cl_uint16), (void*) &salt);
        _clSetKernelArg(rule_kernelend[self], 6, sizeof(cl_ulong8), (void*) &singlehash);


	/* Now call first transform00+4999*(transformX+sha512unixm+sha512unixe) */
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
	for (a=0;a<5000;a+=200)
	{
	    salt.sA=a;
	    salt.sB=a+200;
	    if (salt.sB>5000) salt.sB=5000;
	    _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &salt);
	    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
	    _clFinish(rule_oclqueue[self]);
    	    wthreads[self].tries+=(wthreads[self].vectorsize*ocl_rule_workset[self])/(get_hashes_num()*25);
            if (attack_over!=0) pthread_exit(NULL);
	}
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelend[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
        found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
        if (err!=CL_SUCCESS) continue;
        if (*found>0) 
        {
            _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	    for (a=0;a<gws;a++)
	    if (rule_found_ind[self]!=0)
	    {
		b=a*wthreads[self].vectorsize;
    		_clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
		for (c=0;c<wthreads[self].vectorsize;c++)
		{
	    	    e=(a)*wthreads[self].vectorsize+c;
                    unsigned char mhash[89];
                    memcpy(base64,mylist->hash,88);
                    b64_pton_crypt(base64,mhash);
    		    if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-2) == 0)
    		    {
            		int flag = 0;
                	strcpy(plain,&rule_images162[cc][self][0]+(e*16));
                	strcat(plain,line);
                	pthread_mutex_lock(&crackedmutex);
                	addlist = cracked_list;
                	while (addlist)
                	{
                	    if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0))
                                flag = 1;
                    	    addlist = addlist->next;
                	}
                	pthread_mutex_unlock(&crackedmutex);
                	if (flag == 0)
                	{
                	    add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain);
                	    mylist->salt2[0]=1;
                	}
    		    }
		}
	    }
	    bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	    *found = 0;
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
	}
	_clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
	mylist = mylist->next;
    }
}
コード例 #11
0
ファイル: ocl_wpa.c プロジェクト: Forage/hashkill
/* Crack callback */
static void ocl_wpa_crack_callback(char *line, int self)
{
    int a,b,c,e;
    int *found;
    int err;
    char plain[MAX];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 salt2;


    /* setup addline */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);

    /* setup salt */
    salt=ocl_get_salt();
    salt2=ocl_get_salt2();

    if (attack_over!=0) pthread_exit(NULL);
    pthread_mutex_lock(&wthreads[self].tempmutex);
    pthread_mutex_unlock(&wthreads[self].tempmutex);

    size_t nws1=ocl_rule_workset[self]*wthreads[self].vectorsize;
    size_t nws=ocl_rule_workset[self];

    _clSetKernelArg(rule_kernelend[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernelend[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 2, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelend[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelend[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelend[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 9, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelend[self], 10, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelmod[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelmod[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 9, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelpre1[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl1[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelpre2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelpre2[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelpre2[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 0, sizeof(cl_mem), (void*) &rule_images4_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 2, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelbl2[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl2[self], 6, sizeof(cl_uint16), (void*) &salt2);
    _clSetKernelArg(rule_kernelbl2[self], 7, sizeof(cl_mem), (void*) &block_buf[self]);
    _clSetKernelArg(rule_kernelbl2[self], 8, sizeof(cl_mem), (void*) &eapol_buf[self]);

    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &nws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    for (a=0;a<7;a++)
    {
	if (attack_over==1) pthread_exit(NULL);
	addline.sA=a*1170;
	_clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_uint16), (void*) &addline);
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(ocl_rule_workset[self]*wthreads[self].vectorsize)/14;
    }

    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    for (a=0;a<7;a++)
    {
	if (attack_over==1) pthread_exit(NULL);
	addline.sA=a*1170;
	_clSetKernelArg(rule_kernelbl2[self], 3, sizeof(cl_uint16), (void*) &addline);
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl2[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
        wthreads[self].tries+=(ocl_rule_workset[self]*wthreads[self].vectorsize)/14;
    }
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelend[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);

    found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
    if (err!=CL_SUCCESS) return;
    if (*found>0) 
    {
        _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        for (a=0;a<ocl_rule_workset[self];a++)
        if (rule_found_ind[self][a]==1)
	{
	    b=a*wthreads[self].vectorsize;
    	    _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
	    for (c=0;c<wthreads[self].vectorsize;c++)
	    {
	        e=(a)*wthreads[self].vectorsize+c;
    	        if (memcmp(hccap.keymic, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-1) == 0)
    	        {
            	    strcpy(plain,&rule_images[self][0]+(e*MAX));
            	    strcat(plain,line);
            	    add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
    		}
	    }
	}
	bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
    	_clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	*found = 0;
    	_clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
    }
    _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
}
コード例 #12
0
ファイル: app.cpp プロジェクト: haibo031031/vdt
double run_gpu(datatype *h_i_vector, datatype *h_o_vector, datatype *h_o_vector_ref,\
			int w, int h, int kernel_id, bool verify)throw(std::string){	
	cl_mem d_i_vector, d_o_vector;
	int number_elements_out = w * h;
	int number_elements_in = w * h;
	try{
		//--1 transfer data from host to device		
		d_i_vector = _clMalloc(number_elements_in * sizeof(datatype));
		d_o_vector = _clMalloc(number_elements_out * sizeof(datatype));
		_clMemcpyH2D(d_i_vector, h_i_vector, number_elements_in*sizeof(datatype));
		_clFinish();
		//--2 invoke kernel
		int args_idx = 0;
		_clSetArgs(kernel_id, args_idx++, d_i_vector);
		_clSetArgs(kernel_id, args_idx++, d_o_vector);
		_clSetArgs(kernel_id, args_idx++, &w, sizeof(int));
		_clSetArgs(kernel_id, args_idx++, &h, sizeof(int));

		int work_group_unit = 16;
		int range_x = -1; 
		int range_y = -1;
		switch(kernel_id){
			case 0:
				range_x = w, range_y = h;
				break;
			case 1:
				range_x = w/2, range_y = h;
				break;
			case 2:
				range_x = w/4, range_y = h;
				break;				
			case 3:
				range_x = w/8, range_y = h;
				break;								
			case 4:
				range_x = w/16, range_y = h;
				break;								
			default:
				throw(string("Unknown kernel id!!!"));
				break;	
		}
		int group_x = work_group_unit * 4;
		int group_y = 1;
		int number_iterations = 1;
		unsigned long deltaT = 0.0f;
		unsigned long kernel_exe_time = 0.0f;
		std::cout<<"--testing..."<<std::endl;		
		for(int i=-1; i<number_iterations; i++){
			_clInvokeKernel2D(kernel_id, range_x, range_y, group_x, group_y, &kernel_exe_time);			
			if(i==0)
			  deltaT += kernel_exe_time;
		}		
		deltaT = deltaT/number_iterations;
		std::cout<<"--done."<<std::endl;
		_clMemcpyD2H(h_o_vector, d_o_vector, number_elements_out*sizeof(datatype));
	
		if(verify){
			verify_array<datatype>(h_o_vector, h_o_vector_ref, number_elements_out);
		}
		//--4 release cl resources.
		_clFree(d_i_vector);
		_clFree(d_o_vector);

		return (double)(((double)w*(double)h*(double)(h+1))*sizeof(datatype))/(double)deltaT;		
	}
	catch(std::string msg){		
		std::string e_str = "in run_gpu -> ";
		e_str += msg;
		throw(e_str);
	}
	return 0.0;
}
コード例 #13
0
ファイル: ocl_wordpress.c プロジェクト: 6e6f36/hashkill
/* Crack callback */
static void ocl_wordpress_crack_callback(char *line, int self)
{
    int a,b,c,e,iter;
    int *found;
    int err;
    struct  hash_list_s  *mylist, *addlist;
    char plain[MAX];
    char hex1[16];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 singlehash;
    char mhash[20];
    char base64[64];

    mylist = hash_list;
    while (mylist)
    {
        if (mylist->salt2[0]==1) {mylist=mylist->next;continue;}
	/* setup addline */
	addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
	addline.sF=strlen(line);
	addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
	addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
	addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
	addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);

	/* setup salt */
        salt.sE=(mylist->salt[4])|(mylist->salt[5]<<8)|(mylist->salt[6]<<16)|(mylist->salt[7]<<24);
        salt.sF=(mylist->salt[8])|(mylist->salt[9]<<8)|(mylist->salt[10]<<16)|(mylist->salt[11]<<24);
        char *p = strchr((char *)cov_2char, mylist->salt[3]);
        if (!p) return;
        iter = 1 << (p - (char *)cov_2char);

        memcpy(base64,mylist->hash,34);
        b64_pton(base64+12,mhash);
        unsigned int A,B,C,D;
        memcpy(hex1,mhash,4);
        memcpy(&A, hex1, 4);
        memcpy(hex1,mhash+4,4);
        memcpy(&B, hex1, 4);
        memcpy(hex1,mhash+8,4);
        memcpy(&C, hex1, 4);
        memcpy(hex1,mhash+12,4);
        memcpy(&D, hex1, 4);
	singlehash.x=A;
	singlehash.y=B;
	singlehash.z=C;
	singlehash.w=D;

        if (attack_over!=0) pthread_exit(NULL);
        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);


        _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
        _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
        _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
	_clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &addline);
	_clSetKernelArg(rule_kernelmod[self], 5, sizeof(cl_uint16), (void*) &salt);

        _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint4), (void*) &singlehash);
	_clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt);

        _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint4), (void*) &singlehash);
	_clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);

        _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
        _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
        _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes2_buf[self]);
        _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
        _clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
	_clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint4), (void*) &singlehash);
	_clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);

        size_t nws=ocl_rule_workset[self]*wthreads[self].vectorsize;
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &nws, rule_local_work_size, 0, NULL, NULL);
    	_clFinish(rule_oclqueue[self]);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &ocl_rule_workset[self], rule_local_work_size, 0, NULL, NULL);
    	_clFinish(rule_oclqueue[self]);
	for (a=0;a<(iter/1024);a++)
	{
    	    if (attack_over!=0) pthread_exit(NULL);
    	    wthreads[self].tries+=(ocl_rule_workset[self]*wthreads[self].vectorsize)/((get_hashes_num()*(iter/1024)));
    	    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &ocl_rule_workset[self], rule_local_work_size, 0, NULL, NULL);
    	    _clFinish(rule_oclqueue[self]);
    	}
	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &ocl_rule_workset[self], rule_local_work_size, 0, NULL, NULL);

        found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
        if (err!=CL_SUCCESS) continue;
        if (*found>0) 
        {
            _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	    for (a=0;a<ocl_rule_workset[self];a++)
	    if (rule_found_ind[self][a]==1)
	    {
		b=a*wthreads[self].vectorsize;
    		_clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
		for (c=0;c<wthreads[self].vectorsize;c++)
		{
	    	    e=(a)*wthreads[self].vectorsize+c;
                    memcpy(base64,mylist->hash,34);
                    b64_pton(base64+12,mhash);
    		    if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-1) == 0)
    		    {
            		int flag = 0;
                	strcpy(plain,&rule_images[self][0]+(e*MAX));
                	strcat(plain,line);
                	pthread_mutex_lock(&crackedmutex);
                	addlist = cracked_list;
                	while (addlist)
                	{
                	    if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0))
                                flag = 1;
                    	    addlist = addlist->next;
                	}
                	pthread_mutex_unlock(&crackedmutex);
                	if (flag == 0)
                	{
                	    add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain);
                	    mylist->salt2[0]=1;
                	}
    		    }
		}
	    }
	    bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	    *found = 0;
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
	}
	_clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
	mylist = mylist->next;
    }
}
コード例 #14
0
ファイル: ocl_apr1.c プロジェクト: 6e6f36/hashkill
/* Crack callback */
static void ocl_apr1_crack_callback(char *line, int self)
{
    int a,b,c,e;
    int *found;
    int err;
    struct  hash_list_s  *mylist, *addlist;
    char plain[MAX];
    char hex1[16];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 singlehash;
    unsigned char base64[64];
    int cc,cc1;
    size_t gws,gws1;

    cc = self_kernel16[self];
    cc1 = self_kernel16[self]+strlen(line);
    if (cc1>15) cc1=15;
    mylist = hash_list;
    while (mylist)
    {
        if (attack_over!=0) pthread_exit(NULL);
        if (mylist->salt2[0]==1) {mylist=mylist->next;continue;}
	/* setup addline */
	addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
	addline.sF=strlen(line);
	addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
	addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
	addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
	addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);
	_clSetKernelArg(rule_kernel162[cc][self], 4, sizeof(cl_uint16), (void*) &addline);

	/* setup salt */
	char tempsalt[16];
        bzero(tempsalt,16);
        strcpy(tempsalt,mylist->salt);
        a=strlen(tempsalt)-1;
        tempsalt[a]=0;tempsalt[a+1]=0;tempsalt[a+2]=0;tempsalt[a+3]=0;tempsalt[a+4]=0;
        tempsalt[a+5]=0;tempsalt[a+6]=0;tempsalt[a+7]=0;tempsalt[a+8]=0;
        salt.s0=salt.s4=salt.s8=salt.sB=0;
        salt.sC=strlen(tempsalt)-6;
        salt.sE=(tempsalt[6])|(tempsalt[7]<<8)|(tempsalt[8]<<16)|(tempsalt[9]<<24);
        salt.sF=(tempsalt[10])|(tempsalt[11]<<8)|(tempsalt[12]<<16)|(tempsalt[13]<<24);
        salt.sD=cc1;
        salt.sB=cc;
        salt.s9=4;
        salt.sA=('a')|('p'<<8)|('r'<<16)|('1'<<24);

	_clSetKernelArg(rule_kernel162[cc][self], 5, sizeof(cl_uint16), (void*) &salt);
	_clSetKernelArg(rule_kernel16[cc1][self], 5, sizeof(cl_uint16), (void*) &salt);
	_clSetKernelArg(rule_kernel162[cc][self], 0, sizeof(cl_mem), (void*) &rule_images16_buf[cc1][self]);

        unsigned char mhash[22];
        memcpy(base64,mylist->hash,22);
        b64_pton_crypt(base64,mhash);
        unsigned int A,B,C,D;
        memcpy(hex1,mhash,4);
        memcpy(&A, hex1, 4);
        memcpy(hex1,mhash+4,4);
        memcpy(&B, hex1, 4);
        memcpy(hex1,mhash+8,4);
        memcpy(&C, hex1, 4);
        memcpy(hex1,mhash+12,4);
        memcpy(&D, hex1, 4);
	singlehash.x=A;singlehash.y=B;singlehash.z=C;singlehash.w=D;
	_clSetKernelArg(rule_kernel16[cc1][self], 4, sizeof(cl_uint4), (void*) &singlehash);

        pthread_mutex_lock(&wthreads[self].tempmutex);
        pthread_mutex_unlock(&wthreads[self].tempmutex);

	gws = (rule_counts[self][cc] / wthreads[self].vectorsize);
	while ((gws%64)!=0) gws++;
	gws1 = gws*wthreads[self].vectorsize;
	if (gws1==0) gws1=64;
	if (gws==0) gws=64;
        wthreads[self].tries+=(gws1)/get_hashes_num();
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernel162[cc][self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
        _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernel16[cc1][self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
        found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
        if (err!=CL_SUCCESS) continue;
        if (*found>0) 
        {
            _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	    for (a=0;a<ocl_rule_workset[self];a++)
	    if (rule_found_ind[self]!=0)
	    {
		b=a*wthreads[self].vectorsize;
    		_clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
		for (c=0;c<wthreads[self].vectorsize;c++)
		{
	    	    e=(a)*wthreads[self].vectorsize+c;
                    unsigned char mhash[20];
                    memcpy(base64,mylist->hash,22);
                    b64_pton_crypt(base64,mhash);
    		    if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1-1) == 0)
    		    {
            		int flag = 0;
                	strcpy(plain,&rule_images162[cc][self][0]+(e*16));
                	strcat(plain,line);
                	pthread_mutex_lock(&crackedmutex);
                	addlist = cracked_list;
                	while (addlist)
                	{
                	    if ((strcmp(addlist->username, mylist->username) == 0) && (memcmp(addlist->hash, mylist->hash, hash_ret_len1) == 0))
                                flag = 1;
                    	    addlist = addlist->next;
                	}
                	pthread_mutex_unlock(&crackedmutex);
                	if (flag == 0)
                	{
                	    add_cracked_list(mylist->username, mylist->hash, mylist->salt, plain);
                	    mylist->salt2[0]=1;
                	}
    		    }
		}
	    }
	    bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	    *found = 0;
    	    _clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
	}

	_clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
	mylist = mylist->next;
    }
}
コード例 #15
0
ファイル: main.cpp プロジェクト: haibo031031/aristotle
int main(int argc, char ** argv)
{
	//float *hIn1, *hIn2;
	//cl_mem dIn1, dIn2;
	@hIn;
	@dIn;
	float *hOut, *rOut;
	cl_mem dOut;
try{
	_clParseCommandLine(argc, argv);
	string strSubfix = string(argv[2]);
	_clInit(platform_id, device_type, device_id);
	int cdim = atoi(argv[1]); 
	int rdim = atoi(argv[1]); 
	int r = atoi(argv[3]);
	@cdimIn
	@rdimIn
	// different between iMAP1 and iMAP2
	printf("cdim=%d, rdim=%d, radius=%d\n", cdim, rdim, r);
	int iIter = 10;
	int elems = @elems;
	double dataAmount = (double)cdim * (double)rdim * (double)(elems) * (double)sizeof(float) * 1e-6;

	
#if defined TIME
	double start_time = 0;
	double end_time = 0;
	double delta_time = 0;
	int cnt = 0;
	string dat_name= string("data.") + strSubfix + string(".dat");

	FILE * fp = fopen(dat_name.c_str(), "a+");
	if(fp==NULL)
	{
		printf("failed to open file!!!\n");
		exit(-1);
	}
#endif
	
	//hIn1 = (float *)malloc(cdim * rdim * sizeof(float));
	//hIn2 = (float *)malloc(cdim * rdim * sizeof(float));
	@hAlc
	hOut = (float *)malloc(cdim * rdim * sizeof(float));
	rOut = (float *)malloc(cdim * rdim * sizeof(float));

	//fill<float>(hIn1, cdim * rdim, 5);
	//fill<float>(hIn2, cdim * rdim, 5);		
	@hFill

	//dIn1 = _clMalloc(cdim * rdim * sizeof(float));
	//dIn2 = _clMalloc(cdim * rdim * sizeof(float));
	@dAlc
	dOut = _clMalloc(cdim * rdim * sizeof(float));

	//_clMemcpyH2D(dIn1, hIn1, cdim * rdim * sizeof(float));
	//_clMemcpyH2D(dIn2, hIn2, cdim * rdim * sizeof(float));
	@h2dTrans

	_clFinish();
	
	// warmup
	//OCLRun(dIn1, dIn2, dOut, cdim, rdim);
	OCLRun(@oclArgs, dOut, cdim, rdim, cdimIn, rdimIn);

#ifdef VARIFY	
	//OMPRun(hIn1, hIn2, rOut, cdim, rdim);
	OMPRun(@ompArgs, rOut, cdim, rdim, cdimIn, rdimIn);
#endif //VARIFY
	

#ifdef TIME
	delta_time = 0;
	cnt = 0;
#endif	
	for(int i=0; i<iIter; i++)
	{
#ifdef TIME
	cnt++;
	start_time = gettime();
#endif

		OCLRun(@oclArgs, dOut, cdim, rdim, cdimIn, rdimIn);
#ifdef TIME	
	end_time = gettime();
	delta_time += end_time - start_time;
	if(fabs(delta_time-600000.0)>0.1) break;	// ????
#endif	
	}

#ifdef TIME
	fprintf(fp, "%lf\t", dataAmount * (double)cnt/delta_time);
#endif

#ifdef VARIFY
	_clMemcpyD2H(hOut, dOut, cdim * rdim * sizeof(float));	
	verify_array<float>(rOut, hOut, cdim * rdim);	
#endif //VARIFY


#ifdef TIME	
	fprintf(fp, "\n");	
	fclose(fp);
#endif	
}
catch(string msg){
	printf("ERR:%s\n", msg.c_str());
	printf("Error catched\n");
	exit(-1);
	}
	//_clFree(dIn1);
	//_clFree(dIn2);
	@clFree
	_clFree(dOut);
	_clRelease();
	//if(hIn1!=NULL) free(hIn1);
	//if(hIn2!=NULL) free(hIn2);
	@hFree
	if(hOut!=NULL) free(hOut);
	if(rOut!=NULL) free(rOut);

	return 1;
}
コード例 #16
0
ファイル: ocl_keyring.c プロジェクト: 6e6f36/hashkill
/* Crack callback */
static void ocl_keyring_crack_callback(char *line, int self)
{
    int a,b,c,e;
    int *found;
    int err;
    char plain[MAX];
    char hex1[16];
    cl_uint16 addline;
    cl_uint16 salt;
    cl_uint16 singlehash;
    char mhash[32];
    size_t gws,gws1;

    /* setup addline */
    addline.s0=addline.s1=addline.s2=addline.s3=addline.s4=addline.s5=addline.s6=addline.s7=addline.sF=0;
    addline.sF=strlen(line);
    addline.s0=line[0]|(line[1]<<8)|(line[2]<<16)|(line[3]<<24);
    addline.s1=line[4]|(line[5]<<8)|(line[6]<<16)|(line[7]<<24);
    addline.s2=line[8]|(line[9]<<8)|(line[10]<<16)|(line[11]<<24);
    addline.s3=line[12]|(line[13]<<8)|(line[14]<<16)|(line[15]<<24);

    /* setup salt */
    salt.s0=(cs.salt[0])|(cs.salt[1]<<8)|(cs.salt[2]<<16)|(cs.salt[3]<<24);
    salt.s1=(cs.salt[4])|(cs.salt[5]<<8)|(cs.salt[6]<<16)|(cs.salt[7]<<24);
    salt.s4=(cs.ct[0])|(cs.ct[1]<<8)|(cs.ct[2]<<16)|(cs.ct[3]<<24);
    salt.s5=(cs.ct[4])|(cs.ct[5]<<8)|(cs.ct[6]<<16)|(cs.ct[7]<<24);
    salt.s6=(cs.ct[8])|(cs.ct[9]<<8)|(cs.ct[10]<<16)|(cs.ct[11]<<24);
    salt.s7=(cs.ct[12])|(cs.ct[13]<<8)|(cs.ct[14]<<16)|(cs.ct[15]<<24);
    

    memcpy(mhash,cs.hash,16);
    unsigned int A,B,C,D;
    memcpy(hex1,mhash,4);
    memcpy(&A, hex1, 4);
    memcpy(hex1,mhash+4,4);
    memcpy(&B, hex1, 4);
    memcpy(hex1,mhash+8,4);
    memcpy(&C, hex1, 4);
    memcpy(hex1,mhash+12,4);
    memcpy(&D, hex1, 4);
    singlehash.s0=A;
    singlehash.s1=B;
    singlehash.s2=C;
    singlehash.s3=D;

    if (attack_over!=0) pthread_exit(NULL);
    pthread_mutex_lock(&wthreads[self].tempmutex);
    pthread_mutex_unlock(&wthreads[self].tempmutex);

    _clSetKernelArg(rule_kernelmod[self], 0, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 1, sizeof(cl_mem), (void*) &rule_images_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelmod[self], 3, sizeof(cl_uint16), (void*) &addline);
    _clSetKernelArg(rule_kernelmod[self], 4, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelpre1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 1, sizeof(cl_mem), (void*) &rule_images2_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelpre1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernelpre1[self], 6, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernelbl1[self], 0, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernelbl1[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernelbl1[self], 6, sizeof(cl_uint16), (void*) &salt);
    _clSetKernelArg(rule_kernellast[self], 0, sizeof(cl_mem), (void*) &rule_buffer[self]);
    _clSetKernelArg(rule_kernellast[self], 1, sizeof(cl_mem), (void*) &rule_images3_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 2, sizeof(cl_mem), (void*) &rule_sizes_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 3, sizeof(cl_mem), (void*) &rule_found_ind_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 4, sizeof(cl_mem), (void*) &rule_found_buf[self]);
    _clSetKernelArg(rule_kernellast[self], 5, sizeof(cl_uint16), (void*) &singlehash);
    _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);


    if (rule_counts[self][0]==-1) return;
    gws = (rule_counts[self][0] / wthreads[self].vectorsize);
    while ((gws%64)!=0) gws++;
    gws1 = gws*wthreads[self].vectorsize;
    if (gws1==0) gws1=64;
    if (gws==0) gws=64;

    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelmod[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelpre1[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    _clFinish(rule_oclqueue[self]);

    for (a=0;a<((cs.iterations-1)/200);a++)
    {
    	_clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernelbl1[self], 1, NULL, &gws, rule_local_work_size, 0, NULL, NULL);
	_clFinish(rule_oclqueue[self]);
	wthreads[self].tries+=(gws1)/(cs.iterations/200);
    }
    salt.sA=((cs.iterations-1)%200);
    _clSetKernelArg(rule_kernellast[self], 6, sizeof(cl_uint16), (void*) &salt);
    _clEnqueueNDRangeKernel(rule_oclqueue[self], rule_kernellast[self], 1, NULL, &gws1, rule_local_work_size, 0, NULL, NULL);
    found = _clEnqueueMapBuffer(rule_oclqueue[self], rule_found_buf[self], CL_TRUE,CL_MAP_READ, 0, 4, 0, 0, NULL, &err);
    if (*found>0) 
    {
        _clEnqueueReadBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_TRUE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
    	for (a=0;a<ocl_rule_workset[self];a++)
	if (rule_found_ind[self][a]==1)
	{
	    b=a*wthreads[self].vectorsize;
    	    _clEnqueueReadBuffer(rule_oclqueue[self], rule_buffer[self], CL_TRUE, b*hash_ret_len1, hash_ret_len1*wthreads[self].vectorsize, rule_ptr[self]+b*hash_ret_len1, 0, NULL, NULL);
	    for (c=0;c<wthreads[self].vectorsize;c++)
	    {
	    	e=(a)*wthreads[self].vectorsize+c;
                memcpy(mhash,cs.hash,hash_ret_len1);
    		if (memcmp(mhash, (char *)rule_ptr[self]+(e)*hash_ret_len1, hash_ret_len1) == 0)
    		{
            	    strcpy(plain,&rule_images[self][0]+(e*MAX));
            	    strcat(plain,line);
            	    pthread_mutex_lock(&crackedmutex);
            	    if (!cracked_list)
            	    {
                	pthread_mutex_unlock(&crackedmutex);
                	add_cracked_list(hash_list->username, hash_list->hash, hash_list->salt, plain);
        	    }
        	    else pthread_mutex_unlock(&crackedmutex);
    	        }
	    }
	}
	bzero(rule_found_ind[self],ocl_rule_workset[self]*sizeof(cl_uint));
    	_clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_ind_buf[self], CL_FALSE, 0, ocl_rule_workset[self]*sizeof(cl_uint), rule_found_ind[self], 0, NULL, NULL);
        *found = 0;
	_clEnqueueWriteBuffer(rule_oclqueue[self], rule_found_buf[self], CL_FALSE, 0, 4, found, 0, NULL, NULL);
    }
    _clEnqueueUnmapMemObject(rule_oclqueue[self],rule_found_buf[self],(void *)found,0,NULL,NULL);
}