static int md5_final(struct shash_desc *desc, u8 *out)
{
	struct md5_state *mctx = shash_desc_ctx(desc);
	const unsigned int offset = mctx->byte_count & 0x3f;
	char *p = (char *)mctx->block + offset;
	int padding = 56 - (offset + 1);

	*p++ = 0x80;
	if (padding < 0) {
		memset(p, 0x00, padding + sizeof(u64));
		md5_transform_helper(mctx);
		p = (char *)mctx->block;
		padding = 56;
	}

	memset(p, 0, padding);
	mctx->block[14] = mctx->byte_count << 3;
	mctx->block[15] = mctx->byte_count >> 29;
	le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
			  sizeof(u64)) / sizeof(u32));
	md5_transform(mctx->hash, mctx->block);
	cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(u32));
	memcpy(out, mctx->hash, sizeof(mctx->hash));
	memset(mctx, 0, sizeof(*mctx));

	return 0;
}
void md5_finish(struct md_context *context)
{
	int buflen = context->count & 63;
	uint64_t bitcount;

	context->buffer[buflen++] = 0x80;
	memset(&context->buffer[buflen], 0, 64 - buflen);

	/* No room left for the 8-byte length field: flush an extra block. */
	if (buflen > 56) {
		md5_transform(context);
		memset(context->buffer, 0, 64);
	}

	/* memcpy instead of *(uint64_t *)(&context->buffer[56]) = ... to
	 * avoid unaligned access and strict-aliasing problems. */
	bitcount = context->count * 8;
	memcpy(&context->buffer[56], &bitcount, sizeof(uint64_t));

	md5_transform(context);
	memcpy(context->digest, context->h, 16);
}
static inline void md5_transform_helper(struct md5_ctx *ctx)
{
#ifdef DEBUG_MD5
	printk("md5_transform_helper running\n");
#endif //DEBUG_MD5
	//le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(u32));
	md5_transform(ctx->hash, ctx->block);
}
__u32 secure_ipv6_id(const __be32 daddr[4])
{
	__u32 hash[4];

	memcpy(hash, daddr, 16);
	md5_transform(hash, net_secret);

	return hash[0];
}
void md5_hash (unsigned char *sum, unsigned char *data, int data_len)
{
	struct md5_context context;
	int i;

	context.A = md5_A;
	context.B = md5_B;
	context.C = md5_C;
	context.D = md5_D;

	/* Single-block variant: input is silently truncated so that the
	 * message plus padding always fits into one 64-byte block. */
	data_len %= 55;

	for (i = 0; i < 16; i++) // turned out faster than memset
		context.M[i] = 0;

	for (i = 0; i < data_len / 4; i++) // faster than memcpy
		context.M[i] = ((unsigned int *) data)[i];

	/* Copy the 0-3 trailing bytes, then append the 0x80 pad byte.
	 * (Rewritten as a fall-through switch; behavior is unchanged.) */
	i <<= 2;
	switch (data_len & 3) {
	case 3:
		((unsigned char *) context.M)[i] = data[i]; i++;
		/* fall through */
	case 2:
		((unsigned char *) context.M)[i] = data[i]; i++;
		/* fall through */
	case 1:
		((unsigned char *) context.M)[i] = data[i]; i++;
		/* fall through */
	case 0:
		((unsigned char *) context.M)[i] = 0x80;
		break;
	}

	context.M[14] = data_len << 3; // message length in bits, i.e. * 8

	md5_transform(&context);

	*((uint32_t *) &(sum[ 0])) = context.A;
	*((uint32_t *) &(sum[ 4])) = context.B;
	*((uint32_t *) &(sum[ 8])) = context.C;
	*((uint32_t *) &(sum[12])) = context.D;
}
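/*
 * Usage sketch for md5_hash() (not from the original source; assumes
 * <stdio.h> and that md5_A..md5_D / md5_transform() are defined in this
 * translation unit).  Note that the data_len %= 55 above restricts the
 * helper to short, single-block messages.
 */
void md5_hash_demo(void)
{
	unsigned char sum[16];
	int i;

	md5_hash(sum, (unsigned char *)"abc", 3);

	for (i = 0; i < 16; i++)
		printf("%02x", sum[i]);
	printf("\n");
}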
static unsigned int get_random_int(void)
{
	/*
	 * md5_transform() expects a 4-word hash state and a 16-word input
	 * block; the original passed pointers to two single (and partly
	 * uninitialized) stack words.  Use a proper state seeded from the
	 * cycle counter, mixed with net_secret as in the helpers above.
	 */
	__u32 hash[MD5_DIGEST_WORDS];

	hash[0] = get_cycles();
	hash[1] = net_secret[13];
	hash[2] = net_secret[14];
	hash[3] = net_secret[15];
	md5_transform(hash, net_secret);

	return hash[0];
}
void md5_update(struct md_context *context, uint8_t *chunk, uint64_t chunk_size)
{
	int buflen = context->count & 63;

	context->count += chunk_size;

	/* Not enough for a full block yet: just buffer the bytes. */
	if ((buflen + chunk_size) < 64) {
		memcpy(&context->buffer[buflen], chunk, chunk_size);
		return;
	}

	/* Fill and process the partially buffered block. */
	memcpy(&context->buffer[buflen], chunk, 64 - buflen);
	md5_transform(context);
	chunk_size -= (64 - buflen);
	chunk += (64 - buflen);

	/* Process whole 64-byte blocks straight from the input. */
	while (chunk_size >= 64) {
		memcpy(context->buffer, chunk, 64);
		md5_transform(context);
		chunk_size -= 64;
		chunk += 64;
	}

	/* Stash the tail for the next update or for md5_finish(). */
	memcpy(context->buffer, chunk, chunk_size);
}
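/*
 * Streaming usage sketch for the md_context API above (assumption: an
 * md5_init() that zeroes count and loads the standard IVs into h exists
 * alongside md5_update()/md5_finish(); it is not shown in this file).
 */
void md5_digest_demo(const uint8_t *msg, uint64_t len, uint8_t out[16])
{
	struct md_context ctx;

	md5_init(&ctx);				/* hypothetical initializer */
	md5_update(&ctx, (uint8_t *)msg, len);
	md5_finish(&ctx);			/* pads and runs final blocks */
	memcpy(out, ctx.digest, 16);
}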
static unsigned char * md5_final(void)
{
	int i, buflen = length & 63;

	buffer[buflen++] = 0x80;
	memset (buffer + buflen, 0, 64 - buflen);

	if (buflen > 56) {
		md5_transform (buffer);
		memset (buffer, 0, 64);
		buflen = 0;
	}

	/* Append the 64-bit bit count, little-endian.  The original wrote 0
	 * into the high word, which breaks for inputs of 512 MiB or more. */
	*(UINT4 *) (buffer + 56) = cpu_to_le32 (8 * length);
	*(UINT4 *) (buffer + 60) = cpu_to_le32 (length >> 29);

	md5_transform (buffer);

	for (i = 0; i < 4; i++)
		state[i] = cpu_to_le32 (state[i]);

	return (unsigned char *) state;
}
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
	u32 hash[MD5_DIGEST_WORDS];

	hash[0] = (__force u32)saddr;
	hash[1] = (__force u32)daddr;
	hash[2] = (__force u32)dport ^ net_secret[14];
	hash[3] = net_secret[15];

	md5_transform(hash, net_secret);

	return hash[0];
}
__u32 secure_ip_id(__be32 daddr)
{
	u32 hash[MD5_DIGEST_WORDS];

	hash[0] = (__force __u32) daddr;
	hash[1] = net_secret[13];
	hash[2] = net_secret[14];
	hash[3] = net_secret[15];

	md5_transform(hash, net_secret);

	return hash[0];
}
/* exact copy of the TCP sequence number function from net/core/secure.c */
__u32 secure_tcp_sequence_number(u32 saddr, u32 daddr, u16 sport, u16 dport)
{
	u32 hash[MD5_DIGEST_WORDS];

	hash[0] = saddr;
	hash[1] = daddr;
	hash[2] = (sport << 16) + dport;
	hash[3] = net_secret[15];

	md5_transform(hash, net_secret);

	return seq_scale(hash[0]);
}
void md5_final (MD5Schedule_ref ctx, char *digest /* 16 chars */)
{
  register unsigned int count = (ctx->bits[0] >> 3) & 0x3F ;
  register unsigned char *p = ctx->in + count ;

  *p++ = 0x80 ;
  count = 63 - count ;
  if (count < 8)
  {
    byte_zero(p, count) ;
    uint32_little_endian(ctx->in, 16) ;
    md5_transform(ctx->buf, (uint32 *)ctx->in) ;
    byte_zero(ctx->in, 56) ;
  }
  else byte_zero(p, count - 8) ;
  uint32_little_endian(ctx->in, 14) ;
  byte_copy(ctx->in + 56, 4, (char *)&ctx->bits[0]) ;
  byte_copy(ctx->in + 60, 4, (char *)&ctx->bits[1]) ;
  md5_transform(ctx->buf, (uint32 *)ctx->in) ;
  uint32_little_endian((char *)ctx->buf, 4) ;
  byte_copy(digest, 16, (char *)ctx->buf) ;
}
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
				 __be16 sport, __be16 dport)
{
	u32 hash[MD5_DIGEST_WORDS];

	hash[0] = (__force u32)saddr;
	hash[1] = (__force u32)daddr;
	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
	hash[3] = net_secret[15];

	md5_transform(hash, net_secret);

	return seq_scale(hash[0]);
}
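/*
 * For reference, the seq_scale() used by the functions above (a sketch of
 * the kernel helper of this era; the exact shift width is an assumption):
 * the hash fixes the starting point per 4-tuple, and the clock-driven term
 * keeps initial sequence numbers advancing over time, as RFC 793 asks.
 */
static u32 seq_scale(u32 seq)
{
	/* ~64 ns tick: ISNs keep moving even for identical 4-tuples */
	return seq + (ktime_to_ns(ktime_get_real()) >> 6);
}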
/* Generate HMAC-MD5 intermediate hashes */
static void sa_hmac_md5_get_pad(const u8 *key, u16 key_sz, u32 *ipad, u32 *opad)
{
	u8 k_ipad[MD5_MESSAGE_BYTES];
	u8 k_opad[MD5_MESSAGE_BYTES];
	int i;

	for (i = 0; i < key_sz; i++) {
		k_ipad[i] = key[i] ^ 0x36;
		k_opad[i] = key[i] ^ 0x5c;
	}

	/* Instead of XOR with 0 */
	for (; i < MD5_MESSAGE_BYTES; i++) {
		k_ipad[i] = 0x36;
		k_opad[i] = 0x5c;
	}

	/* MD5 of k_ipad */
	md5_init(ipad);
	md5_transform(ipad, (u32 *)k_ipad);

	/* MD5 of k_opad.  The original transformed ipad a second time
	 * here, leaving opad holding only its initial values. */
	md5_init(opad);
	md5_transform(opad, (u32 *)k_opad);
}
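/*
 * Context for the helper above (an RFC 2104 reminder, not driver code):
 * HMAC-MD5(key, msg) = MD5(k_opad || MD5(k_ipad || msg)).  The ipad/opad
 * words computed here are the MD5 chaining values after hashing the first
 * 64-byte pad block, so the hardware only has to continue the inner hash
 * over msg and the outer hash over the inner digest.  A hypothetical
 * software equivalent (md5_resume() is an assumed helper, not a real API):
 *
 *	sa_hmac_md5_get_pad(key, key_sz, ipad, opad);
 *	md5_resume(inner,  ipad, msg, msg_len);		// inner hash
 *	md5_resume(digest, opad, (u8 *)inner, 16);	// outer hash
 */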
/*
 * Final wrapup - pad to a 64-byte boundary with the bit pattern
 * 1 0*, then append the 64-bit count of bits processed (low-order
 * word first; MD5 is little-endian throughout)
 */
static inline void md5_final (md5_ctx_t * ctx, u8 out[16])
{
  /* Number of content bytes in ctx->in */
  const int offset = ctx->byte_count & 0x3f;
  /* p points just past the last content byte in ctx->in */
  u8 *p = (u8 *) ctx->in + offset;
  /* Bytes of padding needed to reach byte 56 (-8..55) */
  int padding = 56 - (offset + 1);

  /* Set the first byte of padding to 0x80.  There is always room. */
  *p++ = 0x80;

  if (padding < 0)
    {
      /* Padding forces an extra block */
      memset (p, 0x00, padding + sizeof (u64));
      md5_transform_helper (ctx);
      p = (u8 *) ctx->in;
      padding = 56;
    }

  /* pad remaining bytes w/ 0x00 */
  memset (p, 0x00, padding);

  /* Append length in bits and transform */
  ctx->in[14] = ctx->byte_count << 3;	/* low order word first */
  ctx->in[15] = ctx->byte_count >> 29;

  /* Byte-swap the data words only; the two appended bit-count words
     stay in host order! */
  le32_to_cpu_array (ctx->in, (sizeof (ctx->in) - sizeof (u64)) / sizeof (u32));
  md5_transform (ctx->hash, ctx->in);

  /* convert digest buf from host to LE byteorder */
  cpu_to_le32_array (ctx->hash, sizeof (ctx->hash) / sizeof (u32));

  /* copy to output buffer */
  memcpy (out, ctx->hash, sizeof (ctx->hash));

  /* Wipe the context.  The original passed sizeof (ctx), which only
     cleared a pointer's worth of bytes. */
  memset (ctx, 0, sizeof (*ctx));
}
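/*
 * Worked example of the padding arithmetic above (illustration only, not
 * part of the original source; assumes <stdio.h>).  For byte_count = 60:
 * offset = 60, the 0x80 byte lands at position 60, padding = 56 - 61 = -5,
 * so padding + 8 = 3 zero bytes close the current block, an extra
 * transform runs, and the next block carries 56 zero bytes plus the bit
 * count 60 * 8 = 480.
 */
void md5_padding_demo (void)
{
  unsigned int byte_count = 60;
  unsigned int offset = byte_count & 0x3f;	/* 60 */
  int padding = 56 - (int) (offset + 1);	/* -5: extra block needed */

  printf ("offset=%u padding=%d tail_zeros=%d bits=%u\n",
          offset, padding, padding + 8, byte_count << 3);
}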
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
			       __be16 dport)
{
	u32 secret[MD5_MESSAGE_BYTES / 4];
	u32 hash[MD5_DIGEST_WORDS];
	u32 i;

	memcpy(hash, saddr, 16);
	for (i = 0; i < 4; i++)
		secret[i] = net_secret[i] + (__force u32) daddr[i];
	secret[4] = net_secret[4] + (__force u32)dport;
	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
		secret[i] = net_secret[i];

	md5_transform(hash, secret);

	return hash[0];
}
__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
				   __be16 sport, __be16 dport)
{
	u32 secret[MD5_MESSAGE_BYTES / 4];
	u32 hash[MD5_DIGEST_WORDS];
	u32 i;

	memcpy(hash, saddr, 16);
	for (i = 0; i < 4; i++)
		secret[i] = net_secret[i] + (__force u32)daddr[i];
	secret[4] = net_secret[4] +
		    (((__force u16)sport << 16) + (__force u16)dport);
	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
		secret[i] = net_secret[i];

	md5_transform(hash, secret);

	return seq_scale(hash[0]);
}
u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
				__be16 sport, __be16 dport)
{
	u32 hash[MD5_DIGEST_WORDS];
	u64 seq;

	hash[0] = (__force u32)saddr;
	hash[1] = (__force u32)daddr;
	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
	hash[3] = net_secret[15];

	md5_transform(hash, net_secret);

	seq = hash[0] | (((u64)hash[1]) << 32);
	seq += ktime_to_ns(ktime_get_real());
	seq &= (1ull << 48) - 1;	/* DCCP sequence numbers are 48 bits */

	return seq;
}
/*! \fn static int md5_final(struct shash_desc *desc, u8 *out)
 *  \ingroup IFX_MD5_FUNCTIONS
 *  \brief compute final md5 value
 *  \param desc linux crypto shash descriptor
 *  \param out final md5 output value
 */
static int md5_final(struct shash_desc *desc, u8 *out)
{
	struct md5_ctx *mctx = shash_desc_ctx(desc);
	const unsigned int offset = mctx->byte_count & 0x3f;
	char *p = (char *)mctx->block + offset;
	int padding = 56 - (offset + 1);
	volatile struct deu_hash_t *hashs = (struct deu_hash_t *) HASH_START;
	unsigned long flag;

	*p++ = 0x80;
	if (padding < 0) {
		memset(p, 0x00, padding + sizeof(u64));
		md5_transform_helper(mctx);
		p = (char *)mctx->block;
		padding = 56;
	}

	memset(p, 0, padding);
	mctx->block[14] = endian_swap(mctx->byte_count << 3);
	mctx->block[15] = endian_swap(mctx->byte_count >> 29);

#if 0
	le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
			  sizeof(u64)) / sizeof(u32));
#endif

	md5_transform(mctx, mctx->hash, mctx->block);

	/* Read the digest back from the DEU hardware registers. */
	CRTCL_SECT_START;
	*((u32 *) out + 0) = endian_swap(hashs->D1R);
	*((u32 *) out + 1) = endian_swap(hashs->D2R);
	*((u32 *) out + 2) = endian_swap(hashs->D3R);
	*((u32 *) out + 3) = endian_swap(hashs->D4R);
	CRTCL_SECT_END;

	// Wipe context
	memset(mctx, 0, sizeof(*mctx));

	return 0;
}
u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
				  __be16 sport, __be16 dport)
{
	u32 secret[MD5_MESSAGE_BYTES / 4];
	u32 hash[MD5_DIGEST_WORDS];
	u64 seq;
	u32 i;

	memcpy(hash, saddr, 16);
	for (i = 0; i < 4; i++)
		secret[i] = net_secret[i] + (__force u32)daddr[i];
	secret[4] = net_secret[4] +
		    (((__force u16)sport << 16) + (__force u16)dport);
	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
		secret[i] = net_secret[i];

	md5_transform(hash, secret);

	seq = hash[0] | (((u64)hash[1]) << 32);
	seq += ktime_to_ns(ktime_get_real());
	seq &= (1ull << 48) - 1;

	return seq;
}
static inline void md5_transform_helper(struct md5_ctx *ctx)
{
	le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(u32));
	md5_transform(ctx->hash, ctx->block);
}
static inline void md5_transform_helper (md5_ctx_t * ctx)
{
  le32_to_cpu_array (ctx->in, sizeof (ctx->in) / sizeof (u32));
  md5_transform (ctx->hash, ctx->in);
}
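/*
 * For reference, the le32_to_cpu_array() used by the helpers above (a
 * sketch of the common kernel pattern; on little-endian machines
 * le32_to_cpu() is a no-op and the whole loop compiles away):
 */
static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
{
	while (words--) {
		*buf = le32_to_cpu(*buf);
		buf++;
	}
}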
static void md5_final(void *ctx, u8 *out)
{
	struct md5_ctx *mctx = ctx;
	const unsigned int offset = mctx->byte_count & 0x3f;
	char *p = (char *) mctx->block + offset;
	int padding = 56 - (offset + 1);
	unsigned long flag;
#ifndef ONLY_IN_MEM
	/* The original cast was to the undeclared 'struct hash_t'. */
	volatile struct deu_hash_t *hashs = (struct deu_hash_t *) HASH_START;
#else
	unsigned char *prin = NULL;
	struct deu_hash_t *hashs = NULL;

	hashs = (struct deu_hash_t *) kmalloc(sizeof(*hashs), GFP_KERNEL);
	memset(hashs, 0, sizeof(*hashs));
	prin = (unsigned char *) hashs;
#endif

#ifdef DEBUG_MD5
	printk("md5_final running\n");
	//hexdump(mctx, sizeof(*mctx));
	printk("block before and after transform\n");
	hexdump(mctx->block, sizeof(mctx->block));
#endif //DEBUG_MD5

	*p++ = 0x80;
	if (padding < 0) {
		memset(p, 0x00, padding + sizeof(u64));
		md5_transform_helper(mctx);
		p = (char *) mctx->block;
		padding = 56;
	}
	memset(p, 0, padding);

#ifdef DEBUG_MD5
	hexdump(&mctx->byte_count, sizeof(mctx->byte_count));
#endif //DEBUG_MD5

	mctx->block[14] = cpu_to_le32(mctx->byte_count << 3);
	mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29);

#ifdef DEBUG_MD5
	hexdump(mctx->block, sizeof(mctx->block));
#endif //DEBUG_MD5

	//le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
	//		  sizeof(u64)) / sizeof(u32));
	md5_transform(mctx->hash, mctx->block);

	local_irq_save(flag);

#ifdef CONFIG_CRYPTO_DEV_DANUBE_DMA
	/* Wait for the hardware to finish processing. */
	while (hashs->controlr.BSY) {
		// this will not take long
	}
#endif

	*((u32 *) out + 0) = le32_to_cpu(hashs->D1R);
	*((u32 *) out + 1) = le32_to_cpu(hashs->D2R);
	*((u32 *) out + 2) = le32_to_cpu(hashs->D3R);
	*((u32 *) out + 3) = le32_to_cpu(hashs->D4R);

	hashs->controlr.SM = 0;	// switch off again for next dma transfer

#ifdef CONFIG_CRYPTO_DEV_DANUBE_DMA
	{
		struct dma_device_info *dma_device;
		_ifx_deu_device *pDev = ifx_deu;

		dma_device = pDev->dma_device;
		//if (dma_device) dma_device_release(dma_device);
		if (dma_device) {
			dma_device_unregister(pDev->dma_device);
			dma_device_release(pDev->dma_device);
		}
	}
#endif

	//cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(u32));
	//memcpy(out, mctx->hash, sizeof(mctx->hash));
	local_irq_restore(flag);

	// Wipe context
	memset(mctx, 0, sizeof(*mctx));
}
int main (int argc, char *argv[])
{
  uint64_t skip = 0;
  uint64_t left = -1;

  if (argc >= 2) skip = atoll (argv[1]);
  if (argc >= 3) left = atoll (argv[2]);

  printf ("Loading Kernel...\n");

  const char *filename = KERNEL_SRC;

  struct stat s;

  if (stat (filename, &s) == -1)
  {
    fprintf (stderr, "%s: %s in line %d\n", filename, strerror (errno), __LINE__);
    return (-1);
  }

  FILE *fp = fopen (filename, "rb");

  if (fp == NULL)
  {
    fprintf (stderr, "%s: %s in line %d\n", filename, strerror (errno), __LINE__);
    return (-1);
  }

  char *source_buf = (char *) malloc (s.st_size + 1);

  if (!fread (source_buf, sizeof (char), s.st_size, fp))
  {
    fprintf (stderr, "%s: %s in line %d\n", filename, strerror (errno), __LINE__);
    return (-1);
  }

  source_buf[s.st_size] = 0;

  fclose (fp);

  const char *sourceBuf[] = { source_buf };
  const size_t sourceLen[] = { s.st_size + 1 };

  printf ("Initializing OpenCL...\n");

  cl_platform_id platform;
  cl_uint num_devices = 0;
  cl_device_id devices[MAX_PLATFORM];

  gc_clGetPlatformIDs (1, &platform, NULL);
  gc_clGetDeviceIDs (platform, DEV_TYPE, MAX_PLATFORM, devices, &num_devices);

  gpu_ctx_t gpu_ctxs[MAX_GPU];

  memset (gpu_ctxs, 0, sizeof (gpu_ctxs));

  for (cl_uint device_id = 0; device_id < num_devices; device_id++)
  {
    cl_device_id device = devices[device_id];

    cl_context context = gc_clCreateContext (NULL, 1, &device, NULL, NULL);

    cl_program program = gc_clCreateProgramWithSource (context, 1, sourceBuf, sourceLen);

    gc_clBuildProgram (program, 1, &device, BUILD_OPTS, NULL, NULL);

    cl_kernel kernel = gc_clCreateKernel (program, KERNEL_NAME);

    cl_command_queue command_queue = gc_clCreateCommandQueue (context, device, 0);

    cl_uint max_compute_units;

    gc_clGetDeviceInfo (device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (max_compute_units), &max_compute_units, NULL);

    char device_name[BUFSIZ];

    memset (device_name, 0, sizeof (device_name));

    gc_clGetDeviceInfo (device, CL_DEVICE_NAME, sizeof (device_name), device_name, NULL);

    printf ("Found new device #%2u: %s, %u compute units\n", device_id, device_name, max_compute_units);

    const int num_threads = GPU_THREADS;
    const int num_elements = max_compute_units * num_threads * GPU_ACCEL;

    /**
     * GPU memory
     */

    const size_t size_block = num_elements * sizeof (block_t);
    const size_t size_results = num_threads * sizeof (uint32_t);

    cl_mem d_block = gc_clCreateBuffer (context, CL_MEM_READ_ONLY, size_block, NULL);
    cl_mem d_results = gc_clCreateBuffer (context, CL_MEM_WRITE_ONLY, size_results, NULL);

    gc_clSetKernelArg (kernel, 0, sizeof (cl_mem), (void *) &d_block);
    gc_clSetKernelArg (kernel, 1, sizeof (cl_mem), (void *) &d_results);

    /**
     * Host memory
     */

    block_t *h_block = (block_t *) malloc (size_block);
    uint32_t *h_results = (uint32_t *) malloc (size_results);

    memset (h_results, 0xff, size_results);

    gc_clEnqueueWriteBuffer (command_queue, d_results, CL_TRUE, 0, size_results, h_results, 0, NULL, NULL);

    /**
     * Buffers for candidates
     */

    uint8_t **plains_buf = (uint8_t **) calloc (num_elements * VECT_SIZE, sizeof (uint8_t *));

    for (int i = 0; i < num_elements * VECT_SIZE; i++)
    {
      /* Agreed, this is not nice. But who cares nowadays? */

      plains_buf[i] = (uint8_t *) malloc (MAX_LINELEN);
    }

    size_t *plains_len = (size_t *) calloc (num_elements * VECT_SIZE, sizeof (size_t));

    gpu_ctx_t *gpu_ctx = &gpu_ctxs[device_id];

    gpu_ctx->context           = context;
    gpu_ctx->program           = program;
    gpu_ctx->kernel            = kernel;
    gpu_ctx->command_queue     = command_queue;
    gpu_ctx->max_compute_units = max_compute_units;
    gpu_ctx->d_block           = d_block;
    gpu_ctx->d_results         = d_results;
    gpu_ctx->h_block           = h_block;
    gpu_ctx->h_results         = h_results;
    gpu_ctx->num_threads       = num_threads;
    gpu_ctx->num_elements      = num_elements;
    gpu_ctx->plains_buf        = plains_buf;
    gpu_ctx->plains_len        = plains_len;
  }

  /* static salt */

  const uint8_t salt_buf[16] =
  {
    0x97, 0x48, 0x6C, 0xAA, 0x22, 0x5F, 0xE8, 0x77,
    0xC0, 0x35, 0xCC, 0x03, 0x73, 0x23, 0x6D, 0x51
  };

  const size_t salt_len = sizeof (salt_buf);

  /* main loop */

  printf ("Initialization done, accepting candidates from stdin...\n\n");

  cl_uint cur_device_id = 0;

  while (!feof (stdin))
  {
    /* Get a new password candidate from stdin */

    uint8_t line_buf[MAX_LINELEN];

    int cur_c = 0;
    int prev_c = 0;

    size_t line_len = 0;

    /* - 100: we need some space for salt and padding */

    for (size_t i = 0; i < MAX_LINELEN - 100; i++)
    {
      cur_c = getchar ();

      if (cur_c == EOF) break;

      if ((prev_c == '\n') && (cur_c == '\0'))
      {
        line_len--;
        break;
      }

      line_buf[line_len] = cur_c;
      line_len++;

      prev_c = cur_c;
    }

    /* chop \r if it exists for some reason (in case the user fed a dictionary) */

    if (line_len >= 2)
    {
      if ((prev_c == '\r') && (cur_c == '\0')) line_len -= 2;
    }

    /* skip empty lines */

    if (line_len == 0) continue;

    /* The following enables distributed computing / resuming work */

    if (skip)
    {
      skip--;
      continue;
    }

    if (left)
    {
      left--;
    }
    else
    {
      break;
    }

    /* Append the constant salt */

    memcpy (line_buf + line_len, salt_buf, salt_len);

    line_len += salt_len;

    /* Generate a digest out of it */

    uint32_t digest[4];

    md5_transform ((uint32_t *) line_buf, (uint32_t) line_len, digest);

    /* Next guaranteed free GPU */

    gpu_ctx_t *gpu_ctx = &gpu_ctxs[cur_device_id];

    /* Save the original buffer in case this candidate cracks it */

    memcpy (gpu_ctx->plains_buf[gpu_ctx->num_cached], line_buf, line_len - salt_len);

    gpu_ctx->plains_len[gpu_ctx->num_cached] = line_len - salt_len;

    /* Next guaranteed free memory element on that GPU */

    const uint32_t element_div = gpu_ctx->num_cached / 4;
    const uint32_t element_mod = gpu_ctx->num_cached % 4;

    /* Copy the new digest */

    gpu_ctx->h_block[element_div].A[element_mod] = digest[0];
    gpu_ctx->h_block[element_div].B[element_mod] = digest[1];
    gpu_ctx->h_block[element_div].C[element_mod] = digest[2];
    gpu_ctx->h_block[element_div].D[element_mod] = digest[3];

    gpu_ctx->num_cached++;

    /* If the memory elements on that GPU are full, switch to the next GPU */

    if ((gpu_ctx->num_cached / VECT_SIZE) < gpu_ctx->num_elements) continue;

    cur_device_id++;

    /* If there is no GPU left, run the calculation */

    if (cur_device_id < num_devices) continue;

    /* Fire! */

    calc_work (num_devices, gpu_ctxs);

    launch_kernel (num_devices, gpu_ctxs);

    /* Collecting the results blocks until the kernels finish */

    check_results (num_devices, gpu_ctxs);

    /* Reset buffer state */

    for (cl_uint device_id = 0; device_id < num_devices; device_id++)
    {
      gpu_ctx_t *gpu_ctx = &gpu_ctxs[device_id];

      gpu_ctx->num_cached = 0;
    }

    cur_device_id = 0;
  }

  /* Final run over the leftovers */

  calc_work (num_devices, gpu_ctxs);

  launch_kernel (num_devices, gpu_ctxs);

  check_results (num_devices, gpu_ctxs);

  return -1;
}
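/*
 * For reference, a block_t layout consistent with the element_div /
 * element_mod indexing above (an assumption derived from this file with
 * VECT_SIZE == 4, not a copy of the original header): the digest words of
 * four candidates sit side by side so each GPU thread can process them as
 * a uint4-style vector.
 */
typedef struct
{
  uint32_t A[4];
  uint32_t B[4];
  uint32_t C[4];
  uint32_t D[4];

} block_t;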