// Sets up and runs the parallel kenken solver void runParallel(unsigned P) { int i, pid; long long myNodeCount; job_t* myJob; cell_t* myCells; constraint_t* myConstraints; struct timeval startCompTime, endCompTime; // Begin parallel omp_set_num_threads(P); // Run algorithm #pragma omp parallel default(shared) private(i, pid, myNodeCount, myJob, \ myCells, myConstraints) { // Initialize local variables and data-structures pid = omp_get_thread_num(); myNodeCount = 0; myConstraints = (constraint_t*)calloc(sizeof(constraint_t), numConstraints); if (!myConstraints) unixError("Failed to allocate memory for myConstraints"); myCells = (cell_t*)calloc(sizeof(cell_t), totalNumCells); if (!myCells) unixError("Failed to allocate memory for myCells"); myJob = (job_t*)malloc(sizeof(job_t)); if (!myJob) unixError("Failed to allocate memory for myJob"); // Record start of computation time #pragma omp single gettimeofday(&startCompTime, NULL); // Get and complete new job until none left, or solution found while (getNextJob(pid, myJob)) { memcpy(myConstraints, constraints, numConstraints * sizeof(constraint_t)); memcpy(myCells, cells, totalNumCells * sizeof(cell_t)); for (i = 0; i < myJob->length; i++) applyValue(myCells, myConstraints, myJob->assignments[i].cellIndex, myJob->assignments[i].value); if (ADD_TO_QUEUE(&(jobQueues[pid]), myJob)) { myNodeCount++; // Guarenteed to succeed given ADD_TO_QUEUE(...) returned true addToQueue(myJob->length, myCells, myConstraints, &(jobQueues[pid]), myJob->assignments, AVAILABLE(&jobQueues[pid])); } else solve(myJob->length, myCells, myConstraints, &myNodeCount); } #pragma omp critical nodeCount += myNodeCount; } // Calculate computation time gettimeofday(&endCompTime, NULL); compTime = TIME_DIFF(endCompTime, startCompTime); }
int region_copy (struct descriptor_d* dout, struct descriptor_d* din, unsigned flags) { #if defined (USE_COPY_VERIFY) struct descriptor_d din_v; struct descriptor_d dout_v; #endif ssize_t cbCopied = 0; /* Make sure we try to copy the no more than either descriptor can handle. */ ssize_t cbCopy = din->length - din->index; if (cbCopy > dout->length - dout->index) cbCopy = dout->length - dout->index; #if defined (USE_COPY_VERIFY) /* Create descriptors for rereading and verification */ /* *** FIXME: we ought to perform a dup () */ memcpy (&din_v, din, sizeof (*din)); memcpy (&dout_v, dout, sizeof (*dout)); dout_v.length = din_v.length; #endif { char __aligned rgb[512]; ssize_t cb; size_t available; int report_last = -1; int step = DRIVER_PROGRESS (din, dout); if (step) step += 10; #define AVAILABLE(c,s) (((c) < (s)) ? c : s) for (available = AVAILABLE (cbCopy, sizeof (rgb)) ; (cb = din->driver->read (din, rgb, available)) > 0; cbCopy -= cb, cbCopied += cb, available = AVAILABLE (cbCopy, sizeof (rgb))) { int report; size_t cbWrote; if (cb == 0) ERROR_RETURN (ERROR_FAILURE, "premature end of input"); #if defined (USE_COPY_VERIFY) if (flags & regionCopyVerify) { char __aligned rgbVerify[512]; ssize_t cbVerify = din_v.driver->read (&din_v, rgbVerify, sizeof (rgbVerify)); if (cbVerify != cb) { if (!(flags & regionCopyQuiet)) printf ("\rVerify failed: reread of input %d, expected %d, at" " 0x%x+0x%x\n", cbVerify, cb, cbCopied, 512); return ERROR_FAILURE; } if (memcmp (rgb, rgbVerify, cb)) { if (!(flags & regionCopyQuiet)) printf ("\rVerify failed: reread input compare at 0x%x+0x%x\n", cbCopied, 512); return ERROR_FAILURE; } } #endif if (flags & regionCopySwap) { int i; unsigned long* p = (unsigned long*) rgb; for (i = cb/4; i-- > 0; ++p) *p = swab32 (*p); } if (flags & regionCopySpinner) SPINNER_STEP; cbWrote = dout->driver->write (dout, rgb, cb); if (cbWrote != cb) ERROR_RETURN (ERROR_FAILURE, "truncated write"); #if defined (USE_COPY_VERIFY) if (verify) { char rgbVerify[512]; ssize_t cbVerify = dout_v.driver->read (&dout_v, rgbVerify, sizeof (rgbVerify)); if (cbVerify != cb) { if (flags & regionCopyReportVerifyErrors) printf ("\rVerify failed: reread of output %d, expected %d, at" " 0x%x+0x%x\n", cbVerify, cb, cbCopied, 512); return ERROR_FAILURE; } if (swap) { int i; unsigned long* p = (unsigned long*) rgbVerify; for (i = cb/4; i-- > 0; ++p) *p = swab32 (*p); } if (memcmp (rgb, rgbVerify, cb)) { if (flags & regionCopyReportVerifyErrors) printf ("\rVerify failed: reread output compare at 0x%x+0x%x\n", cbCopied, 512); return ERROR_FAILURE; } } #endif report = cbCopied>>step; if ((flags & regionCopySpinner) && step && report != report_last) { printf ("\r %d KiB\r", cbCopied/1024); report_last = report; } } if (cb < 0) ERROR_RETURN (ERROR_FAILURE, "copy overrun"); } return cbCopied; }
/* * Perform a Boyer-Moore substring search, using the precompiled * shift_table in ctx. Implementation mostly copied from glibc. */ static char* cksum_strstr(const cksum_strstr_ctx_t *ctx, const char *haystack_start) { const char *needle_start = ctx->needle; const char *needle = needle_start; const char *haystack = haystack_start; const size_t *shift_table = ctx->shift_table; size_t needle_len = ctx->needle_len; size_t suffix = ctx->suffix; size_t period = ctx->period; bool ok = true; size_t j = 0; size_t haystack_len; size_t shift; size_t i; while (*haystack && *needle) ok &= *haystack++ == *needle++; if (*needle) return NULL; if (ok) return (char*) haystack_start; haystack = strchr (haystack_start + 1, *needle_start); if (!haystack) return (char *) haystack; needle -= needle_len; haystack_len = (haystack > haystack_start + needle_len ? 1 : needle_len + haystack_start - haystack); while (AVAILABLE (haystack, haystack_len, j, needle_len)) { const char *pneedle; const char *phaystack; /* Check the last byte first; if it does not match, then shift to the next possible match location. */ shift = shift_table[(unsigned char) CANON_ELEMENT (haystack[j + needle_len - 1])]; if (0 < shift) { j += shift; continue; } /* Scan for matches in right half. The last byte has already been matched, by virtue of the shift table. */ i = suffix; pneedle = &needle[i]; phaystack = &haystack[i + j]; while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++) == CANON_ELEMENT (*phaystack++))) ++i; if (needle_len - 1 <= i) { /* Scan for matches in left half. */ i = suffix - 1; pneedle = &needle[i]; phaystack = &haystack[i + j]; while (i != SIZE_MAX && (CANON_ELEMENT (*pneedle--) == CANON_ELEMENT (*phaystack--))) --i; if (i == SIZE_MAX) return (char*) (haystack + j); j += period; } else { j += i - suffix + 1; } } return NULL; }