/*
 * Write the per-opcode test coverage table to `of`.
 *
 * A row is printed for every slot of report_data->opcode_counts[] that was hit
 * at least once (the slot at index CRM_UNIMPLEMENTED + 1 never qualifies on
 * its hit count alone) and, unless `compressed_report` is non-zero, also for
 * every slot whose statement definition is not CRM_BOGUS.
 *
 * Each row shows: opcode name, hit count, accumulated time, that time as a
 * percentage of the total, the average time per hit, and that average as a
 * percentage of the sum of all averages.  Rows without hits show "-" in the
 * time columns.  A "Totals:" line closes the table.
 *
 *   report_data        collected opcode_counts[] / opcode_times[] statistics
 *   of                 stream the report is written to
 *   compressed_report  non-zero: only list opcodes which were actually hit
 *
 * Always returns 0.
 */
int produce_opcode_coverage_report(CRM_ANALYSIS_REPORT_DATA *report_data, FILE *of, int compressed_report)
{
    int i;
    double total_time = 0.0;
    double total_time_per_piece = 0.0;
    int total_count = 0;

    fprintf(of,
            "CRM script opcode test coverage%s:\n"
            "===============================%s=\n"
            "\n"
            " opcode # hits time spent (seconds) %% a piece (seconds) %%\n"
            "-----------------------------------------------------------------------------------\n",
            (compressed_report ? " (only items which have been tested)" : ""),
            (compressed_report ? "====================================" : ""));

    /* Pass 1: accumulate the totals over exactly those rows which pass 2 prints. */
    for (i = 0; i < WIDTHOF(report_data->opcode_counts); i++)
    {
        const STMT_DEF_TYPE *stmt_def = get_stmt_def(i);

        if ((report_data->opcode_counts[i] > 0 && i != CRM_UNIMPLEMENTED + 1)
            || (!compressed_report && stmt_def->stmt_code != CRM_BOGUS))
        {
            total_time += report_data->opcode_times[i];
            if (report_data->opcode_counts[i] > 0)
            {
                total_time_per_piece += ((double)report_data->opcode_times[i]) / report_data->opcode_counts[i];
            }
            total_count += report_data->opcode_counts[i];
        }
    }

    /* Pre-divide by 100 so that 'value / total' directly yields a percentage. */
    total_time /= 100.0;
    total_time_per_piece /= 100.0;

    /* Pass 2: print the rows. */
    for (i = 0; i < WIDTHOF(report_data->opcode_counts); i++)
    {
        const STMT_DEF_TYPE *stmt_def = get_stmt_def(i);

        // in uncompressed report, only show the bogus/unknown lines when there's actually some time spent to report there.
        if ((report_data->opcode_counts[i] > 0 && i != CRM_UNIMPLEMENTED + 1)
            || (!compressed_report && stmt_def->stmt_code != CRM_BOGUS))
        {
            fprintf(of, "%11.11s %13d ", stmt_def->stmt_name, report_data->opcode_counts[i]);
            if (report_data->opcode_counts[i] > 0)
            {
                /*
                 * Both totals are zero when no time at all was recorded for the
                 * listed opcodes; report 0% in that case instead of dividing
                 * by zero and printing NaN/Inf.
                 */
                double pct_of_total = 0.0;
                double pct_per_piece = 0.0;

                if (total_time != 0.0)
                {
                    pct_of_total = report_data->opcode_times[i] / total_time;
                }
                if (total_time_per_piece != 0.0)
                {
                    pct_per_piece = ((double)report_data->opcode_times[i])
                                    / (report_data->opcode_counts[i] * total_time_per_piece);
                }

                fprintf_nsecs2dhmsss(of, 5, 6, report_data->opcode_times[i]);
                fprintf(of, " %7.3f ", pct_of_total);
                fprintf_nsecs2dhmsss(of, 3, 6, report_data->opcode_times[i] / report_data->opcode_counts[i]);
                fprintf(of, " %7.3f\n", pct_per_piece);
            }
            else
            {
                fprintf(of, "%21s %7s %19s %7s\n", "-", "-", "-", "-");
            }
        }
    }

    fprintf(of, "-----------------------------------------------------------------------------------\n");
    fprintf(of, "%11s %13d ", "Totals:", total_count);
    /* undo the percentage pre-division to get the real total back */
    fprintf_nsecs2dhmsss(of, 5, 6, (int64_t)(total_time * 100.0));
    fprintf(of, " %7.3f %19s %7s\n\n\n", 100.0, "(N.A.)", "(N.A.)");

    return 0;
}
// this guy does linear interpolation, it's fun double crm_log(double x) { double r = 0.0, g; int i; while (x >= 2.0) { r += log_lookup_table[768]; //this is log(2) x /= 2.0; } i = (int)(x * 384.0); g = x - ((double)i) / 384.0; CRM_ASSERT(i < 768); CRM_ASSERT(WIDTHOF(log_lookup_table) == 768); r += (1.0 - g) * log_lookup_table[i] + g * log_lookup_table[i + 1]; return r; }
// from crm_vector_tokenize.c int main(void) { char input[1024]; char arg[8192]; char opts[1024]; int i, j; int ret; int k; crmhash_t feavec[2048]; uint32_t feamult[2048]; uint32_t feaord[2048]; ARGPARSE_BLOCK apb = { 0 }; VT_USERDEF_TOKENIZER tokenizer = { 0 }; VT_USERDEF_COEFF_MATRIX our_coeff = { 0 }; int use_default_re_and_coeff = USE_DEFAULT_RE_AND_COEFF; char my_regex[256]; static const int coeff[] = { 1, 3, 0, 0, 0, 1, 0, 5, 0, 0, 1, 0, 0, 11, 0, 1, 0, 0, 0, 23 }; init_stdin_out_err_as_os_handles(); #if 0 setvbuf(stdout, stdout_buf, _IOFBF, sizeof(stdout_buf)); setvbuf(stderr, stderr_buf, _IOFBF, sizeof(stderr_buf)); #endif #if (defined(WIN32) || defined(_WIN32) || defined(_WIN64) || defined(WIN64)) && defined(_DEBUG) /* * Hook in our client-defined reporting function. * Every time a _CrtDbgReport is called to generate * a debug report, our function will get called first. */ _CrtSetReportHook(crm_dbg_report_function); /* * Define the report destination(s) for each type of report * we are going to generate. In this case, we are going to * generate a report for every report type: _CRT_WARN, * _CRT_ERROR, and _CRT_ASSERT. * The destination(s) is defined by specifying the report mode(s) * and report file for each report type. 
*/ _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_DEBUG); _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_DEBUG); _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR); _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG); _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR); // Store a memory checkpoint in the s1 memory-state structure _CrtMemCheckpoint(&crm_memdbg_state_snapshot1); atexit(crm_report_mem_analysis); // Get the current bits i = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG); // Set the debug-heap flag so that freed blocks are kept on the // linked list, to catch any inadvertent use of freed memory #if 0 i |= _CRTDBG_DELAY_FREE_MEM_DF; #endif // Set the debug-heap flag so that memory leaks are reported when // the process terminates. Then, exit. //i |= _CRTDBG_LEAK_CHECK_DF; // Clear the upper 16 bits and OR in the desired freqency //i = (i & 0x0000FFFF) | _CRTDBG_CHECK_EVERY_16_DF; i |= _CRTDBG_CHECK_ALWAYS_DF; // Set the new bits _CrtSetDbgFlag(i); // // set a malloc marker we can use it in the leak dump at the end of the program: // (void)_calloc_dbg(1, 1, _CLIENT_BLOCK, __FILE__, __LINE__); #endif // fprintf(stderr, " args: %d \n", argc); // for (i = 0; i < argc; i++) // fprintf(stderr, " argi: %d, argv: %s \n", i, argv[i]); atexit(crm_final_cleanup); #if defined(HAVE__SET_OUTPUT_FORMAT) _set_output_format(_TWO_DIGIT_EXPONENT); // force MSVC (& others?) to produce floating point %f with 2 digits for power component instead of 3 for easier comparison with 'knowngood'. #endif // force MSwin/Win32 console I/O into binary mode: treat \r\n and \n as completely different - like it is on *NIX boxes! 
#if defined(HAVE__SETMODE) && defined(HAVE__FILENO) && defined(O_BINARY) (void)_setmode(_fileno(crm_stdin), O_BINARY); (void)_setmode(_fileno(crm_stdout), O_BINARY); (void)_setmode(_fileno(crm_stderr), O_BINARY); #endif user_trace = 1; internal_trace = 1; do { strcpy(my_regex, "[[:alpha:]]+"); memset(&tokenizer, 0, sizeof(tokenizer)); memset(&our_coeff, 0, sizeof(our_coeff)); fprintf(stdout, "Enter a test string: "); fgets(input, sizeof(input), stdin); input[sizeof(input) - 1] = 0; fprintf(stdout, "Input = '%s'\n", input); // fscanf(stdin, "%1023s", input); // fprintf(stdout, "Input = '%s'\n", input); fprintf(stdout, "Enter optional 'vector: ...' arg (don't forget the 'vector: prefix in there!): "); fgets(arg, sizeof(arg), stdin); arg[sizeof(arg) - 1] = 0; fprintf(stdout, "Args = '%s'\n", arg); apb.s1start = my_regex; apb.s1len = (int)strlen(my_regex); apb.s2start = arg; apb.s2len = (int)strlen(arg); apb.sflags = CRM_MARKOVIAN | CRM_UNIQUE; fprintf(stdout, "Optional OSBF style token globbing: type integer values for max_token_size and count (must specify both!): "); fgets(opts, sizeof(opts), stdin); opts[sizeof(opts) - 1] = 0; k = sscanf(opts, "%d %d", &i, &j); if (k == 2) { fprintf(stdout, "using max_token_size %d and count %d.\n", i, j); tokenizer.max_token_length = i; tokenizer.max_big_token_count = j; } tokenizer.regex = my_regex; tokenizer.regexlen = (int)strlen(my_regex); if (strlen(arg) < 3) { CRM_VERIFY(transfer_matrix_to_VT(&our_coeff, coeff, 5, 4, 1)); } memset(feavec, 0, sizeof(feavec)); memset(feamult, 0, sizeof(feamult)); memset(feaord, 0, sizeof(feaord)); tokenizer.input_next_offset = 0; ret = crm_vector_tokenize_selector(&apb, vht, tdw, input, (int)strlen(input), 0, (use_default_re_and_coeff ? NULL : &tokenizer), (use_default_re_and_coeff ? 
NULL : &our_coeff), feavec, WIDTHOF(feavec), feamult, feaord, &j); for (k = 0; k < j; k++) { fprintf(stdout, "feature[%4d] = %12lu (%08lX) / mul: %d, order: %d\n", k, (unsigned long int)feavec[k], (unsigned long int)feavec[k], feamult[k], feaord[k]); } fprintf(stdout, "... and next_offset is %d\n", tokenizer.input_next_offset); tokenizer.input_next_offset = 0; memset(feavec, 0, sizeof(feavec)); ret = crm_vector_tokenize_selector_old(&apb, input, 0, (int)strlen(input), (use_default_re_and_coeff ? NULL : my_regex), (use_default_re_and_coeff ? 0 : (int)strlen(my_regex)), (use_default_re_and_coeff ? NULL : coeff), (use_default_re_and_coeff ? 0 : 5), (use_default_re_and_coeff ? 0 : 4), feavec, WIDTHOF(feavec), &j, &tokenizer.input_next_offset); for (k = 0; k < j; k++) { fprintf(stdout, "feature[%4d] = %12lu (%08lX)\n", k, (unsigned long int)feavec[k], (unsigned long int)feavec[k]); } fprintf(stdout, "... and next_offset is %d\n", tokenizer.input_next_offset); fprintf(stdout, "Another round? (enter 'y' for yes): "); fgets(input, sizeof(input), stdin); input[sizeof(input) - 1] = 0; } while (input[0] == 'y'); return ret >= 0 ? EXIT_SUCCESS : EXIT_FAILURE; }
/*
 * I420_R5G5B5_MMX - MMX colour conversion of one picture.
 *
 * Judging by the name this converts planar I420 (Y, U, V planes) into 15 bit
 * R5G5B5 pixels; the actual arithmetic lives in the MMX_* macros, which are
 * defined elsewhere and not visible here.
 *
 *   pHandle        conversion context (used as PSOFTCONV); the function
 *                  returns immediately when it is NULL
 *   p_y, p_u, p_v  source plane pointers; advanced while converting
 *   p_d            destination buffer; output starts pconv->dst_offset bytes
 *                  into it
 *
 * NOTE(review): several locals (dst_stride, i_right_margin, i_chroma_width,
 * i_scale_count, p_pic_start, p_offset_start, p_offset, ...) are never
 * referenced directly in this body; presumably the MMX_CALL / SCALE_WIDTH /
 * SCALE_HEIGHT macros use them by name -- confirm before renaming or removing
 * any of them.
 */
void I420_R5G5B5_MMX(void * pHandle, uint8_t *p_y, uint8_t *p_u, uint8_t *p_v, uint16_t *p_d)
{
    PSOFTCONV pconv = pHandle;
    if (!pHandle) return;
    int b_hscale = pconv->b_hscale; /* horizontal scaling type */
    unsigned int i_vscale = pconv->i_vscale; /* vertical scaling type */
    unsigned int i_x, i_y; /* horizontal and vertical indexes */
    CC_RECT * dst_rect = &pconv->dst_dis_rect;
    CC_RECT * src_rect = &pconv->src_rect;
    int dst_stride = pconv->dst_pitch;
    /* first destination pixel: dst_offset is applied in bytes */
    uint16_t *p_pic = (uint16_t *)((uint8_t *)p_d + pconv->dst_offset);
    int i_right_margin = pconv->i_right_margin;
    int i_rewind;
    int i_scale_count; /* scale modulo counter */
    int i_chroma_width = WIDTHOF(src_rect) / 2; /* chroma width */
    uint16_t * p_pic_start; /* beginning of the current line for copy */
    /* Conversion buffer pointer */
    uint16_t * p_buffer_start = pconv->pConvBuffer;
    uint16_t * p_buffer;
    /* Offset array pointer */
    int * p_offset_start = pconv->pOffsetBuffer;
    int * p_offset;
    const int i_source_margin = pconv->i_source_margin;
    const int i_source_margin_c = pconv->i_source_margin_c;

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ? HEIGHTOF(dst_rect) : HEIGHTOF(src_rect);

    /* Pixels are converted in groups of 8; when the source width is not a
     * multiple of 8, i_rewind is the number of pixels to step back so that one
     * extra group ends exactly at the end of the row. */
    if( WIDTHOF(src_rect) & 7 )
    {
        i_rewind = 8 - ( WIDTHOF(src_rect) & 7 );
    }
    else
    {
        i_rewind = 0;
    }

    for( i_y = 0; i_y < HEIGHTOF(src_rect); i_y++ )
    {
        p_pic_start = p_pic;
        /* with horizontal scaling the row goes to the conversion buffer first,
         * otherwise it is written straight into the picture */
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        /* all complete groups of 8 pixels: 8 luma and 4 + 4 chroma samples each */
        for ( i_x = WIDTHOF(src_rect) / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_16
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_15
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_16
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_15
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 2 );

        /* skip the source padding after the row; the chroma margins are only
         * added after odd rows */
        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }
    /* re-enable FPU registers */
    MMX_END;
}
// strpnmath - do a basic math evaluation of very simple expressions. // // This does math, in RPN, on a string, and returns a string value. // int strpnmath(char *buf, int inlen, int maxlen, int *retstat) { double stack[DEFAULT_MATHSTK_LIMIT]; // the evaluation stack double sd; // how many 10^n's we've seen since a decimal int od; // output decimal flag int ip, op; // in string pointer, out string pointer int sp; // stack pointer - points to next (vacant) space int sinc; // stack incrment enable - do we start a new number int errstat; // error status char outformat[64]; // output format int outstringlen; // start off by initializing things ip = 0; // in pointer is zero op = 0; // output pointer is zero sp = 0; // still at the top of the stack od = 0; // no decimals seen yet, so no flag to output in decimal sinc = 0; // no autopush. outformat[0] = 0; // now our number-inputting hacks stack[sp] = 0.0; sd = 1.0; // all initialized... let's begin. if (internal_trace) { fprintf(stderr, "Math on '%s' len %d retstat %p\n", buf, inlen, (void *)retstat); } for (ip = 0; ip < inlen; ip++) { if (internal_trace) fprintf(stderr, "ip = %d, sp = %d, stack[sp] = %f, ch='%c'\n", ip, sp, stack[sp], (crm_isascii(buf[ip]) && crm_isprint(buf[ip]) ? buf[ip] : '.')); if (sp < 0) { errstat = nonfatalerror("Stack Underflow in math evaluation", ""); return 0; } if (sp >= DEFAULT_MATHSTK_LIMIT) { errstat = nonfatalerror("Stack Overflow in math evaluation.\n " "CRM114 Barbie says 'This math is too hard'.", buf); return 0; } switch (buf[ip]) { // // a digit,or maybe a number - big change - we now use strtod // case '.': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': case '+': { char *frejected; // handle the case of a minus sign that isn't a unary -. 
if (buf[ip] == '-' && !(crm_isdigit(buf[ip + 1]))) { if (sp > 0) { sp--; stack[sp] = stack[sp] - stack[sp + 1]; sinc = 1; } break; } if (buf[ip] == '+' && !(crm_isdigit(buf[ip + 1]))) { if (sp > 0) { sp--; stack[sp] = stack[sp] + stack[sp + 1]; sinc = 1; } break; } // Neither unary +/- so we use strtod to convert // the string we're looking at to floating point. sp++; stack[sp] = strtod(&buf[ip], &frejected); if (user_trace) fprintf(stderr, "got number: %e\n", stack[sp]); // // Now, move [ip] over to accomodate characters used. // (the -1 is because there's an auto-increment in the big // FOR-loop) ip = (int)(frejected - buf) - 1; } break; // // and now the standard math operators (except for - and + above) // case '*': { if (sp > 0) { sp--; stack[sp] = stack[sp] *stack[sp + 1]; sinc = 1; } } break; case '/': { if (sp > 0) { sp--; // don't worry about divide-by-zero, we get INF in IEEE. stack[sp] = stack[sp] / stack[sp + 1]; sinc = 1; } } break; case '%': { if (sp > 0) { sp--; #ifdef CRM_SUPPORT_FMOD stack[sp] = fmod(stack[sp], stack[sp + 1]); #else stack[sp] = ((int64_t)stack[sp]) % ((int64_t)stack[sp + 1]); #endif sinc = 1; } } break; case '^': // exponentiation - for positive bases, neg base + int exp. 
if (sp > 0) { sp--; if (stack[sp] < 0.0 /* use FLT_EPSILON to compensate for added inaccuracy due to previous calculations */ && ((int64_t)(stack[sp + 1])) >= stack[sp + 1] - FLT_EPSILON && ((int64_t)(stack[sp + 1])) <= stack[sp + 1] + FLT_EPSILON) { stack[sp] = stack[sp] / 0.0; } else { stack[sp] = pow(stack[sp], stack[sp + 1]); } if (internal_trace) fprintf(stderr, "exp out: %f\n", stack[sp]); sinc = 1; } break; case 'v': // logs as BASE v ARG; (NaN on BASE <= 0) if (sp > 0) { sp--; if (stack[sp] <= 0.0) { stack[sp] = stack[sp] / 0.0; } else { stack[sp] = log(stack[sp + 1]) / log(stack[sp]); } sinc = 1; } break; case '=': { if (sp > 0) { sp--; /* use FLT_EPSILON to compensate for added inaccuracy due to previous calculations */ if ((stack[sp] <= stack[sp + 1] + FLT_EPSILON) || (stack[sp] >= stack[sp + 1] - FLT_EPSILON)) /* if (stack[sp] == stack[sp + 1]) */ { if (retstat) *retstat = 0; stack[sp] = 1.0; } else { if (retstat) *retstat = 1; stack[sp] = 0.0; } sinc = 1; } } break; case '!': { if (sp > 0 && buf[ip + 1] == '=') { ip++; // gobble up the equals sign sp--; /* use FLT_EPSILON to compensate for added inaccuracy due to previous calculations */ if ((stack[sp] > stack[sp + 1] + FLT_EPSILON) || (stack[sp] < stack[sp + 1] - FLT_EPSILON)) /* if (stack[sp] != stack[sp + 1]) */ { if (retstat) *retstat = 0; stack[sp] = 1.0; } else { if (retstat) *retstat = 1; stack[sp] = 0.0; } sinc = 1; } } break; case '>': { if (buf[ip + 1] == '=') { ip++; // gobble up the equals sign too... 
if (sp > 0) { sp--; if (stack[sp] >= stack[sp + 1]) { if (retstat) *retstat = 0; stack[sp] = 1.0; } else { if (retstat) *retstat = 1; stack[sp] = 0.0; } sinc = 1; } } else { if (sp > 0) { sp--; if (stack[sp] > stack[sp + 1]) { if (retstat) *retstat = 0; stack[sp] = 1; } else { if (retstat) *retstat = 1; stack[sp] = 0; } sinc = 1; } } } break; case '<': { if (buf[ip + 1] == '=') { ip++; // gobble up the equals sign if (sp > 0) { sp--; if (stack[sp] <= stack[sp + 1]) { if (retstat) *retstat = 0; stack[sp] = 1; } else { if (retstat) *retstat = 1; stack[sp] = 0; } sinc = 1; } } else { if (sp > 0) { sp--; if (stack[sp] < stack[sp + 1]) { if (retstat) *retstat = 0; stack[sp] = 1; } else { if (retstat) *retstat = 1; stack[sp] = 0; } sinc = 1; } } } break; case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case 'x': case 'X': // User-specified formatting; use the user's // top-of-stack value as a format. // { if (sp > 0) { char tempstring[2048]; tempstring[0] = 0; sp--; // Special case - if the format is an integer, add a ".0" // to the format string so we get integer output. 
if (buf[ip] == 'x' || buf[ip] == 'X') { if (((int)stack[sp + 1]) <= stack[sp + 1] + FLT_EPSILON && ((int)stack[sp + 1]) >= stack[sp + 1] - FLT_EPSILON) { snprintf(outformat, WIDTHOF(outformat), "%%%.0g%s%c", stack[sp + 1], #if defined(_MSC_VER) "I64", #elif defined(HAVE_LONG_LONG_INT) && (SIZEOF_LONG_INT < 8) "ll", #else "l", #endif (short)buf[ip]); outformat[WIDTHOF(outformat) - 1] = 0; } else { snprintf(outformat, WIDTHOF(outformat), "%%0%.0g%s%c", stack[sp + 1], #if defined(_MSC_VER) "I64", #elif defined(HAVE_LONG_LONG_INT) && (SIZEOF_LONG_INT < 8) "ll", #else "l", #endif (short)buf[ip]); outformat[WIDTHOF(outformat) - 1] = 0; } } else { if (((int)stack[sp + 1]) <= stack[sp + 1] + FLT_EPSILON && ((int)stack[sp + 1]) >= stack[sp + 1] - FLT_EPSILON) { snprintf(outformat, WIDTHOF(outformat), "%%%.0g.0%c", stack[sp + 1], buf[ip]); outformat[WIDTHOF(outformat) - 1] = 0; } else { snprintf(outformat, WIDTHOF(outformat), "%%%g%c", stack[sp + 1], buf[ip]); outformat[WIDTHOF(outformat) - 1] = 0; } } if (internal_trace) fprintf(stderr, "Format string -->%s<-- \n", outformat); stack[sp + 1] = 0; if (buf[ip] != 'x' && buf[ip] != 'X') { snprintf(tempstring, WIDTHOF(tempstring), outformat, stack[sp]); tempstring[WIDTHOF(tempstring) - 1] = 0; if (internal_trace) { fprintf(stderr, "Intermediate result string -->%s<-- \n", tempstring); } } else { #if defined(_MSC_VER) int64_t intpart; intpart = (int64_t)stack[sp]; #elif defined(HAVE_LONG_LONG_INT) && (SIZEOF_LONG_INT < 8) long long int intpart; intpart = (long long int)stack[sp]; #else long int intpart; intpart = (long int)stack[sp]; #endif snprintf(tempstring, WIDTHOF(tempstring), outformat, intpart); tempstring[WIDTHOF(tempstring) - 1] = 0; if (internal_trace) { fprintf(stderr, "Intermediate hex result string -->%s<-- \n", tempstring); } } // And now do the back conversion of the result. // Note that X formatting (hexadecimal) does NOT do the // back conversion; the only effect is to store the // format string for later. 
if (buf[ip] != 'x' && buf[ip] != 'X') { stack[sp] = strtod(tempstring, NULL); } } } break; case ' ': case '\r': case '\n': case '\t': // // a space is just an end-of-number - push the number we're // seeing. { sinc = 1; } break; case '(': case ')': // why are you using parenthesis in RPN code?? { nonfatalerror("It's just silly to use parenthesis in RPN!", " Perhaps you should check your setups?"); sinc = 1; } break; default: { char bogus[4]; bogus[0] = buf[ip]; bogus[1] = 0; nonfatalerror(" Sorry, but I can't do RPN math on the un-mathy " "character found: ", bogus); sinc = 1; } break; } } if (internal_trace) { fprintf(stderr, "Final qexpand state: ip = %d, sp = %d, stack[sp] = %f, ch='%c'\n", ip, sp, stack[sp], (crm_isascii(buf[ip]) && crm_isprint(buf[ip]) ? buf[ip] : '.')); if (retstat) fprintf(stderr, "retstat = %d\n", *retstat); } // now the top of stack contains the result of the calculation. // fprintf it into the output buffer, and we're done. outstringlen = math_formatter(stack[sp], outformat, buf, maxlen); CRM_ASSERT(outstringlen >= 0); CRM_ASSERT(outstringlen < maxlen); return outstringlen; }
int stralmath(char *buf, int inlen, int maxlen, int *retstat) { double leftarg[DEFAULT_MATHSTK_LIMIT]; // left float arg int opstack[DEFAULT_MATHSTK_LIMIT]; // operand double rightarg; // right float arg int validstack[DEFAULT_MATHSTK_LIMIT]; // validity markers int sp; // stack pointer int ip, op; // input and output pointer int errstat; // error status char *frejected; // done loc. for a strtod. char outformat[256]; // how to format our result int state; // Local copy of state, in case // retstat is NULL (not used) // Start off by initializing things ip = 0; op = 0; sp = 0; outformat[0] = 0; state = 0; // Set up the stacks // leftarg[0] = 0.0; rightarg = 0.0; opstack[0] = 0; validstack[0] = 0; // initialization done... begin the work. if (internal_trace) { fprintf(stderr, "Starting Algebraic Math on '%s' (len %d)\n", buf, inlen); } for (ip = 0; ip < inlen; ip++) { // Debugging trace if (internal_trace) { fprintf(stderr, "ip = %d, sp = %d, L=%f, Op=%c, R=%f, V=%x next='%c'\n", ip, sp, leftarg[sp], (short)opstack[sp], rightarg, (short)validstack[sp], (crm_isascii(buf[ip]) && crm_isprint(buf[ip]) ? buf[ip] : '.')); } // Top of the loop- we're a state machine driven by the top of // the stack's validity. if (sp >= DEFAULT_MATHSTK_LIMIT) { errstat = nonfatalerror("Stack Overflow in math evaluation. ", "CRM114 Barbie says 'This math is too hard'."); if (retstat) *retstat = 0; return 0; } switch (validstack[sp]) { case (0): // empty top of stack; can accept either number or monadic operator if (internal_trace) fprintf(stderr, "stacktop empty\n"); switch (buf[ip]) { // Monadic operators and numbers case '-': case '+': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': case ',': // for those locales that use , not . 
as decimal { if (internal_trace) fprintf(stderr, "found left numeric\n"); leftarg[sp] = strtod(&buf[ip], &frejected); if (user_trace) fprintf(stderr, " Got left arg %e\n", leftarg[sp]); ip = (int)(frejected - buf) - 1; validstack[sp] = LEFTVALID; } break; case '(': { if (internal_trace) fprintf(stderr, "Open Paren - start new math stack level\n"); sp++; leftarg[sp] = 0.0; rightarg = 0.0; opstack[sp] = 0; validstack[sp] = 0; } break; // deal with a possible rightarg strtod situation case ' ': break; default: errstat = nonfatalerror("Math expression makes no sense", " (need to have a number here)."); if (retstat) *retstat = 0; return 0; } break; // if left arg is valid; next thing must be an operator; // however op then op is also valid and should form composite // operators like '>=' and '!=' (see below). case (LEFTVALID): if (internal_trace) fprintf(stderr, "leftvalid\n"); switch (buf[ip]) { case '-': case '+': case '*': case '/': case '%': case '>': case '<': case '=': case '!': case '^': case 'v': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case 'x': case 'X': { if (internal_trace) fprintf(stderr, "found op\n"); opstack[sp] = (buf[ip] & 0xFF); validstack[sp] = LEFTVALID | OPVALID; // is the next char also an op? If so, gobble it up? switch ((opstack[sp] << 8) | buf[ip + 1]) { case (('<' << 8) + '='): /* [i_a] */ case (('>' << 8) + '='): case (('!' 
<< 8) + '='): if (internal_trace) fprintf(stderr, "two-char operator\n"); opstack[sp] = ((opstack[sp] << 8) | buf[ip + 1]); ip++; } } break; case ')': // close paren pops the stack, and returns the left arg // to "whereever", which might be leftarg stack, or rightarg if (internal_trace) fprintf(stderr, "close parenthesis, pop stack down\n"); sp--; if (validstack[sp] == (LEFTVALID | OPVALID)) { rightarg = leftarg[sp + 1]; validstack[sp] = LEFTVALID | OPVALID | RIGHTVALID; } else { leftarg[sp] = leftarg[sp + 1]; validstack[sp] = LEFTVALID; } break; case ' ': break; default: errstat = nonfatalerror("Math needs an operator in: ", buf); if (retstat) *retstat = 0; return 0; } break; case (LEFTVALID | OPVALID): // left arg and op are both valid; right now we can have // an enhanced operator (next char is also an op) if (internal_trace) fprintf(stderr, "left + opvalid\n"); switch (buf[ip]) { case '(': { if (internal_trace) { fprintf(stderr, "Open Paren - start new math stack level\n"); } sp++; leftarg[sp] = 0.0; rightarg = 0.0; opstack[sp] = 0; validstack[sp] = 0; } break; // deal with a possible rightarg strtod situation case '-': case '+': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': case ',': { rightarg = strtod(&buf[ip], &frejected); if (internal_trace) fprintf(stderr, " Got right arg %e\n", rightarg); ip = (int)(frejected - buf) - 1; validstack[sp] = validstack[sp] | RIGHTVALID; } case ' ': break; default: errstat = nonfatalerror("Math is missing a number in: ", buf); if (retstat) *retstat = 0; return 0; } } ////////////////////////////////////////////////// // // Now if we have a left-op-right situation, and can // execute the operator right here and now. 
// while (validstack[sp] == (LEFTVALID | OPVALID | RIGHTVALID)) { if (internal_trace) fprintf(stderr, "Executing %c operator\n", (short)opstack[sp]); switch (opstack[sp]) { // Math operators case '+': leftarg[sp] += rightarg; break; case '-': leftarg[sp] -= rightarg; break; case '*': leftarg[sp] *= rightarg; break; case '/': leftarg[sp] /= rightarg; break; case '%': #ifdef CRM_SUPPORT_FMOD leftarg[sp] = fmod(leftarg[sp], rightarg); #else leftarg[sp] = (int64_t)leftarg[sp] % (int64_t)rightarg; #endif break; case '^': // since we don't do complex numbers (yet) handle as NaN if (leftarg[sp] < 0.0 /* use FLT_EPSILON to compensate for added inaccuracy due to previous calculations */ && ((int64_t)(leftarg[sp])) >= leftarg[sp] - FLT_EPSILON && ((int64_t)(leftarg[sp])) <= leftarg[sp] + FLT_EPSILON) { leftarg[sp] /= 0.0; } else { leftarg[sp] = pow(leftarg[sp], rightarg); } if (internal_trace) fprintf(stderr, "exp out: %f\n", leftarg[sp]); break; case 'v': // Logarithm BASE v ARG // Negative bases on logarithms? Not for us! force NaN if (leftarg[sp] <= 0.0) { leftarg[sp] /= 0.0; } else { leftarg[sp] = log(rightarg) / log(leftarg[sp]); } break; // Relational operators case '<': if (leftarg[sp] < rightarg) { leftarg[sp] = 1.0; state = 0; } else { leftarg[sp] = 0; state = 1; } break; case '>': if (leftarg[sp] > rightarg) { leftarg[sp] = 1; state = 0; } else { leftarg[sp] = 0; state = 1; } break; case '=': if (leftarg[sp] == rightarg) { leftarg[sp] = 1; state = 0; } else { leftarg[sp] = 0; state = 1; } break; case (('<' << 8) + '='): if (leftarg[sp] <= rightarg) { leftarg[sp] = 1; state = 0; } else { leftarg[sp] = 0; state = 1; } break; case (('>' << 8) + '='): if (leftarg[sp] >= rightarg) { leftarg[sp] = 1; state = 0; } else { leftarg[sp] = 0; state = 1; } break; case (('!' 
<< 8) + '='): if (leftarg[sp] != rightarg) { leftarg[sp] = 1; state = 0; } else { leftarg[sp] = 0; state = 1; } break; // Formatting operators case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case 'x': case 'X': { char tempstring[2048]; if (internal_trace) { fprintf(stderr, "Formatting operator '%c'\n", (short)opstack[sp]); } // char tempstring [2048]; // Do we have a float or an int format? if (opstack[sp] == 'x' || opstack[sp] == 'X') { if (((int)rightarg) <= rightarg + FLT_EPSILON && ((int)rightarg) >= rightarg - FLT_EPSILON) { snprintf(outformat, WIDTHOF(outformat), "%%%.0gll%c", rightarg, (short)opstack[sp]); outformat[WIDTHOF(outformat) - 1] = 0; } else { snprintf(outformat, WIDTHOF(outformat), "%%0%.0gll%c", rightarg, (short)opstack[sp]); outformat[WIDTHOF(outformat) - 1] = 0; } } else { if (((int)rightarg) <= rightarg + FLT_EPSILON && ((int)rightarg) >= rightarg - FLT_EPSILON) { snprintf(outformat, WIDTHOF(outformat), "%%%.0g.0%c", rightarg, (short)opstack[sp]); outformat[WIDTHOF(outformat) - 1] = 0; } else { snprintf(outformat, WIDTHOF(outformat), "%%%g%c", rightarg, (short)opstack[sp]); outformat[WIDTHOF(outformat) - 1] = 0; } } if (internal_trace) fprintf(stderr, "Format string -->%s<-- \n", outformat); // A little more funny business needed for // hexadecimal print out, because X format // can't take IEEE floating point as inputs. if (opstack[sp] != 'x' && opstack[sp] != 'X') { if (internal_trace) fprintf(stderr, "Normal convert "); snprintf(tempstring, WIDTHOF(tempstring), outformat, leftarg[sp]); tempstring[WIDTHOF(tempstring) - 1] = 0; leftarg[sp] = strtod(tempstring, NULL); validstack[sp] = LEFTVALID; } else { // Note that we actually don't use the // results of octal conversion; the only // effect is to set the final format // string. 
int64_t equiv; if (internal_trace) fprintf(stderr, "Oct/Hex Convert "); equiv = (int64_t)leftarg[sp]; if (internal_trace) fprintf(stderr, "equiv -->%10lld<-- \n", (long long int)equiv); snprintf(tempstring, WIDTHOF(tempstring), outformat, equiv); tempstring[WIDTHOF(tempstring) - 1] = 0; } } break; default: errstat = nonfatalerror("Math operator makes no sense in: ", buf); if (retstat) *retstat = 0; return 0; break; } validstack[sp] = LEFTVALID; } // Check to see that the stack is still valid. if (sp < 0) { errstat = nonfatalerror("Too many close parenthesis in this math: ", buf); if (retstat) *retstat = 0; return 0; } } // We made it all the way through. Now return the math formatter result if (internal_trace) fprintf(stderr, "Returning at sp= %d and value %f\n", sp, leftarg[sp]); if (retstat) *retstat = state; // Check that we made it all the way down the stack if (sp != 0) { errstat = nonfatalerror("Not enough close parenthesis in this math: ", buf); if (retstat) *retstat = 0; return 0; } // All's good, return with a value. { int return_length; return_length = math_formatter(leftarg[sp], outformat, buf, maxlen); CRM_ASSERT(return_length >= 0); CRM_ASSERT(return_length < maxlen); return return_length; } }
// Builds the colour preferences page: the scrollable colour list with its
// two usage hints above it, the font selector row (popup + point size
// spinner) and the scheme selector row (popup + New / Del / Defaults buttons).
ColorsPreflet::ColorsPreflet(PrefsWindow *parent)
	: Preflet(parent)
{
	// --- colour list -----------------------------------------------------
	// Start from our own bounds and carve out the area for the list.
	BRect listFrame(Bounds());
	listFrame.InsetBy(20, 50);
	listFrame.OffsetBy(0, -5);
	listFrame.right--;
	listFrame.top += 12;		// leaves room for the usage hints above the list
	listFrame.bottom -= 2;		// purely cosmetic
	listFrame.OffsetBy(0, 1);

	// The list itself sits 2 pixels inside that area and leaves the right
	// edge free for the vertical scroll bar.
	listFrame.InsetBy(2, 2);
	listFrame.right -= B_V_SCROLL_BAR_WIDTH;
	fColorsList = new ColorView(listFrame, parent);

	// Wrap the list in a scroll view (vertical scroll bar only).
	fScrollView = new BScrollView("sv", fColorsList, B_FOLLOW_ALL, 0, false, true);
	AddChild(fScrollView);
	listFrame.right += B_V_SCROLL_BAR_WIDTH;

	// The regular TargetedByScrollView notification arrives before the
	// scroll bars exist and therefore has no effect; repeat it by hand.
	fColorsList->TargetedByScrollView(fScrollView);

	// --- usage hints above the list --------------------------------------
	BStringView *pasteHint;
	BRect frame(listFrame);
	frame.bottom = frame.top - 1;
	frame.top -= 18;
	frame.right = frame.left + (WIDTHOF(frame) / 2);
	AddChild(new BStringView(frame, "", "Right-click: pick up color", 0));

	// second hint: right half of the same strip, right aligned
	frame.left = frame.right + 1;
	frame.right = listFrame.right;
	pasteHint = new BStringView(frame, "", "Ctrl-click: paste color", 0);
	pasteHint->SetAlignment(B_ALIGN_RIGHT);
	AddChild(pasteHint);

	// --- font selector row -----------------------------------------------
	const int fontFieldLeft = 10;
	const int fontFieldTop = 273;

	fFontMenu = new BPopUpMenu("fontsel");
	fFontMenu->AddItem(new BMenuItem("System Fixed Font ", NULL));

	frame.Set(fontFieldLeft, fontFieldTop, fontFieldLeft + 20, fontFieldTop + 20);
	fFontField = new BMenuField(frame, "fontfld", "", fFontMenu);
	fFontMenu->ItemAt(0)->SetMarked(true);
	AddChild(fFontField);

	frame.Set(200, 270, 353, 290);
	fFontSize = new Spinner(frame, "fontsz", "Point size",
		new BMessage(M_POINTSIZE_CHANGED));
	fFontSize->SetRange(4, 24);
	fFontSize->SetValue(editor.settings.font_size);
	fFontSize->SetTarget(Looper());
	AddChild(fFontSize);

	// --- scheme selector row ---------------------------------------------
	const int schemeFieldLeft = 10;
	const int schemeFieldTop = 10;

	fSchemeMenu = new BPopUpMenu("schemesel");
	UpdateSchemesMenu();

	frame.Set(schemeFieldLeft, schemeFieldTop, schemeFieldLeft + 20, schemeFieldTop + 20);
	fSchemeField = new BMenuField(frame, "schemefld", "", fSchemeMenu);
	AddChild(fSchemeField);

	// The three buttons share the row of the scheme popup; they are laid out
	// relative to the right edge of the colour list.
	frame.right = listFrame.right;
	frame.left = frame.right - 48;

	frame.OffsetBy(-80, 0);
	BButton *deleteButton = new BButton(frame, "", "Del",
		new BMessage(M_SCHEME_DELETE));

	frame.OffsetBy(-58, 0);
	BButton *newButton = new BButton(frame, "", "New",
		new BMessage(M_SCHEME_NEW));

	frame.OffsetBy(115, 0);
	frame.right = listFrame.right + 1;
	BButton *defaultsButton = new BButton(frame, "", "Defaults",
		new BMessage(M_SCHEME_DEFAULTS));

	newButton->SetTarget(Looper());
	deleteButton->SetTarget(Looper());
	defaultsButton->SetTarget(Looper());

	AddChild(newButton);
	AddChild(deleteButton);
	AddChild(defaultsButton);
}
//
// Draw one hash distribution 'image' as two character maps printed side by
// side: the linear-scale map on the left, the log2-scale map on the right.
//
// img holds width * height cells; the cell for column x / row y lives at
// img[y + height * x]. Rows are printed from y = height - 1 down to y = 0.
// Cells holding zero (or less) print as '.'; any other cell is quantized
// into one of the 12 remaining glyphs.
//
static void print_hash_distro_maps(FILE *of, const int *img, int width, int height,
        double linear_divisor, double log2_divisor)
{
    static const char glyphs[] = ".-0123456789#";
    const int top_level = (int)sizeof(glyphs) - 2;    // index of '#'
    int x;
    int y;
    int scale;

    for (y = height; --y >= 0;)
    {
        // scale 0: linear map; scale 1: log2 map
        for (scale = 0; scale < 2; scale++)
        {
            for (x = 0; x < width; x++)
            {
                int value = img[y + height * x];
                int qv = 0;

                if (value > 0)
                {
                    if (scale == 0)
                    {
                        qv = 1 + (int)(value / linear_divisor);
                    }
                    else
                    {
                        qv = 1 + (int)(log2((double)value) / log2_divisor);
                    }
                    if (qv > top_level)
                    {
                        qv = top_level;
                    }
                }
                fputc(glyphs[qv], of);
            }
            fputc((scale == 0 ? ' ' : '\n'), of);
        }
    }
}


//
// Write the 'hash distribution' section of the analysis report to 'of'.
//
// The counters in report_data->hash_distro_counts[] are folded into a
// 64 x 64 grid twice: once summing all counters which land in a cell (AVG)
// and once keeping the largest single counter per cell (PEAK). Each grid is
// printed as a linear-scale and a log2-scale character map.
//
// Returns 0 when the section has been written, -1 when the work buffers
// could not be allocated (nothing is written to 'of' in that case).
//
int produce_hash_distribution_report(CRM_ANALYSIS_REPORT_DATA *report_data, FILE *of)
{
    const int width = 64;
    const int height = 64;
    const int cell_count = width * height;
    const int bucket_count = (int)WIDTHOF(report_data->hash_distro_counts);
    int i;
    int avg_max_count = 0;
    int max_max_count = 0;
    int *avg_img;
    int *max_img;
    int idx_divisor;
    double avg_quantize_divisor;
    double max_quantize_divisor;
    double avg_log2_quantize_divisor;
    double max_log2_quantize_divisor;

    avg_img = (int *)calloc(cell_count, sizeof(avg_img[0]));
    max_img = (int *)calloc(cell_count, sizeof(max_img[0]));
    if (!avg_img || !max_img)
    {
        free(avg_img);
        free(max_img);
        fprintf(stderr, "Out of memory while producing the hash distribution report.\n");
        return -1;
    }

    fprintf(of, "CRM hash distribution (avg/max):\n"
                "================================\n"
                "\n");

    // Number of counters folded into one grid cell. Rounding UP keeps every
    // cell index inside the grid even when the counter array is not an exact
    // multiple of the grid size, and prevents a zero divisor when the array
    // is smaller than the grid.
    idx_divisor = (bucket_count + cell_count - 1) / cell_count;
    if (idx_divisor < 1)
    {
        idx_divisor = 1;
    }

    for (i = 0; i < bucket_count; i++)
    {
        int idx = i / idx_divisor;

        CRM_ASSERT(idx < cell_count);
        avg_img[idx] += report_data->hash_distro_counts[i];
        if (max_img[idx] < report_data->hash_distro_counts[i])
        {
            max_img[idx] = report_data->hash_distro_counts[i];
        }
    }

    // find the largest cell in each grid: these determine the quantization.
    for (i = cell_count; --i >= 0;)
    {
        if (avg_max_count < avg_img[i])
        {
            avg_max_count = avg_img[i];
        }
        if (max_max_count < max_img[i])
        {
            max_max_count = max_img[i];
        }
    }

    // 12 == sizeof(".-0123456789#") - 2: the number of non-zero glyph levels.
    avg_quantize_divisor = (avg_max_count + 1) / 12.0;
    avg_log2_quantize_divisor = log2(avg_max_count + 1.0) / 12.0;

    fprintf(of, "AVG: max: %d, quantize divisor: %f\n", avg_max_count, (double)avg_quantize_divisor);
    if (avg_quantize_divisor)
    {
        print_hash_distro_maps(of, avg_img, width, height,
                avg_quantize_divisor, avg_log2_quantize_divisor);
    }

    max_quantize_divisor = (max_max_count + 1) / 12.0;
    max_log2_quantize_divisor = log2(max_max_count + 1.0) / 12.0;

    fprintf(of, "\n"
                "PEAK: max: %d, quantize divisor: %f\n", max_max_count, (double)max_quantize_divisor);
    if (max_quantize_divisor)
    {
        print_hash_distro_maps(of, max_img, width, height,
                max_quantize_divisor, max_log2_quantize_divisor);
    }

    free(avg_img);
    free(max_img);

    fprintf(of, "\n"
                "----------------------\n"
                "\n"
                "\n");

    return 0;
}
// How to do a correlate-style CLASSIFY on some text. // int crm_expr_correlate_classify(CSL_CELL *csl, ARGPARSE_BLOCK *apb, VHT_CELL **vht, CSL_CELL *tdw, char *txtptr, int txtstart, int txtlen) { // classify the sparse spectrum of this input window // as belonging to a particular type. // // This code should look very familiar- it's cribbed from // the code for LEARN // int i, j, k; char ptext[MAX_PATTERN]; // the regex pattern int plen; // the hash file names char htext[MAX_PATTERN + MAX_CLASSIFIERS * MAX_FILE_NAME_LEN]; int htext_maxlen = MAX_PATTERN + MAX_CLASSIFIERS * MAX_FILE_NAME_LEN; int hlen; // the match statistics variable char stext[MAX_PATTERN + MAX_CLASSIFIERS * (MAX_FILE_NAME_LEN + 100)]; int stext_maxlen = MAX_PATTERN + MAX_CLASSIFIERS * (MAX_FILE_NAME_LEN + 100); int slen; char svrbl[MAX_PATTERN]; // the match statistics text buffer int svlen; int fnameoffset; char fname[MAX_FILE_NAME_LEN]; int eflags; int cflags; struct stat statbuf; // for statting the hash file //regex_t regcb; unsigned int fcounts[MAX_CLASSIFIERS]; // total counts for feature normalize double cpcorr[MAX_CLASSIFIERS]; // corpus correction factors int64_t linear_hits[MAX_CLASSIFIERS]; // actual hits per classifier int64_t square_hits[MAX_CLASSIFIERS]; // square of runlenths of match int64_t cube_hits[MAX_CLASSIFIERS]; // cube of runlength matches int64_t quad_hits[MAX_CLASSIFIERS]; // quad of runlength matches int incr_hits[MAX_CLASSIFIERS]; // 1+2+3... hits per classifier int64_t total_linear_hits; // actual total linear hits for all classifiers int64_t total_square_hits; // actual total square hits for all classifiers int64_t total_cube_hits; // actual total cube hits for all classifiers int64_t total_quad_hits; // actual total cube hits for all classifiers int64_t total_features; // total number of characters in the system hitcount_t totalhits[MAX_CLASSIFIERS]; double tprob; // total probability in the "success" domain. 
int textlen; // text length - rougly corresponds to // information content of the text to classify double ptc[MAX_CLASSIFIERS]; // current running probability of this class double renorm = 0.0; char *hashes[MAX_CLASSIFIERS]; int hashlens[MAX_CLASSIFIERS]; char *hashname[MAX_CLASSIFIERS]; int succhash; int vbar_seen; // did we see '|' in classify's args? int maxhash; int fnstart, fnlen; int fn_start_here; int textoffset; int bestseen; int thistotal; if (internal_trace) fprintf(stderr, "executing a CLASSIFY\n"); // we use the main line txtptr, txtstart, and txtlen now, // so we don't need to extract anything from the b1start stuff. // extract the hash file names hlen = crm_get_pgm_arg(htext, htext_maxlen, apb->p1start, apb->p1len); hlen = crm_nexpandvar(htext, hlen, htext_maxlen, vht, tdw); // extract the "this is a word" regex // plen = crm_get_pgm_arg(ptext, MAX_PATTERN, apb->s1start, apb->s1len); plen = crm_nexpandvar(ptext, plen, MAX_PATTERN, vht, tdw); // extract the optional "match statistics" variable // svlen = crm_get_pgm_arg(svrbl, MAX_PATTERN, apb->p2start, apb->p2len); svlen = crm_nexpandvar(svrbl, svlen, MAX_PATTERN, vht, tdw); { int vstart, vlen; if (crm_nextword(svrbl, svlen, 0, &vstart, &vlen)) { crm_memmove(svrbl, &svrbl[vstart], vlen); svlen = vlen; svrbl[vlen] = 0; } else { svlen = 0; svrbl[0] = 0; } } // status variable's text (used for output stats) // stext[0] = 0; slen = 0; // set our flags, if needed. The defaults are // "case" cflags = REG_EXTENDED; eflags = 0; if (apb->sflags & CRM_NOCASE) { if (user_trace) fprintf(stderr, " setting NOCASE for tokenization\n"); cflags += REG_ICASE; eflags = 1; } // Now, the loop to open the files. 
bestseen = 0; thistotal = 0; // initialize our arrays for N .css files for (i = 0; i < MAX_CLASSIFIERS; i++) { fcounts[i] = 0; // check later to prevent a divide-by-zero // error on empty .css file cpcorr[i] = 0.0; // corpus correction factors linear_hits[i] = 0; // linear hits square_hits[i] = 0; // square of the runlength cube_hits[i] = 0; // cube of the runlength quad_hits[i] = 0; // quad of the runlength incr_hits[i] = 0; // 1+2+3... hits hits totalhits[i] = 0; // absolute hit counts ptc[i] = 0.5; // priori probability } // vbar_seen = 0; maxhash = 0; succhash = 0; fnameoffset = 0; // now, get the file names and mmap each file // get the file name (grody and non-8-bit-safe, but doesn't matter // because the result is used for open() and nothing else. // GROT GROT GROT this isn't NULL-clean on filenames. But then // again, stdio.h itself isn't NULL-clean on filenames. if (user_trace) fprintf(stderr, "Classify list: -%.*s-\n", hlen, htext); fn_start_here = 0; fnlen = 1; while (fnlen > 0 && ((maxhash < MAX_CLASSIFIERS - 1))) { if (crm_nextword(htext, hlen, fn_start_here, &fnstart, &fnlen) && fnlen > 0) { strncpy(fname, &htext[fnstart], fnlen); fname[fnlen] = 0; // fprintf(stderr, "fname is '%s' len %d\n", fname, fnlen); fn_start_here = fnstart + fnlen + 1; if (user_trace) { fprintf(stderr, "Classifying with file -%s- succhash=%d, maxhash=%d\n", fname, succhash, maxhash); } if (fname[0] == '|' && fname[1] == 0) { if (vbar_seen) { nonfatalerror("Only one '|' allowed in a CLASSIFY.\n", "We'll ignore it for now."); } else { succhash = maxhash; } vbar_seen++; } else { // be sure the file exists // stat the file to get it's length k = stat(fname, &statbuf); // quick check- does the file even exist? if (k != 0) { nonfatalerror("Nonexistent Classify table named: ", fname); } else { // [i_a] check hashes[] range BEFORE adding another one! 
if (maxhash >= MAX_CLASSIFIERS) { nonfatalerror("Too many classifier files.", "Some may have been disregarded"); } else { // file exists - do the mmap // hashlens[maxhash] = statbuf.st_size; // [i_a] hashlens[maxhash] must be fixed for the header size! hashes[maxhash] = crm_mmap_file(fname, 0, hashlens[maxhash], PROT_READ, MAP_SHARED, CRM_MADV_RANDOM, &hashlens[maxhash]); if (hashes[maxhash] == MAP_FAILED) { nonfatalerror("Couldn't memory-map the table file", fname); } else { // // Check to see if this file is the right version // // FIXME : for now, there's no version number // associated with a .correllation file // int fev; // if (0) //(hashes[maxhash][0].hash != 1 || // hashes[maxhash][0].key != 0) //{ // fev = fatalerror ("The .css file is the wrong version! Filename is: ", // fname); // return (fev); //} // // save the name for later... // hashname[maxhash] = (char *)calloc((fnlen + 10), sizeof(hashname[maxhash][0])); if (!hashname[maxhash]) { untrappableerror( "Couldn't alloc hashname[maxhash]\n", "We need that part later, so we're stuck. Sorry."); } else { strncpy(hashname[maxhash], fname, fnlen); hashname[maxhash][fnlen] = 0; } maxhash++; } } } } } } // // If there is no '|', then all files are "success" files. if (succhash == 0) succhash = maxhash; // a CLASSIFY with no arguments is always a "success". if (maxhash == 0) return 0; if (user_trace) { fprintf(stderr, "Running with %d files for success out of %d files\n", succhash, maxhash); } // sanity checks... Uncomment for super-strict CLASSIFY. // // do we have at least 1 valid .css files? if (maxhash == 0) { return nonfatalerror("Couldn't open at least 1 .css file for classify().", ""); } #if 0 // do we have at least 1 valid .css file at both sides of '|'? 
if (!vbar_seen || succhash <= 0 || (maxhash <= succhash)) { return nonfatalerror("Couldn't open at least 1 .css file per SUCC | FAIL category " "for classify().\n", "Hope you know what are you doing."); } #endif // // now all of the files are mmapped into memory, // and we can do the correlations and add up matches. i = 0; j = 0; k = 0; thistotal = 0; // put in the ptr/start/len values we got from the outside caller textoffset = txtstart; textlen = txtlen; // // We keep track of the hits in these categories // linear_hits[MAX_CLASSIFIERS]; // actual hits per classifier // square_hits[MAX_CLASSIFIERS]; // square of runlenths of match // incr_hits[MAX_CLASSIFIERS]; // 1+2+3... hits per classifier // // Now we do the actual correllation. // for each file... // slide the incoming text (mdw->filetext[textofset]) // across the corpus text (hashes[] from 0 to hashlens[]) // and count the bytes that are the same, the runlengths, // etc. for (k = 0; k < maxhash; k++) { int it; // it is the start index into the tested text int ik; // ik is the start index into the known corpus text int ilm; // ilm is the "local" matches (N in a row) // for each possible displacement of the known (ik) text... for (ik = 0; ik < hashlens[k]; ik++) { int itmax; ilm = 0; itmax = textlen; if (ik + itmax > hashlens[k]) itmax = hashlens[k] - ik; // for each position in the test (it) text... for (it = 0; it < itmax; it++) { // do the characters in this position match? if (hashes[k][ik + it] == txtptr[textoffset + it]) { // yes they matched linear_hits[k]++; ilm++; square_hits[k] = square_hits[k] + (ilm * ilm); cube_hits[k] = cube_hits[k] + (ilm * ilm * ilm); quad_hits[k] = quad_hits[k] + (ilm * ilm * ilm * ilm); } else { // nope, they didn't match. 
// So, we do the end-of-runlength stuff: ilm = 0; } if (0) fprintf(stderr, "ik: %d it: %d chars %c %c lin: %lld sqr: %lld cube: %lld quad: %lld\n", ik, it, hashes[k][ik + it], txtptr[textoffset + it], (long long int)linear_hits[k], (long long int)square_hits[k], (long long int)cube_hits[k], (long long int)quad_hits[k]); } } } // Now we have the total hits for each text corpus. We can then // turn that into a vague probability measure, and then renormalize // that to get probabilities. // // But first, let's reflect on what we've got here. We our test // text, and we have a corpus which is "nominally correllated", // and another corpus that is nominally uncorrellated. // // The uncorrellated text will have an average match rate of 1/256'th // in the linear domain (well, for random bytes; english text will match // a lot more often, due to the fact that ASCII only uses the low 7 // bits, most text is written in lower case, Zipf's law, etc. // // We can calculate a predicted total on a per-character basis for all // of the corpi, then use that as an average expectation. // Calculate total hits total_linear_hits = 0; total_square_hits = 0; total_cube_hits = 0; total_quad_hits = 0; total_features = 0; for (k = 0; k < maxhash; k++) { total_linear_hits += linear_hits[k]; total_square_hits += square_hits[k]; total_cube_hits += cube_hits[k]; total_quad_hits += quad_hits[k]; total_features += hashlens[k]; } for (k = 0; k < maxhash; k++) { if (hashlens[k] > 0 && total_features > 0) { // Note that we don't normalize the probabilities yet- we do // that down below. // // .00397 is not a magic number - it's the random coincidence // rate for 1 chance in 256, with run-length-squared boost. // .00806 is the random coincidence rate for 7-bit characters. 
// //ptc[k] = ((0.0+square_hits[k] - (.00397 * hashlens[k] ))); // ptc[k] = ((0.0+square_hits[k] - (.00806 * hashlens[k] ))) // / hashlens[k]; // ptc[k] = (0.0+square_hits[k] ) / hashlens[k]; // ptc[k] = (0.0+ quad_hits[k] ) / hashlens[k]; ptc[k] = (0.0 + quad_hits[k]) / linear_hits[k]; if (ptc[k] < 0) ptc[k] = 10 * DBL_MIN; } else { ptc[k] = 0.5; } } // ptc[k] = (sqrt (0.0 + square_hits[k])-linear_hits[k] ) / hashlens[k] ; // ptc[k] = (0.0 + square_hits[k] - linear_hits[k] ) ; // ptc[k] = ((0.0 + square_hits[k]) / hashlens[k]) ; // ptc[k] = sqrt ((0.0 + square_hits[k]) / hashlens[k]) ; // ptc[k] = ((0.0 + linear_hits[k]) / hashlens[k]) ; // calculate renormalizer (the Bayesian formula's denomenator) renorm = 0.0; // now calculate the per-ptc numerators for (k = 0; k < maxhash; k++) renorm = renorm + (ptc[k]); // check for a zero normalizer if (renorm == 0) renorm = 1.0; // and renormalize for (k = 0; k < maxhash; k++) ptc[k] = ptc[k] / renorm; // if we have underflow (any probability == 0.0 ) then // bump the probability back up to 10^-308, or // whatever a small multiple of the minimum double // precision value is on the current platform. // for (k = 0; k < maxhash; k++) { if (ptc[k] < 10 * DBL_MIN) ptc[k] = 10 * DBL_MIN; } if (internal_trace) { for (k = 0; k < maxhash; k++) { fprintf(stderr, " file: %d linear: %lld square: %lld RMS: %6.4e ptc[%d] = %6.4e\n", k, (long long int)linear_hits[k], (long long int)square_hits[k], sqrt(0.0 + square_hits[k]), k, ptc[k]); } } // end of repeat-the-regex loop // cleanup time! // remember to let go of the fd's and mmaps for (k = 0; k < maxhash; k++) { crm_munmap_file(hashes[k]); } if (user_trace) { for (k = 0; k < maxhash; k++) fprintf(stderr, "Probability of match for file %d: %f\n", k, ptc[k]); } // tprob = 0.0; for (k = 0; k < succhash; k++) tprob = tprob + ptc[k]; // // Do the calculations and format some output, which we may or may // not use... but we need the calculated result anyway. 
// if (1 /* svlen > 0 */) { char buf[1024]; double accumulator; double remainder; double overall_pR; int m; buf[0] = 0; accumulator = 10 * DBL_MIN; for (m = 0; m < succhash; m++) { accumulator += ptc[m]; } remainder = 10 * DBL_MIN; for (m = succhash; m < maxhash; m++) { remainder += ptc[m]; } overall_pR = log10(accumulator) - log10(remainder); // note also that strcat _accumulates_ in stext. // There would be a possible buffer overflow except that _we_ control // what gets written here. So it's no biggie. if (tprob > 0.5) { sprintf(buf, "CLASSIFY succeeds; (correlate) success probability: %6.4f pR: %6.4f\n", tprob, overall_pR); } else { sprintf(buf, "CLASSIFY fails; (correlate) success probability: %6.4f pR: %6.4f\n", tprob, overall_pR); } if (strlen(stext) + strlen(buf) <= stext_maxlen) strcat(stext, buf); // find best single matching file // bestseen = 0; for (k = 0; k < maxhash; k++) { if (ptc[k] > ptc[bestseen]) { bestseen = k; } } remainder = 10 * DBL_MIN; for (m = 0; m < maxhash; m++) { if (bestseen != m) { remainder += ptc[m]; } } // ... 
and format some output of best single matching file // snprintf(buf, WIDTHOF(buf), "Best match to file #%d (%s) " "prob: %6.4f pR: %6.4f\n", bestseen, hashname[bestseen], ptc[bestseen], (log10(ptc[bestseen]) - log10(remainder))); buf[WIDTHOF(buf) - 1] = 0; if (strlen(stext) + strlen(buf) <= stext_maxlen) strcat(stext, buf); sprintf(buf, "Total features in input file: %d\n", hashlens[bestseen]); if (strlen(stext) + strlen(buf) <= stext_maxlen) strcat(stext, buf); // Now do the per-file breakdowns: // for (k = 0; k < maxhash; k++) { int m; remainder = 10 * DBL_MIN; for (m = 0; m < maxhash; m++) { if (k != m) { remainder += ptc[m]; } } snprintf(buf, WIDTHOF(buf), "#%d (%s):" " features: %d, L1: %lld L2: %lld L3: %lld, L4: %lld prob: %3.2e, pR: %6.2f\n", k, hashname[k], hashlens[k], (long long int)linear_hits[k], (long long int)square_hits[k], (long long int)cube_hits[k], (long long int)quad_hits[k], ptc[k], (log10(ptc[k]) - log10(remainder))); buf[WIDTHOF(buf) - 1] = 0; // strcat (stext, buf); if (strlen(stext) + strlen(buf) <= stext_maxlen) strcat(stext, buf); } // check here if we got enough room in stext to stuff everything // perhaps we'd better rise a nonfatalerror, instead of just // whining on stderr if (strcmp(&(stext[strlen(stext) - strlen(buf)]), buf) != 0) { nonfatalerror("WARNING: not enough room in the buffer to create " "the statistics text. Perhaps you could try bigger " "values for MAX_CLASSIFIERS or MAX_FILE_NAME_LEN?", " "); } if (svlen > 0) { crm_destructive_alter_nvariable(svrbl, svlen, stext, (int)strlen(stext), csl->calldepth); } } // // Free the hashnames, to avoid a memory leak. 
// for (i = 0; i < maxhash; i++) free(hashname[i]); if (tprob <= 0.5) { if (user_trace) fprintf(stderr, "CLASSIFY was a FAIL, skipping forward.\n"); // and do what we do for a FAIL here CRM_ASSERT(csl->cstmt >= 0); CRM_ASSERT(csl->cstmt <= csl->nstmts); #if defined(TOLERATE_FAIL_AND_OTHER_CASCADES) csl->next_stmt_due_to_fail = csl->mct[csl->cstmt]->fail_index; #else csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1; #endif if (internal_trace) { fprintf(stderr, "CLASSIFY.CORRELATE is jumping to statement line: %d/%d\n", csl->mct[csl->cstmt]->fail_index, csl->nstmts); } CRM_ASSERT(csl->cstmt >= 0); CRM_ASSERT(csl->cstmt <= csl->nstmts); csl->aliusstk[csl->mct[csl->cstmt]->nest_level] = -1; return 0; } // // all done... if we got here, we should just continue execution if (user_trace) fprintf(stderr, "CLASSIFY was a SUCCESS, continuing execution.\n"); // regcomp_failed: return 0; }
//
//    How to LEARN text for the correlate classifier:
//
//         learn <flags> (classname) [variable] /regex/   (regex is ignored)
//
// The text of the given variable (or of :_dw: when no variable is given) is
// appended as-is to the correlation file named in the first paren argument.
// A file which does not exist yet, or which is empty, first receives a
// CRM114 header block.
//
// Returns 0 on success; otherwise the value produced by the (non)fatalerror
// call which reported the problem. The file handle is closed on every exit
// path.
//
int crm_expr_correlate_learn(CSL_CELL *csl, ARGPARSE_BLOCK *apb,
        VHT_CELL **vht,
        CSL_CELL *tdw,
        char *txtptr, int txtstart, int txtlen)
{
    int i, j;
    char ptext[MAX_PATTERN]; // the regex pattern
    int plen;
    char ltext[MAX_PATTERN]; // the variable to learn
    int llen;
    char htext[MAX_PATTERN]; // the hash name
    int hlen;
    struct stat statbuf;     // for statting the hash file
    FILE *f;                 // hashfile fd
    int textoffset;
    int textlen;
    int vhtindex;
    int fev;
    char *learnfilename;

    if (internal_trace)
        fprintf(stderr, "executing a LEARN (correlation format)\n");

    //           extract the hash file name
    hlen = crm_get_pgm_arg(htext, MAX_PATTERN, apb->p1start, apb->p1len);
    hlen = crm_nexpandvar(htext, hlen, MAX_PATTERN, vht, tdw);

    //           extract the variable name (if present)
    llen = crm_get_pgm_arg(ltext, MAX_PATTERN, apb->b1start, apb->b1len);
    llen = crm_nexpandvar(ltext, llen, MAX_PATTERN, vht, tdw);

    //     get the "this is a word" regex
    //     (not used by the correlator, but still expanded)
    plen = crm_get_pgm_arg(ptext, MAX_PATTERN, apb->s1start, apb->s1len);
    plen = crm_nexpandvar(ptext, plen, MAX_PATTERN, vht, tdw);
    (void)plen;

    //     The flags do not change what is appended to the file;
    //     they are only reported when tracing.
    if (apb->sflags & CRM_NOCASE)
    {
        if (user_trace)
            fprintf(stderr, "turning oncase-insensitive match\n");
    }
    if (apb->sflags & CRM_REFUTE)
    {
        if (user_trace)
            fprintf(stderr, " refuting learning\n");
    }
    if (apb->sflags & CRM_MICROGROOM)
    {
        if (user_trace)
            fprintf(stderr, " enabling microgrooming.\n");
    }

    //
    //             grab the filename, and stat the file
    //      note that neither "stat", "fopen", nor "open" are
    //      fully 8-bit or wchar clean...
    if (!crm_nextword(htext, hlen, 0, &i, &j) || j == 0)
    {
        fev = nonfatalerror_ex(SRC_LOC(),
                "\nYou didn't specify a valid filename: '%.*s'\n",
                (int)hlen,
                htext);
        return fev;
    }
    j += i;
    CRM_ASSERT(i < hlen);
    CRM_ASSERT(j <= hlen);

    //             filename starts at i,  ends at j. null terminate it.
    htext[j] = 0;
    learnfilename = &htext[i];

    //             and stat it to get it's length
    if (stat(learnfilename, &statbuf) != 0)
    {
        //      file didn't exist... create it
        CRM_PORTA_HEADER_INFO classifier_info = { 0 };

        if (user_trace)
        {
            fprintf(stderr, "\nCreating new correlate file %s\n", learnfilename);
            fprintf(stderr, "Opening file %s for write\n", learnfilename);
        }
        f = fopen(learnfilename, "wb");
        if (!f)
        {
            char dirbuf[DIRBUFSIZE_MAX];

            fev = fatalerror_ex(SRC_LOC(),
                    "\n Couldn't open your new CORRELATE file %s for writing; (full path: '%s') errno=%d(%s)\n",
                    learnfilename,
                    mk_absolute_path(dirbuf, WIDTHOF(dirbuf), learnfilename),
                    errno,
                    errno_descr(errno));
            return fev;
        }

        classifier_info.classifier_bits = CRM_CORRELATE;
        classifier_info.hash_version_in_use = selected_hashfunction;

        if (0 != fwrite_crm_headerblock(f, &classifier_info, NULL))
        {
            fev = nonfatalerror_ex(SRC_LOC(),
                    "\n Couldn't write header to file %s; errno=%d(%s)\n",
                    learnfilename, errno, errno_descr(errno));
            fclose(f);
            return fev;
        }

        // a brand new file holds no learned text yet.
        statbuf.st_size = 0;
    }
    else
    {
        if (user_trace)
        {
            fprintf(stderr, "Opening correlate file %s for append\n", learnfilename);
        }

        //  Now a nasty bit.  Because there might be data of the
        //  file retained, we need to force an unmap-by-name which will
        //  allow a remap with the new file length later on.
        if (internal_trace)
        {
            fprintf(stderr, "un-mmap-ping file %s for known state\n", learnfilename);
        }
        crm_force_munmap_filename(learnfilename);

        f = fopen(learnfilename, "ab+");
        if (!f)
        {
            char dirbuf[DIRBUFSIZE_MAX];

            fev = fatalerror_ex(SRC_LOC(),
                    "\n Couldn't open your CORRELATE file %s for append; (full path: '%s') errno=%d(%s)\n",
                    learnfilename,
                    mk_absolute_path(dirbuf, WIDTHOF(dirbuf), learnfilename),
                    errno,
                    errno_descr(errno));
            return fev;
        }
        if (is_crm_headered_file(f))
        {
            // the header block is not part of the learned text.
            statbuf.st_size -= CRM114_HEADERBLOCK_SIZE;
        }

        //     And make sure the file pointer is at EOF.
        (void)fseek(f, 0, SEEK_END);

        if (ftell(f) == 0)
        {
            // existing but empty file: give it a header first.
            CRM_PORTA_HEADER_INFO classifier_info = { 0 };

            classifier_info.classifier_bits = CRM_CORRELATE;
            classifier_info.hash_version_in_use = selected_hashfunction;

            if (0 != fwrite_crm_headerblock(f, &classifier_info, NULL))
            {
                int err = errno;

                fclose(f);
                fev = nonfatalerror_ex(SRC_LOC(),
                        "Couldn't write the header to the correlate file named '%s': error %d(%s)",
                        learnfilename,
                        err,
                        errno_descr(err));
                return fev;
            }

            statbuf.st_size = 0;
        }
    }

    //   The correlation file stores the learned text as raw bytes, so its
    //   length in characters is simply its size in bytes.
    if (user_trace)
    {
        fprintf(stderr, "Correlation text file %s has length %d characters\n",
                learnfilename, (int)statbuf.st_size);
    }

    //
    //     get the text to "learn" (well, append to the correlation file)
    //
    if (llen > 0)
    {
        if (!crm_is_legal_variable(ltext, llen))
        {
            // do not leak the file handle which has been opened above.
            fclose(f);
            fev = fatalerror_ex(SRC_LOC(),
                    "Attempt to LEARN from an illegal variable '%.*s'. How very bizarre.",
                    llen, ltext);
            return fev;
        }
        vhtindex = crm_vht_lookup(vht, ltext, llen, csl->calldepth);
    }
    else
    {
        vhtindex = crm_vht_lookup(vht, ":_dw:", 5, csl->calldepth);
    }

    if (vht[vhtindex] == NULL)
    {
        CRM_ASSERT(f != NULL);
        fclose(f);
        fev = nonfatalerror(" Attempt to LEARN from a nonexistent variable ",
                ltext);
        return fev;
    }

    // find out which data window holds the variable's text.
    mdw = NULL;
    if (tdw->filetext == vht[vhtindex]->valtxt)
        mdw = tdw;
    if (cdw->filetext == vht[vhtindex]->valtxt)
        mdw = cdw;
    if (mdw == NULL)
    {
        CRM_ASSERT(f != NULL);
        fclose(f);
        fev = nonfatalerror(" Bogus text block containing variable ", ltext);
        return fev;
    }

    textoffset = vht[vhtindex]->vstart;
    textlen = vht[vhtindex]->vlen;

    if (user_trace)
    {
        fprintf(stderr, "learning the text (len %d) :", textlen);
        fwrite4stdio(&(mdw->filetext[textoffset]),
                ((textlen < 128) ? textlen : 128),
                stderr);
        fprintf(stderr, "\n");
    }

    //      append the "learn" text to the end of the file.
    {
        long old_fileoffset;

        CRM_ASSERT(f != NULL);
        (void)fseek(f, 0, SEEK_END);
        old_fileoffset = ftell(f);
        if ((size_t)textlen != fwrite(&(mdw->filetext[textoffset]), 1, textlen, f))
        {
            int err = errno;

            fclose(f);
            // try to correct the failure by ditching the new, partially(?)
            // written(?) data. This is best effort only: when it fails too,
            // there is nothing more we can do about it here.
            if (old_fileoffset >= 0)
            {
                if (truncate(learnfilename, old_fileoffset) != 0)
                {
                    if (user_trace)
                    {
                        fprintf(stderr, "Could not truncate file %s back to its previous size\n",
                                learnfilename);
                    }
                }
            }
            fev = nonfatalerror_ex(SRC_LOC(),
                    "Failed to append the 'learn' text to the correlation file '%s': error %d(%s)\n",
                    learnfilename,
                    err,
                    errno_descr(err));
            return fev;
        }
    }

    // fclose() flushes the appended text: a failure here means the text may
    // not have reached the file, so report it instead of claiming success.
    CRM_ASSERT(f != NULL);
    if (fclose(f) != 0)
    {
        int err = errno;

        fev = nonfatalerror_ex(SRC_LOC(),
                "Failed to flush the 'learn' text to the correlation file '%s': error %d(%s)\n",
                learnfilename,
                err,
                errno_descr(err));
        return fev;
    }

    return 0;
}