static int check_second_line(struct reader_t *rsol, struct reader_t *rout) { if (rsol->c == EOF) return ANSW_OK; /* skip_blank_lines(rout) will be called later */ reader_read(rsol); reader_read(rout); for (;;) { reader_skipblank(rsol); reader_skipblank(rout); if ((rsol->c == EOF) || isnewline(rsol->c)) return ANSW_OK; /* skip_blank_lines(rout) will be called later */ if (rsol->c != rout->c) return ANSW_FAIL; read_digit_while_eq(rsol, rout); if ((rsol->c == EOF) || isnewline(rsol->c)) return ANSW_OK; /* skip_blank_lines(rout) will be called later */ if ((rsol->c == rout->c) || (isblank(rsol->c) && isblank(rout->c))) continue; return ANSW_FAIL; } }
/*
 * Append the first line of str to ds.
 *
 * The copied span runs up to the first '\n' or '\r' and includes that
 * single character when isnewline() accepts it; if str holds no line
 * terminator the whole string is appended.
 */
void dstrcatline(dstr_t *ds, const char *str)
{
    size_t span = strcspn(str, "\n\r");
    size_t count = isnewline(str[span]) ? span + 1 : span;

    dstrncat(ds, str, count);
}
/*
 * Consume one line terminator and bump the line counter.
 *
 * The first terminator character is always skipped.  If the lexer is then
 * still on a newline character that differs from the first one, the pair
 * is treated as a single two-character terminator ("\r\n" or "\n\r") and
 * the second character is skipped as well.
 */
static void inc_line(lexer_state *ls)
{
    char opener = ls->current;

    next(ls);
    if (isnewline(ls) && ls->current != opener) {
        next(ls);
    }
    ls->linenum++;
}
static int parse_u8_array_string(unsigned char *d, const char *s) { int i = 0; bool sop = 0; while (!iseop(*s)) { if (iscomment(*s)) { if (s[1] == '*') { s += 2; while (!(s[0] == '*' && s[1] == '/')) s++; s += 2; pr_info("%s: find end of comment\n", __func__); continue; } else if (s[1] == '/') { s += 2; while (!isnewline(*s++)); pr_info("%s: find new line\n", __func__); continue; } else { /* syntax error */ pr_info("%s: syntax error\n", __func__); return -EINVAL; } } if (sop) { if (isspace(*s)) { s++; continue; } if (!isxdigit(s[0]) || !isxdigit(s[1])) { /* syntax error */ pr_err("%s: syntax error\n", __func__); return -EINVAL; } sscanf(s, "%2hhx", &d[i]); s += 2; i++; if (i >= MAX_BUF_SIZE) { pr_err("%s: exceeded dst buf size(%d)!!\n", __func__, MAX_BUF_SIZE); return -EINVAL; } continue; } if (issop(*s)) sop = true; s++; } return i; }
/*
 * Read one complete line from data->file into data->line, growing the
 * buffer as needed, and strip the trailing end-of-line characters.
 *
 * On end of file (nothing could be read) data->eof is set and the buffer
 * is left untouched.  If the buffer cannot be grown, data->eof is set as
 * well so that the caller stops parsing instead of processing a truncated
 * line; the previously allocated buffer stays valid.
 */
void read_line(ParserData* data)
{
    // Read one line
    if (fgets(data->line, data->buffer_size, data->file) == NULL) {
        data->eof = true;
        return;
    }
    data->linelength = strlen(data->line);

    // If the buffer was too small, extend it and keep reading.
    // Note: when the last character in the buffer is the CR of a CRLF-terminated line,
    // the parser thinks the line is terminated and the next line parsed is an empty
    // LF-terminated line. However, as empty lines are ignored, this is not a problem.
    while (data->linelength > 0
           && data->linelength == data->buffer_size - 1
           && !isnewline(last_char(data))) {
        size_t old_buffer_size = data->buffer_size;
        size_t new_buffer_size = old_buffer_size * 2;

        // Grow in place; realloc keeps the already-read part of the line.
        char* grown = realloc(data->line, new_buffer_size * sizeof (char));
        if (grown == NULL) {
            // Out of memory: data->line is still the old, valid buffer.
            data->eof = true;
            return;
        }
        data->line = grown;
        data->buffer_size = new_buffer_size;

        // Continue reading right over the old terminator.
        if (fgets(&(data->line[old_buffer_size - 1]),
                  data->buffer_size - (old_buffer_size - 1),
                  data->file) == NULL) {
            // End of file in the middle of a line: keep what we have.
            break;
        }
        data->linelength = strlen(data->line);
    }

    // Remove end-line character - iterative to catch CRLF and LFCR.
    // The length guard keeps last_char() from being asked about an empty line.
    while (data->linelength > 0 && isnewline(last_char(data))) {
        data->line[data->linelength - 1] = '\0';
        data->linelength--;
    }
}
/*
 * Extract the next whitespace-delimited token from buf, in place.
 *
 * Leading spaces are skipped.  Lines starting with '#' are treated as
 * comments: they are NUL-terminated, echoed through vmm_printf() and
 * skipped together with the newline characters that follow.  The token
 * is then NUL-terminated in the buffer and stored in *token.
 *
 * Returns the position from which the next call should continue.  When
 * the token is the last thing in the buffer, the returned pointer is the
 * string terminator itself, so the following call yields an empty token
 * instead of reading beyond the buffer.
 *
 * NOTE(review): isnewline() is called with the pointer itself, exactly as
 * in the original code; confirm that this matches its declaration.
 */
static char *next_token(char *buf, char **token)
{
	char *comment = NULL;

	while (buf) {
		buf = skip_spaces(buf);
		if ('#' != *buf)
			break;

		comment = buf;
		/* Find the end of the comment line without overrunning the string. */
		while (*buf != '\0' && !isnewline(buf))
			++buf;
		if (*buf != '\0')
			*buf++ = '\0';
		vmm_printf("Comment: %s\n", comment);

		while (isnewline(buf))
			++buf;
	}

	*token = buf;

	/* isspace('\0') is false, so the terminator must be tested explicitly. */
	while (*buf != '\0' && !isspace((unsigned char)*buf))
		++buf;

	if (*buf == '\0')
		return buf;

	*buf = '\0';
	return buf + 1;
}
/*
 * Read a sequence of integers and alphabetic names from one input line
 * and echo each item as it is recognised.
 *
 * Returns 0 when the line has been consumed, 1 when an item is neither
 * an integer nor a non-empty name.
 */
int main(void)
{
  int number;
  char name[LENGTH];

  printf("Enter a sequence of integers and alphabetic names:\n");

  while (!isnewline())
  {
    /* An integer is tried first; a name is only attempted when that fails. */
    if (getinteger(&number))
    {
      printf("\nInteger value:%8d", number);
      continue;
    }

    if (strlen(getname(name, LENGTH)) > 0)
    {
      printf("\nName: %s", name);
      continue;
    }

    printf("\nInvalid input.");
    return 1;
  }

  return 0;
}
/*
 * Read integers and alphabetic names from a single input line, printing
 * one report line per recognised item.
 *
 * Exit status: 0 once the newline is reached, 1 on the first item that
 * is neither an integer nor a non-empty name.
 */
int main(void)
{
  int number;
  char name[LENGTH] = {'\0'};

  printf("Enter a sequence of integers and alphabetic names in a single line:\n");

  for (;;)
  {
    if (isnewline())
      break;

    if (getinteger(&number))
    {
      printf("Integer value:%8d\n", number);
    }
    else if (strnlen_s(getname(name, LENGTH), LENGTH) > 0)
    {
      printf("Name: %s\n", name);
    }
    else
    {
      printf("Invalid input.\n");
      return 1;
    }
  }

  return 0;
}
int next_token(char **s) { while (iswhitespace(**s)) (*s)++; str_clear(&curtok); while (!iswhitespace(**s) && **s != '\0') { str_append(&curtok, **s); (*s)++; } if (strcmp("{%", curtok.s) == 0) token = EXP_START; else if (strcmp("%}", curtok.s) == 0) { token = EXP_END; /* swallow whitespace to not affect output */ while (isnewline(**s)) (*s)++; } else if (strcmp("{$", curtok.s) == 0) token = SH_START; else if (strcmp("$}", curtok.s) == 0) token = SH_END; else if (strcmp("{{", curtok.s) == 0) token = VAR_START; else if (strcmp("}}", curtok.s) == 0) token = VAR_END; else if (strcmp("for", curtok.s) == 0) token = FOR; else if (strcmp("in", curtok.s) == 0) token = IN; else if (strcmp("do", curtok.s) == 0) token = DO; else if (strcmp("done", curtok.s) == 0) token = DONE; else if (strcmp("include", curtok.s) == 0) token = INCLUDE; else { token = IDENT; } return token; }
/*
 * Compare the first line of the reference answer (rsol) with the
 * contestant output (rout).
 *
 * Both readers are primed, leading blanks are skipped and the common run
 * of digits is consumed.  The line is accepted when both readers then
 * stop on the same character, or when the reference has no further digit
 * and the output has nothing left on the line but blanks.
 */
static int check_first_line(struct reader_t *rsol, struct reader_t *rout)
{
    reader_read(rsol);
    reader_read(rout);

    reader_skipblank(rsol);
    reader_skipblank(rout);

    read_digit_while_eq(rsol, rout);

    if (rsol->c == rout->c) {
        return ANSW_OK;
    }

    /* The reference still has a digit that the output did not match. */
    if (rsol->c != EOF && isdigit(rsol->c)) {
        return ANSW_FAIL;
    }

    /* Only trailing blanks may remain on the output line. */
    reader_skipblank(rout);
    return (rout->c == EOF || isnewline(rout->c)) ? ANSW_OK : ANSW_FAIL;
}
/*
 * Read a sequence of integers and alphabetic names from one input line
 * and print each item as it is recognised.
 *
 * Returns 0 when the end of the line is reached, 1 as soon as an item is
 * neither an integer nor a non-empty name.
 */
int main(void)
{
    int number;
    char name[LENGTH];

    /* Prompt text: fixed the "Enther" typo of the original. */
    printf("Enter a sequence of integers and alphabetic names:\n");

    while (!isnewline())
    {
        if (getinteger(&number))
        {
            printf("number value = %d\n", number);
        }
        else if (strlen(getname(name, LENGTH)) > 0)
        {
            printf("name is:%s\n", name);
        }
        else
        {
            printf("Invalid input.\n");
            return 1;
        }
    }

    return 0;
}
//// // case_redirection // // Returns true if any error is produced and false otherwise. Handles // the case where the current character is a redirection meta-character. // A redirection meta-character is a token by itself preceded by a '2'. // A following null plug will case a parsing error. // A following meta-character will cause a parsing error. // A following backslash begins a token preceded by a '3'. // A following non-whitespace begins a token preceded by a '3'. // A following whitespace is ignored. // bool case_redirection (char *line, char *buffer, char **tokens, int *ii, int *jj, int *kk, int *isarg) { // make the passed indices more accessible int i = (*ii); int j = (*jj); int k = (*kk); // if we are looking for a redirection arg then throw a parsing error if (*isarg) return TRUE; // '2' represents a meta-char buffer[j] = '2'; // a meta-char *isarg = 1; // next token should be a redirection arg tokens[k] = &buffer[j]; // begin a new token // place redirection symbol into buffer buffer[j+1] = line[i]; // meta-char placement buffer[j+2] = '\0'; // null plug ++(*jj); ++(*jj); ++(*jj); j = (*jj); // increment buffer index ++(*kk); k = (*kk); // increment tokens index // a following null plug will cause a parsing error // a following meta-char will cause a parsing error // a following non-whitespace begins a token (including a backslash) // a following whitespace is ignored if (line[i+1] == '\0' || ismetachar (line[i+1]) || isnewline (line[i+1])) { return TRUE; // parsing error }else if (!iswhitespace (line[i+1])) { // non-whitespace begins a redirection argument buffer[j] = '3'; tokens[k] = &buffer[j]; ++(*jj); ++(*kk); } return FALSE; }
// Make sure that we can use Mac, DOS, or Unix style text files on any system // and they will work, by making sure the definition of whitespace is consistent bool portable_isspace(const char c) { // returns true only on ASCII spaces if (static_cast<unsigned char>(c) >= 128) return false; return isnewline(c) || isspace(c); }
/*
 * Produce the next token from the lexer state.
 *
 * Newlines, blanks and "--" line comments are skipped.  Single-character
 * tokens are returned as their character code; multi-character operators,
 * strings, numbers, identifiers and end of stream are returned as TK_*
 * codes, with details stored in *info by the read_* helpers.
 * TK_ERROR is returned when control comes back through the setjmp()
 * buffer in ls->error.
 */
static int lex(lexer_state *ls, token_info *info)
{
    buffer_reset(ls->buf);

    if (setjmp(ls->error.buf))
        return TK_ERROR;

    for (;;) {
        switch (ls->current) {
        case '\n':
        case '\r':
            /* newline */
            inc_line(ls);
            break;

        case ' ':
        case '\t':
            /* whitespace */
            next(ls);
            break;

        case '-':
            /* a lone '-' is the minus token */
            if (next(ls) != '-')
                return '-';
            /* "--" starts a comment: skip the rest of the line */
            while (next(ls) != EOS && !isnewline(ls))
                ;
            break;

        case '=':
            /* EQ */
            next(ls);
            return '=';

        case '<':
            /* LT, LTE ("<=") or ASSIGN ("<-") */
            next(ls);
            if (ls->current == '=') {
                next(ls);
                return TK_LTE;
            }
            if (ls->current == '-') {
                next(ls);
                return TK_ASSIGN;
            }
            return '<';

        case '>':
            /* GT or GTE (">=") */
            next(ls);
            if (ls->current == '=') {
                next(ls);
                return TK_GTE;
            }
            return '>';

        case '/':
            /* DIV or NEQ ("/=") */
            next(ls);
            if (ls->current == '=') {
                next(ls);
                return TK_NEQ;
            }
            return '/';

        case '"':
            /* STRING */
            read_string(ls, info);
            return TK_STRING;

        case EOS:
            return TK_EOS;

        default: {
            int sym;

            /* NUMERIC */
            if (lisdigit(ls->current)) {
                read_numeric(ls, info);
                return TK_REAL;
            }

            /* ID or RESERVED */
            if (lisalpha(ls->current))
                return read_id_or_reserved(ls, info);

            /*
             * Remaining single-character tokens.  '-', '/', '<', '>' and
             * '=' never reach this point because the outer switch has
             * already dealt with them.
             */
            sym = ls->current;
            switch (sym) {
            case '+': case '*': case '!':
            case '(': case ')':
            case '[': case ']':
            case '{': case '}':
            case ':': case '.': case ',':
                next(ls);
                return sym;
            default:
                lexer_error(ls, "unrecognized symbol %c", sym);
                next(ls);
                break;
            }
            break;
        }
        }
    }
}
/*
** check() -- scan a source file for malformed lines.
**
** name     file to check
** nocheck  when non-zero the check is skipped and 0 is returned
** errline  receives the number of complete lines read when scanning stopped
** errchar  receives the last value read by fgetc(), converted to char
**
** Returns 0 when no problem was found, 1 when the scan stopped on an
** error, and -1 when the file could not be opened (in which case
** *errline and *errchar are not written).
**
** Internal status codes (only "non-zero" is reported to the caller):
**   1  more than five digits in a field      5  blank character
**   2  empty line                            6  illegal character
**   3  fewer than three commas               7  line does not fit in the
**   4  more than three commas                   line buffer
**   any other value comes straight from check_line().
*/
int check( char *name, boolean nocheck, long *errline, char *errchar )
{
    int iscomma(int), isblank(int), isminus(int), isnewline(int);
    int print_check_report(int, char *, long, char);
    int check_line(char *);
    char line[MAX_LINE_LENGTH];
    int numcommas, fieldlength, linelength, c, status, errors, rc;
    long linecount;

#ifdef DIAGNOSTICS
    fprintf(stderr, "entering CHECK\n");
#endif

    errors = 0;
    if (nocheck)
        return(errors);

    if ((fp_check = fopen(name, "r")) == NULL)
    {
        /* Nothing was opened, so there is nothing to close here. */
        perror("LIST");
        return(-1);
    }

    /*
    ** This function is basically deterministic finite-state automaton
    ** that recognizes valid BANCStar code. Actually, all it does is
    ** look for extra or missing commas, too many digits, blank lines
    ** or illegal characters. Once it has assembled an entire line,
    ** it sends it off to check_line() for closer analysis.
    **
    ** As with most such clever things, I don't entirely understand
    ** how it works any more, either. But it does work.
    */
    linecount = 0;
    numcommas = 0;
    fieldlength = 0;
    linelength = 0;
    status = 0;

    while ((c = fgetc(fp_check)) != EOF)
    {
        /*
        ** Keep room for this character plus the terminator that is
        ** appended once the newline has been stored.
        */
        if (linelength >= MAX_LINE_LENGTH - 1)
        {
            status = 7;
            break;
        }

        line[linelength] = (char)c;
        ++linelength;

        if (isdigit(c))
        {
            ++fieldlength;
            if (fieldlength > 5)
            {
                status = 1;
                break;
            }
        }

        if (iscomma(c))
        {
            ++numcommas;
            fieldlength = 0;
        }

        if (isnewline(c))
        {
            ++linecount;
            if (linelength == 1)
            {
                status = 2;
                break;
            }
            if (numcommas < 3)
            {
                status = 3;
                break;
            }
            if (numcommas > 3)
            {
                status = 4;
                break;
            }

            line[linelength] = '\0';
            numcommas = 0;
            fieldlength = 0;
            linelength = 0;

            if ((rc = check_line(line)) != 0)
            {
                status = rc;
                break;
            }
        }

        if (isblank(c))
        {
            status = 5;
            break;
        }

        if ((!isdigit(c))&&(!iscomma(c))&&(!isnewline(c))&&(!isminus(c)))
        {
            status = 6;
            break;
        }

        /*
        ** Check to see if the user wants to escape out of the program.
        ** NOTE(review): the "Continuing" message is printed when a key
        ** other than ESC was pressed, not when the exit prompt is
        ** declined; this pairing is kept exactly as in the original.
        */
        if ( kbhit() )
        {
            if ( getch() == 27 )
            {
                fprintf(stdout, "\nAre you sure you want to exit? (Y/N) ");
                console_input = getch();
                if ( ( console_input == 13 ) || ( console_input == 27 ) ||
                     ( console_input == 'Y') || ( console_input == 'y' ) )
                {
                    fclose(fp_input);
                    fclose(fp_output);
                    fclose(fp_prompt);
                    fclose(fp_check);
                    fprintf(stdout, "\nOkay.\n");
                    exit(-2);
                }
            }
            else
                fprintf(stdout, "\nContinuing . . .\n");
        } /* end of user exit loop */
    } /* end while fgetc(fp_check */

    /*
    ** report any errors found to LIST High Command.
    */
    fclose(fp_check);

    if (status != 0)
        errors = 1;

    *errline = linecount;
    *errchar = (char)c;

#ifdef DIAGNOSTICS
    fprintf(stderr, "exit CHECK\n");
#endif

    return(errors);
} /* end check() */
/*
 * Run the compiled program starting at pc against the text at sp.
 *
 * pc     first instruction to execute
 * sp     current position in the subject string
 * bol    start of the subject string, used by the line and word anchors
 * flags  REG_ICASE, REG_NEWLINE and REG_NOTBOL are consulted here
 * out    passed to spawn() for the initial thread and overwritten with
 *        the capture positions when I_END is reached
 *
 * Returns 1 when some thread reaches I_END, 0 when every thread dies or
 * when the pending-thread stack would exceed MAXTHREAD entries.
 *
 * Alternatives are explored depth-first: I_SPLIT pushes one branch on the
 * `ready` stack and continues with the other; a failing thread jumps to
 * `dead` and the most recently pushed branch is resumed.
 */
static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out)
{
	Rethread ready[MAXTHREAD];
	Resub scratch;
	Resub sub;
	Rune c;
	int nready;
	int i;

	/* queue initial thread */
	spawn(ready + 0, pc, sp, out);
	nready = 1;

	/* run threads in stack order */
	while (nready > 0) {
		/* pop the most recently queued thread and restore its state */
		--nready;
		pc = ready[nready].pc;
		sp = ready[nready].sp;
		memcpy(&sub, &ready[nready].sub, sizeof sub);
		for (;;) {
			switch (pc->opcode) {
			case I_END:
				/* success: hand the captures of this thread to the caller */
				for (i = 0; i < MAXSUB; ++i) {
					out->sub[i].sp = sub.sub[i].sp;
					out->sub[i].ep = sub.sub[i].ep;
				}
				return 1;
			case I_JUMP:
				pc = pc->x;
				continue;
			case I_SPLIT:
				/* queue the y branch for later, carry on with the x branch */
				if (nready >= MAXTHREAD) {
					fprintf(stderr, "regexec: backtrack overflow!\n");
					return 0;
				}
				spawn(&ready[nready++], pc->y, sp, &sub);
				pc = pc->x;
				continue;
			case I_PLA:
				/*
				 * lookahead that must succeed: the sub-program at x is run
				 * recursively from the current position; sp itself is not
				 * advanced, and the recursive call writes into sub
				 */
				if (!match(pc->x, sp, bol, flags, &sub))
					goto dead;
				pc = pc->y;
				continue;
			case I_NLA:
				/*
				 * lookahead that must fail: run on a copy of the captures
				 * so that sub is left untouched
				 */
				memcpy(&scratch, &sub, sizeof scratch);
				if (match(pc->x, sp, bol, flags, &scratch))
					goto dead;
				pc = pc->y;
				continue;
			case I_ANYNL:
				/* any character, newline included; only end of string fails */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				break;
			case I_ANY:
				/* any character except end of string and newline */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (isnewline(c))
					goto dead;
				break;
			case I_CHAR:
				/* one literal character, canonicalised first under REG_ICASE */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (flags & REG_ICASE)
					c = canon(c);
				if (c != pc->c)
					goto dead;
				break;
			case I_CCLASS:
				/* character must belong to the class pc->cc */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (flags & REG_ICASE) {
					if (!incclasscanon(pc->cc, canon(c)))
						goto dead;
				} else {
					if (!incclass(pc->cc, c))
						goto dead;
				}
				break;
			case I_NCCLASS:
				/* character must NOT belong to the class pc->cc */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (flags & REG_ICASE) {
					if (incclasscanon(pc->cc, canon(c)))
						goto dead;
				} else {
					if (incclass(pc->cc, c))
						goto dead;
				}
				break;
			case I_REF:
				/*
				 * back-reference: compare the text at sp with what capture
				 * pc->n spans; i is that capture's length in bytes
				 */
				i = sub.sub[pc->n].ep - sub.sub[pc->n].sp;
				if (flags & REG_ICASE) {
					if (strncmpcanon(sp, sub.sub[pc->n].sp, i))
						goto dead;
				} else {
					if (strncmp(sp, sub.sub[pc->n].sp, i))
						goto dead;
				}
				if (i > 0)
					sp += i;
				break;
			case I_BOL:
				/*
				 * start of the subject (unless REG_NOTBOL), or, under
				 * REG_NEWLINE, right after a newline character
				 */
				if (sp == bol && !(flags & REG_NOTBOL))
					break;
				if (flags & REG_NEWLINE)
					if (sp > bol && isnewline(sp[-1]))
						break;
				goto dead;
			case I_EOL:
				/* end of the subject, or, under REG_NEWLINE, at a newline */
				if (*sp == 0)
					break;
				if (flags & REG_NEWLINE)
					if (isnewline(*sp))
						break;
				goto dead;
			case I_WORD:
				/*
				 * word boundary: exactly one of the characters before and
				 * at sp is a word character
				 */
				i = sp > bol && iswordchar(sp[-1]);
				i ^= iswordchar(sp[0]);
				if (i)
					break;
				goto dead;
			case I_NWORD:
				/* not a word boundary: the inverse of I_WORD */
				i = sp > bol && iswordchar(sp[-1]);
				i ^= iswordchar(sp[0]);
				if (!i)
					break;
				goto dead;
			case I_LPAR:
				/* record where capture pc->n starts */
				sub.sub[pc->n].sp = sp;
				break;
			case I_RPAR:
				/* record where capture pc->n ends */
				sub.sub[pc->n].ep = sp;
				break;
			default:
				/* unknown opcode: treat as a failed thread */
				goto dead;
			}
			/* the instruction matched: step to the next one */
			pc = pc + 1;
		}
dead: ;
	}
	/* every queued thread has failed */
	return 0;
}
//// // iswhitespace // // Takes a character and returns true if the character is a whitespace // character. Whitespace characters include spaces (' '), tabs ('\t'), // newlines ('\n'), and carriage returns ('\r'). // bool iswhitespace (char test) { if (test == ' ' || test == '\t' || isnewline (test)) return TRUE; return FALSE; }
/*
 * Locate `key` inside `[section]` of the ini-style text in buf.
 *
 * On success 1 is returned and the out-parameters hold offsets into buf:
 *   *sec_s / *sec_e      first character of the section name / first
 *                        non-space character after the closing brace
 *   *key_s / *key_e      first / last character of the key
 *   *value_s / *value_e  first character after '=' / end of that line
 * On failure 0 is returned; all out-parameters are first set to -1, and
 * *sec_s / *sec_e are still filled in when the section itself was found.
 *
 * Section and key names must match exactly.  The original compared only
 * as many characters as the name found in the file has, so a file entry
 * that was a mere prefix of the requested name was accepted.
 * Lines starting with ';' are comments.
 */
static int parse_file(const char *section, char *key, const char *buf,int *sec_s,int *sec_e,
					  int *key_s,int *key_e, int *value_s, int *value_e)
{
	const char *p = buf;
	int i=0;
	size_t section_len;
	size_t key_len;

	assert(buf!=NULL);
	assert(section != NULL && strlen(section));
	assert(key != NULL && strlen(key));

	section_len = strlen(section);
	key_len = strlen(key);

	*sec_e = *sec_s = *key_e = *key_s = *value_s = *value_e = -1;

	while( !isend(p[i]) ) {
		/* a section header is a left brace in the first column of a line */
		if( !( ( 0==i || isnewline(p[i-1]) ) && isleftbarce(p[i]) ) ) {
			i++;
			continue;
		}

		int section_start=i+1;

		/* find the ']' */
		do {
			i++;
		} while( !isrightbrace(p[i]) && !isend(p[i]) );

		if( (size_t)(i-section_start) != section_len ||
			0 != strncmp(p+section_start, section, section_len) ) {
			/* some other section: the outer loop moves on from here */
			continue;
		}

		/* step over ']' unless the header was cut off by the end of the text */
		if( !isend(p[i]) ) {
			i++;
		}

		/* skip over space characters after ']' */
		while( isspace((unsigned char)p[i]) ) {
			i++;
		}

		/* the section is found */
		*sec_s = section_start;
		*sec_e = i;

		/* walk the lines of this section until the next header or the end */
		while( ! (isnewline(p[i-1]) && isleftbarce(p[i])) && !isend(p[i]) ) {
			int line_start = i;
			int j;

			while( !isnewline(p[i]) && !isend(p[i]) ) {
				i++;
			}
			/* now i is equal to the end of the line */

			if( ';' != p[line_start] ) {	/* skip over comment */
				/* only the first '=' of the line separates key and value */
				for( j = line_start; j < i && p[j] != '='; j++ )
					;

				if( j > line_start && j < i &&
					(size_t)(j-line_start) == key_len &&
					strncmp(key, p+line_start, key_len) == 0 ) {
					/* find the key ok */
					*key_s = line_start;
					*key_e = j-1;

					*value_s = j+1;
					*value_e = i;

					return 1;
				}
			}

			/* move to the next line, but never past the terminator */
			if( !isend(p[i]) ) {
				i++;
			}
		}
	}
	return 0;
}
/*
 * Whitespace test that gives the same answer for Mac, DOS and Unix style
 * text on every system: true for newline characters and for ASCII
 * characters that isspace() accepts.
 *
 * Bytes outside the ASCII range are never whitespace.  Rejecting them up
 * front also keeps a negative char value away from isspace(), whose
 * behaviour is undefined for arguments that are neither EOF nor
 * representable as unsigned char.
 */
bool portable_isspace(char c)
{
	if ((unsigned char)c >= 128)
		return false;
	return isnewline(c) || isspace((unsigned char)c);
}
int main(int argc, char *argv[], char *envp[]) { FILE *fdata; /* .data file (input)*/ FILE *fmodel; /* .model file (output) */ FILE *fauc; /* .model.1st file (output) */ int atnl; /* at newline (flag) */ char *aucname; /* file name of the AUC file */ double baseline; char c; int divider; /* #of bins to divide in this round */ int failures_seen; /* in this bin */ int i, j, k, l; int maxrounds; /* max(@ROUNDS) */ int numbins; /* misnomer, really "this round" */ char *sca0, *sca, *scc; /* tmp variables */ int t; /* XXX - some kind of counter? */ int this_first, this_last; double this_pauc; double randnum; int *rand_seed = NULL; int total_failures; int num_scores; int init_permute_flag = 1; int unknown_meth = REL_ORDER; Score *p; Score *best; double min_auc; gargc = argc; gargv = argv; genvp = envp; /* * Make sure we grok NaNs * (unknown entries in the input file, given as '?', are * stored as not-a-number values) */ if (!isnan(nan(""))) errx(1, "Implementation does not understand NaNs"); /* * PARSE ARGUMENTS */ if (argc < 3) usage(); if ((fdata = fopen(argv[1], "r")) == NULL) err(1, "cannot open %s for reading", argv[1]); if ((fmodel = fopen(argv[2], "w")) == NULL) err(1, "cannot open %s for writing", argv[2]); if ((aucname = (char *)calloc(strlen(argv[2]) + sizeof (".1st"), 1)) == NULL) err(1, "allocating aucname"); strcpy(aucname, argv[2]); strcat(aucname, ".1st"); if ((fauc = fopen(aucname, "w")) == NULL) err(1, "cannot open %s for writing", aucname); argc -= 3; argv += 3; while (argc > 0) { if (!strcmp(argv[0], "rounds") || !strcmp(argv[0], "--rounds")) { if (argc < 2) usage(); if ((sca0 = sca = strdup(argv[1])) == NULL) err(1, "strdup: copying %s", argv[1]); while (*sca == ',') /* strip leading commas, if any */ sca++; scc = sca + strlen(sca); if (scc == sca) /* must have at least one digit! 
*/ usage(); while (*--scc == ',') /* strip trailing commas */ *scc = '\0'; if (strchr(sca, ',')) { /* * comma-separated list of rounds, parse */ n_rounds = 0; for (scc = sca; *scc; scc++) if (*scc == ',') n_rounds++; n_rounds++; if ((rounds = (int *)calloc(n_rounds, sizeof (*rounds))) == NULL) err(1, "calloc %d rounds", n_rounds); for (i = 0; i < n_rounds; i++) { rounds[i] = strtol(sca, &scc, 10); if (rounds[i] <= 0) errx(1, "round %d must be positive", i); sca = scc + 1; } } else { n_rounds = strtol(sca, NULL, 10); } if (n_rounds <= 0) usage(); argc -= 2; argv += 2; } else if (!strcmp(argv[0], "topk") || !strcmp(argv[0], "--topk")) { if (argc < 2) usage(); if ((sca0 = sca = strdup(argv[1])) == NULL) err(1, "strdup: copying %s", argv[1]); scc = sca + strlen(sca); if (scc == sca) /* must have at least one digit! */ usage(); topk = strtol(sca, NULL, 10); if (topk <= 0) usage(); argc -= 2; argv += 2; } else if (!strcmp(argv[0], "miss-limit") || !strcmp(argv[0], "--miss-limit")) { if (argc < 2) usage(); if ((sca0 = sca = strdup(argv[1])) == NULL) err(1, "strdup: copying %s", argv[1]); scc = sca + strlen(sca); if (scc == sca) /* must have at least one digit! 
*/ usage(); unknown_limit = atof(sca); if (unknown_limit < 0 || unknown_limit > 1) usage(); argc -= 2; argv += 2; } else if (!strcmp(argv[0], "--no-prob-dist")) { prob_dist_flag = 0; argc -= 1; argv += 1; } else if (!strcmp(argv[0], "--no-permute") || !strcmp(argv[0], "no-permute")) { init_permute_flag = 0; argc -= 1; argv += 1; } else if (!strcmp(argv[0], "--sort-unknowns") || !strcmp(argv[0], "sort-unknowns")) { if (argc < 2) usage(); if ((sca = strdup(argv[1])) == NULL) err(1, "strdup: copying %s", argv[1]); unknown_meth = atoi(sca); if (unknown_meth != RAND_ORDER || unknown_meth != REL_ORDER) { usage(); } argc -= 2; argv += 2; } else if (!strcmp(argv[0], "seed") || !strcmp(argv[0], "--seed")) { if (argc < 2) usage(); if ((sca0 = sca = strdup(argv[1])) == NULL) err(1, "strdup: copying %s", argv[1]); scc = sca + strlen(sca); if (scc == sca) { //must have one digit usage(); } if ((rand_seed = (int *) malloc(sizeof(int))) == NULL) err(1, "calloc one integer", 1); *rand_seed = atoi(sca); argc -= 2; argv += 2; } else { /* No other options supported */ usage(); } } /* * if we got a single number as the "rounds" argument, we * interpret it as the list 1,2,...,n */ if (rounds == NULL) { if ((rounds = (int *)calloc(n_rounds, sizeof (*rounds))) == NULL) err(1, "calloc %d rounds", n_rounds); for (i = 0; i < n_rounds; i++) rounds[i] = i+1; } /* * Prep @F and @last */ /* * find the max value in rounds[], * needed for allocating space for failures[][] and last[][] */ for (i = 0, maxrounds = 0; i < n_rounds; i++) if (maxrounds < rounds[i]) maxrounds = rounds[i]; if ((failures = (int **)calloc(n_rounds + 3, sizeof (*failures))) == NULL) err(1, "calloc %d failures", n_rounds); if ((last = (int **)calloc(n_rounds + 3, sizeof (*last))) == NULL) err(1, "calloc %d last", n_rounds); for (i = 0; i <= n_rounds; i++) { if ((failures[i] = (int *)calloc(maxrounds + 3, sizeof (**failures))) == NULL) err(1, "calloc failures[%d]", i); if ((last[i] = (int *)calloc(maxrounds + 3, sizeof 
(**last))) == NULL) err(1, "calloc last[%d]", i); } /* * COUNT NAMES and IDS */ /* * Start reading the first line, counting fields */ //first check to make sure there is data in the file c = fgetc(fdata); if (c == EOF) { fclose(fdata); err(1, "Error: Data file %s is empty\n", gargv[1]); } n_names = 1; /* at least one! */ /* attributes a comma separated. So count the number of commas in the first line to calculate the number of attributes in the data. */ while (!isnewline(c) && c != EOF) { if (c == ',') n_names++; c = fgetc(fdata); } /* * We've read the first line; let's keep counting lines. * There is some cruftiness in the code in order to deal with * text files with lines ending in \r\n and not just \n */ n_ids = 1; /* we've already read one line! */ atnl = 0; while ((c = fgetc(fdata)) != EOF) if (isnewline(c)) { if (!atnl) { n_ids++; atnl++; } } else { atnl = 0; } fclose(fdata); if ((fdata = fopen(gargv[1], "r")) == NULL) err(1, "cannot open %s for reading", gargv[1]); if ((tabula = (double **)calloc(n_ids, sizeof (*tabula))) == NULL) err(1, "allocating tabula"); for (i = 0; i < n_ids; i++) { if ((tabula[i] = (double *)calloc(n_names, sizeof (**tabula))) == NULL) err(1, "allocating %d-th row of table\n", i); for (j = 0; j < n_names - 1; j++) if (fscanf(fdata, "%lg,", &tabula[i][j]) != 1) { tabula[i][j] = nan(""); while (fgetc(fdata) != ',') ; } fscanf(fdata, "%lg", &tabula[i][j]); /* * XXX - a well-formed file * MUST not have the result * value (last column) be a '?' 
* DOUBLE_CHECK */ } total_failures = 0; for (i = 0; i < n_ids; i++) total_failures += tabula[i][n_names - 1]; printf("data_file = %s\n", gargv[1]); printf("model = %s\n", gargv[2]); printf("nr rounds = %d\tnr splits=", n_rounds); for (i = 0; i < n_rounds; i++) printf(" %d", rounds[i]); printf("\nnr_examples = %d\ttotal_failures = %d", n_ids, total_failures); printf("\tnr_attribs = %d\n", n_names); // seed random no generator if (rand_seed == NULL) { srand((unsigned)time(NULL)); } else { srand(*rand_seed); //don't need rand_seed anymore... free(rand_seed); rand_seed = NULL; } if ((order = (int *)calloc(n_ids, sizeof (*order))) == NULL) err(1, "calloc %d order", n_ids); if ((sublist_order = (int *)calloc(n_ids, sizeof (*sublist_order))) == NULL) err(1, "calloc %d sublist_order", n_ids); if ((scores = (Score *)calloc(MAX_VARS, sizeof (Score))) == NULL) err(1, "calloc %d scores", MAX_VARS); for (i = 0; i < MAX_VARS; i++) { scores[i].auc = 0; if ((scores[i].order = (int *)calloc(n_ids, sizeof (*(scores[i].order)))) == NULL) err(1, "calloc %d scores[%d].order", n_ids, i); } if ((ignore_set = (int *)calloc(n_names-1, sizeof (*ignore_set))) == NULL) err(1, "calloc %d ignore_set", n_names-1); if ((this_order = (int *)calloc(n_ids, sizeof (*this_order))) == NULL) err(1, "calloc %d this_order", n_ids); if ((best_order = (int *)calloc(n_ids, sizeof (*best_order))) == NULL) err(1, "calloc %d best_order", n_ids); /* randomize initial ordering */ if (init_permute_flag) { for (i = 0; i < n_ids; i++) order[i] = -1; for (i = 0; i < n_ids; i++) { randnum = (double)rand()/((unsigned)RAND_MAX+1); // [0,1) j = (int)(randnum*n_ids); while (order[j] != -1) j = (j + 1) % n_ids; order[j] = i; } } else { for (i = 0; i < n_ids; i++) { order[i] = i; } } /* find variables to ignore */ for (i = 0; i < n_names-1; i++) { ignore_set[i] = 0; if (check_var(i,n_ids, unknown_limit) < 0) ignore_set[i] = 1; } /* iteration over rounds */ for (numbins = 1; numbins <= n_rounds; numbins++) { printf("round = 
%d\tsplits = %d\n", numbins, rounds[numbins - 1]); t = 0; failures_seen = 0; for (divider = 0; divider < rounds[numbins - 1]; divider++) { while ((failures_seen < ((divider + 1) * (double)total_failures / rounds[numbins - 1])) && (t < n_ids)) { failures_seen += tabula[order[t]][n_names-1]; t++; } last[numbins][divider] = t - 1; failures[numbins][divider] = failures_seen; if (divider == (rounds[numbins - 1] - 1)) failures[numbins][divider+1] = total_failures - failures_seen; } this_first = 0; /* find the first element of the sublist */ /* iteration over bins in this round */ for (j = 0; j < rounds[numbins - 1]; j++) { if (j < rounds[numbins - 1] - 1) this_last = last[numbins][j]; else this_last = n_ids - 1; printf("\tbin %d: [%d..%d] %d failures", j, this_first, this_last, failures[numbins][j] - (j ? failures[numbins][j-1] : 0)); printf("\t(%.12f%%)\n", (this_last - this_first + 1) * 100.0 / n_ids); // ordering from previous round for (k = this_first; k <= this_last; k++) sublist_order[k - this_first] = order[k]; baseline = pauc(sublist_order, this_last - this_first + 1); // reset scores for (k = 1; k < MAX_VARS; k++) scores[k].auc = 0; min_score_ptr = NULL; num_scores = 0; /* iteration over variables for this bin */ for (exti = 0; exti < n_names - 1; exti++) { if (ignore_set[exti]) continue; /* variable ascending */ for (k = this_first; k <= this_last; k++) this_order[k - this_first] = sublist_order[k - this_first]; if (sort_examples((void *)this_order, this_last - this_first + 1, sizeof (*this_order), compasc, unknown_meth) < 0) err(1, "sort_examples this_order ascending"); this_pauc = pauc(this_order, this_last - this_first + 1); if (numbins == 1) fprintf(fauc, "VAR=%d AUC=%f DIR=asc\n", exti, this_pauc); if (this_pauc > baseline) insert_score(scores, &num_scores, exti, "a", this_pauc, this_order, this_last - this_first + 1); /* variable descending */ for (k = this_first; k <= this_last; k++) this_order[k - this_first] = sublist_order[k - this_first]; if 
(sort_examples((void *)this_order, this_last - this_first + 1, sizeof (*this_order), compdesc, unknown_meth) < 0) err(1, "sort_examples this_order descending"); this_pauc = pauc(this_order, this_last - this_first + 1); if (numbins == 1) fprintf(fauc, "VAR=%d AUC=%f DIR=desc\n", exti, this_pauc); if (this_pauc > baseline) insert_score(scores, &num_scores, exti, "d", this_pauc, this_order, this_last - this_first + 1); } /* end variables loop */ if (num_scores > 0) { // sort scores in desc order of auc if (mergesort((void *)scores, num_scores, sizeof (*scores), comp_auc_desc) < 0) err(1, "mergesort scores descending"); if (prob_dist_flag) { // pick top variable probabilistically // XXX: pick topk vars and compute avg sort auc_to_dist(scores, num_scores); best = weighted_rand(scores, num_scores); } else best = scores; // merge results into main array for (k = this_first; k <= this_last; k++) order[k] = (*best).order[k - this_first]; // update model fprintf(fmodel, "%.12f,%1d,%s", (this_last + 1) / (double) n_ids, best->var, best->dir); fflush(fmodel); } else { fprintf(fmodel, "%.12f,nop", (this_last + 1) / (double) n_ids); fflush(fmodel); } if (j < rounds[numbins - 1] - 1) { fprintf(fmodel, ";"); fflush(fmodel); } this_first = this_last + 1; } /* end bins loop */ fprintf(fmodel, "\n"); fflush(fmodel); printf(" Overall training AUC %.6f\n", pauc(order, n_ids)); } /* end rounds loop */ fclose(fmodel); fclose(fauc); exit(0); }