/**
 * Replace the value stored under an existing key.
 *
 * Entries are scanned in order; for the first entry whose key equals `key`
 * the old value is released with str_free() and a copy of `val` (made with
 * str_ndup()) is stored in its place. Keys that are not present are NOT
 * added.
 *
 * @param array  Pair array to update.
 * @param key    Key to look for (pointer + length).
 * @param val    New value (pointer + length); it is duplicated.
 * @return 1 if an entry was found and updated, 0 otherwise.
 */
int pair_array_set(pair_array_t* array, str_t key, str_t val)
{
    size_t i;

    for (i = 0; i < array->count; ++i) {
        /*
         * Lengths must agree before the bytes are compared. Comparing only
         * the first key.len bytes is a prefix match: "ab" would overwrite
         * the value of "abc", and an empty key would match the first entry.
         */
        if (array->keys[i].len != key.len)
            continue;
        if (strncmp(key.ptr, array->keys[i].ptr, key.len) != 0)
            continue;

        str_free(array->vals[i]);
        array->vals[i] = str_ndup(val.ptr, val.len);
        return 1;
    }

    return 0;
}
/* internal function to copy a word into a new conjunct */ static struct conjunct *conjunct_add(struct query *query, struct vocab_vector * sve, const char *term, unsigned int termlen, int type, unsigned int *maxterms) { struct conjunct *ret = NULL; ret = conjunct_find(query, sve, term, termlen, type); if (ret != NULL) { ret->f_qt++; } else { /* couldn't find a match, might have to insert the word */ if (query->terms < *maxterms) { ret = &query->term[query->terms]; ret->type = type; ret->f_qt = 1; ret->f_t = sve->header.docwp.docs; ret->F_t = sve->header.docwp.occurs; ret->term.next = NULL; ret->vecmem = NULL; ret->terms = 1; ret->sloppiness = 0; ret->cutoff = 0; query->terms++; if (!(ret->term.term = str_ndup(term, termlen))) { query->terms--; ret = NULL; } memcpy(&ret->term.vocab, sve, sizeof(*sve)); /* allocate memory for vector part of vector */ if (ret->term.vocab.location == VOCAB_LOCATION_VOCAB) { if ((ret->term.vecmem = malloc(sve->size))) { memcpy(ret->term.vecmem, sve->loc.vocab.vec, sve->size); } else { free(ret->term.term); return 0; } } else { ret->term.vecmem = NULL; } } /* else we might have to steal a slot (OPTIMIZE) */ } return ret; }
/* internal function to append a new word to a conjunct */ static int conjunct_append(struct query *query, struct conjunct *conj, struct vocab_vector * sve, const char *term, unsigned int termlen, unsigned int *maxterms) { struct term *currterm; /* OPTIMISE: search existing AND conjunct for word */ /* we're building a new phrase */ if (query->terms < *maxterms) { /* iterate to the end of the phrase */ for (currterm = &conj->term; currterm->next; currterm = currterm->next) ; /* add next word on */ (*maxterms)--; currterm = currterm->next = &query->term[*maxterms].term; if (!(currterm->term = str_ndup(term, termlen))) { return 0; } memcpy(&currterm->vocab, sve, sizeof(*sve)); /* allocate memory for vector part of vector */ if (currterm->vocab.location == VOCAB_LOCATION_VOCAB) { if ((currterm->vecmem = malloc(currterm->vocab.size))) { memcpy(currterm->vecmem, currterm->vocab.loc.vocab.vec, currterm->vocab.size); } else { free(currterm->term); return 0; } } else { currterm->vecmem = NULL; } currterm->next = NULL; conj->terms++; } /* else we need to steal slots (OPTIMIZE) */ return 1; }
union node* expand_param(struct nargparam* param, union node** nptr, struct vartab* varstack, char* argv[], int exitcode, int flags) { union node* n = *nptr; stralloc value; char* str = NULL; const char *v = NULL; unsigned long argc, vlen = 0; for(argc = 0; argv[argc]; ++argc) ; stralloc_init(&value); /* treat special arguments */ if(param->flag & S_SPECIAL) { switch(param->flag & S_SPECIAL) { /* $# substitution */ case S_ARGC: { stralloc_catulong0(&value, argc, 0); break; } /* $* substitution */ case S_ARGV: { char** s; for(s = argv; *s;) { stralloc_cats(&n->narg.stra, *s); if(*++s) stralloc_catc(&n->narg.stra, ' '); } break; } /* $@ substitution */ case S_ARGVS: { unsigned int i = 0; while(i < argc) { param->flag &= ~S_SPECIAL; param->flag |= S_ARG; param->numb = 1 + i; n = expand_param(param, nptr, varstack, argv, exitcode,flags); if(++i < argc) nptr = &n->list.next; } return n; } /* $? substitution */ case S_EXITCODE: { stralloc_catulong0(&value, exitcode, 0); break; } /* $- substitution */ case S_FLAGS: break; /* $! substitution */ case S_BGEXCODE: break; /* $[0-9] arg subst */ case S_ARG: { if(param->numb == 0) { /* stralloc_cats(&value, sh_argv0); */ } else if(param->numb - 1 < argc) { stralloc_cats(&value, argv[param->numb - 1]); } break; } /* $$ arg subst */ case S_PID: { stralloc_catulong0(&value, getpid(), 0); break; } } /* special parameters are always set */ if(value.len) { stralloc_nul(&value); v = value.s; } vlen = value.len; } /* ..and variable substitutions */ else { size_t offset; /* look for the variable. 
if the S_NULL flag is set and we have a var which is null set v to NULL */ if((v = var_get(varstack, param->name, &offset))) { if(v[offset] == '\0' && (param->flag & S_NULL)) { v = NULL; vlen = 0; } else { v = &v[offset]; vlen = str_len(v); } } } /* check for S_STRLEN substitution */ if(param->flag & S_STRLEN) { char lstr[FMT_ULONG]; n = expand_cat(lstr, fmt_ulong(lstr, vlen), nptr, varstack, flags); stralloc_free(&value); return n; } str = str_ndup(v, vlen); /* otherwise expand the apropriate variable/word subst */ switch(param->flag & S_VAR) { /* return word if parameter unset (or null) */ case S_DEFAULT: { if(v) n = expand_cat(v, vlen, nptr, varstack, flags); /* unset, substitute */ else n = expand_arg(¶m->word->narg, nptr, varstack, argv, exitcode, flags); break; } /* if parameter unset (or null) then expand word to it and substitute paramter */ case S_ASGNDEF: { if(v) n = expand_cat(v, vlen, nptr, varstack, flags); else { n = expand_arg(¶m->word->narg, nptr, varstack, argv, exitcode, flags | X_NOSPLIT); var_setvsa(param->name, /* BUG */ &n->narg.stra, V_DEFAULT); } break; } /* indicate error if null or unset */ case S_ERRNULL: { if(v) n = expand_cat(v, vlen, nptr, varstack, flags); else { union node* tmpnode = NULL; n = expand_arg(¶m->word->narg, &tmpnode, varstack, argv, exitcode, flags); errmsg_warn((n && n->narg.stra.s) ? n->narg.stra.s : "parameter null or not set", 0); if(tmpnode) tree_free(tmpnode); } break; } /* if parameter unset (or null) then substitute null, otherwise substitute word */ case S_ALTERNAT: { if(v) n = expand_arg(¶m->word->narg, nptr, varstack, argv, exitcode, flags); break; /* remove smallest matching suffix */ case S_RSSFX: { int i; stralloc sa; if(v && vlen) { expand_copysa(param->word, &sa, varstack, argv, exitcode, 0); stralloc_nul(&sa); for(i = vlen - 1; i >= 0; i--) if(fnmatch(sa.s, str + i, FNM_PERIOD) == 0) break; n = expand_cat(v, (i < 0 ? 
vlen : i), nptr, varstack, flags); } break; } } /* remove largest matching suffix */ case S_RLSFX: { unsigned int i; stralloc sa; if(v && vlen) { expand_copysa(param->word, &sa, varstack, argv, exitcode, 0); stralloc_nul(&sa); for(i = 0; i <= vlen; i++) if(fnmatch(sa.s, str + i, FNM_PERIOD) == 0) break; n = expand_cat(v, (i > vlen ? vlen : i), nptr, varstack, flags); } break; } /* remove smallest matching prefix */ case S_RSPFX: { unsigned int i; stralloc sa; if(v && vlen) { expand_copysa(param->word, &sa, varstack, argv, exitcode, 0); stralloc_nul(&sa); for(i = 1; i <= vlen; i++) { str_copyn(str, v, i); if(fnmatch(sa.s, (char*)v, FNM_PERIOD) == 0) break; } if(i > vlen) i = 0; n = expand_cat(v + i, vlen - i, nptr, varstack, flags); str_copy(str, v); } break; } /* remove largest matching prefix */ case S_RLPFX: { unsigned int i; stralloc sa; if(v && vlen) { expand_copysa(param->word, &sa, varstack, argv, exitcode, 0); stralloc_nul(&sa); for(i = vlen; i > 0; i--) { str_copyn(str, v, i); if(fnmatch(sa.s, (char*)v, FNM_PERIOD) == 0) break; } if(i == 0) i = vlen; n = expand_cat(v + i, vlen - i, nptr, varstack, flags); str_copy(str, v); } break; } } free(str); stralloc_free(&value); return n; }
/** Perform tab completion at the current cursor position.
 *
 * Collects every completion offered by the registered completion ops.
 * With exactly one match its remainder is inserted. With several matches
 * the part of their common prefix that is not typed yet is inserted; if
 * there is no such part, the sorted list of candidates is displayed.
 *
 * Running out of memory is reported on standard output; completions
 * gathered up to that point are still used.
 *
 * @param ti Text input.
 */
static void tinput_text_complete(tinput_t *ti)
{
	void *state;
	size_t cstart;
	char *ctmp;
	char **compl;		/* Array of completions */
	size_t compl_len;	/* Current length of @c compl array */
	size_t cnum;
	size_t i;
	int rc;

	if (ti->compl_ops == NULL)
		return;

	/*
	 * Obtain list of all possible completions (growing array).
	 */

	rc = (*ti->compl_ops->init)(ti->buffer, ti->pos, &cstart, &state);
	if (rc != EOK)
		return;

	cnum = 0;

	compl_len = 1;
	compl = malloc(compl_len * sizeof(char *));
	if (compl == NULL) {
		printf("Error: Out of memory.\n");
		/* init succeeded, so the enumeration must still be closed */
		(*ti->compl_ops->fini)(state);
		return;
	}

	while (true) {
		rc = (*ti->compl_ops->get_next)(state, &ctmp);
		if (rc != EOK)
			break;

		if (cnum >= compl_len) {
			/*
			 * Extend array. Grow through a temporary: on failure
			 * realloc leaves the old array intact, and it is still
			 * needed (and freed) below.
			 */
			size_t new_len = 2 * compl_len;
			char **grown = realloc(compl, new_len * sizeof(char *));
			if (grown == NULL) {
				printf("Error: Out of memory.\n");
				break;
			}
			compl = grown;
			compl_len = new_len;
		}

		compl[cnum] = str_dup(ctmp);
		if (compl[cnum] == NULL) {
			printf("Error: Out of memory.\n");
			break;
		}

		cnum++;
	}

	(*ti->compl_ops->fini)(state);

	if (cnum > 1) {
		/*
		 * More than one match. Determine maximum common prefix.
		 */
		size_t cplen;

		cplen = str_length(compl[0]);
		for (i = 1; i < cnum; i++)
			cplen = min(cplen, common_pref_len(compl[0], compl[i]));

		/* Compute how many bytes we should skip. */
		size_t istart = str_lsize(compl[0], ti->pos - cstart);

		if (cplen > istart) {
			/* Insert common prefix. */

			/* Copy remainder of common prefix. */
			char *cpref = str_ndup(compl[0] + istart,
			    str_lsize(compl[0], cplen - istart));

			if (cpref == NULL) {
				printf("Error: Out of memory.\n");
			} else {
				/* Insert it. */
				tinput_insert_string(ti, cpref);
				free(cpref);
			}
		} else {
			/* No common prefix. Sort and display all entries. */

			qsort(compl, cnum, sizeof(char *), compl_cmp, NULL);

			tinput_jump_after(ti);
			tinput_show_completions(ti, compl, cnum);
			tinput_display(ti);
		}
	} else if (cnum == 1) {
		/*
		 * We have exactly one match. Insert it.
		 */

		/* Compute how many bytes of completion string we should skip. */
		size_t istart = str_lsize(compl[0], ti->pos - cstart);

		/* Insert remainder of completion string at current position. */
		tinput_insert_string(ti, compl[0] + istart);
	}

	for (i = 0; i < cnum; i++)
		free(compl[i]);
	free(compl);
}
/* internal function to copy a conjunction and add an new word onto the end of * it (convenience function) */ static struct conjunct *conjunct_copy(struct query *query, struct conjunct *conj, unsigned int matches, struct vocab_vector * sve, const char *term, unsigned int termlen, unsigned int *maxterms) { struct conjunct *ret = NULL, *next; struct term *currterm, *nextterm; if (!matches) { return NULL; } /* copy head of non-matching phrase */ if (query->terms < *maxterms) { ret = next = &query->term[query->terms++]; memcpy(&next->term.vocab, &conj->term.vocab, sizeof(conj->term.vocab)); if (!(next->term.term = str_dup(conj->term.term))) { /* FIXME: need to cleanup properly here */ return NULL; } /* allocate memory for vector part of vector */ if (next->term.vocab.location == VOCAB_LOCATION_VOCAB) { if ((next->term.vecmem = malloc(conj->term.vocab.size))) { memcpy(next->term.vecmem, conj->term.vecmem, conj->term.vocab.size); } else { free(next->term.term); return NULL; } } next->term.next = NULL; next->terms = 1; next->f_qt = 1; next->type = conj->type; matches--; nextterm = &next->term; currterm = conj->term.next; while ((query->terms < *maxterms) && matches) { (*maxterms)--; matches--; nextterm->next = &query->term[*maxterms].term; nextterm = nextterm->next; memcpy(&nextterm->vocab, &currterm->vocab, sizeof(currterm->vocab)); if (!(nextterm->term = str_dup(currterm->term))) { /* FIXME: need to cleanup properly here */ return NULL; } /* allocate memory for vector part of vector */ if (nextterm->vocab.location == VOCAB_LOCATION_VOCAB) { if ((nextterm->vecmem = malloc(currterm->vocab.size))) { memcpy(nextterm->vecmem, currterm->vecmem, currterm->vocab.size); } else { free(nextterm->term); return NULL; } } next->terms++; } nextterm->next = NULL; /* append new term to phrase */ if ((query->terms < *maxterms) && sve) { /* add next word on */ (*maxterms)--; currterm = nextterm->next = &query->term[*maxterms].term; if (!(currterm->term = str_ndup(term, termlen))) { /* 
FIXME: need to cleanup properly here */ return NULL; } memcpy(&currterm->vocab, sve, sizeof(*sve)); /* allocate memory for vector part of vector */ if (currterm->vocab.location == VOCAB_LOCATION_VOCAB) { if ((currterm->vecmem = malloc(currterm->vocab.size))) { memcpy(currterm->vecmem, currterm->vocab.loc.vocab.vec, currterm->vocab.size); } else { free(nextterm->term); return NULL; } } currterm->next = NULL; next->terms++; } } /* else we need to steal slots (OPTIMIZE) */ return ret; }
int test_file(FILE *fp, int argc, char **argv) { char buf[65535 + 1]; char *pos; unsigned int strategy = 0; /* what bucketing strategy we're using */ void *ptr = NULL; unsigned int bucketsize = 0; struct params params = {0}; struct chash *hash = NULL; char name[256]; if (!parse_params(argc, argv, ¶ms)) { fprintf(stderr, "failed to parse params\n"); return 0; } while (fgets((char *) buf, 65535, fp)) { str_rtrim(buf); pos = (char *) str_ltrim(buf); if (!str_casecmp(pos, "new")) { /* creating a new bucket */ unsigned int size = -1; if (ptr) { chash_delete(hash); free(ptr); } /* read parameters */ if ((fscanf(fp, "%255s %u %u", name, &strategy, &size) == 3) && (size <= 65535) && (bucketsize = size) && (ptr = malloc(size)) && (hash = chash_ptr_new(1, 2.0, /* some fn pointer casting dodginess */ (unsigned int (*)(const void *)) str_len, (int (*)(const void *, const void *)) str_cmp)) && (bucket_new(ptr, bucketsize, strategy))) { /* succeeded, do nothing */ if (params.verbose) { printf("%s: new bucket with size %u strategy %u\n", name, size, strategy); } } else { fprintf(stderr, "%s: failed to create bucket\n", name); return 0; } } else if (!str_casecmp(pos, "add")) { /* adding a term to the bucket */ void *ret; unsigned int veclen, succeed, len; int toobig; if (!ptr) { return 0; } /* read parameters */ if ((fscanf(fp, "%65535s %u %u", buf, &veclen, &succeed) == 3) && (veclen <= 65535)) { len = str_len(buf); if ((((ret = bucket_alloc(ptr, bucketsize, strategy, buf, len, veclen, &toobig, NULL)) && succeed) || (!ret && !succeed))) { /* do nothing */ if (params.verbose) { printf("%s: added term '%s'\n", name, buf); } } else if (succeed) { fprintf(stderr, "%s: failed to add '%s' to bucket\n", name, buf); return 0; } else if (!succeed) { fprintf(stderr, "%s: add '%s' succeeded but shouldn't " "have\n", name, buf); return 0; } } else { fprintf(stderr, "%s: failed to add\n", name); return 0; } } else if (!str_casecmp(pos, "ls")) { /* matching stuff in the bucket */ unsigned int 
numterms, i, len, veclen, veclen2, state; void *addr; struct chash *tmphash; const char *term; void **tmpptr, *tmp; if (!ptr) { return 0; } if (!(tmphash = chash_ptr_new(1, 2.0, /* some fn pointer casting dodginess */ (unsigned int (*)(const void *)) str_len, (int (*)(const void *, const void *)) str_cmp))) { fprintf(stderr, "%s: failed to init hashtable\n", name); return 0; } /* first, fill hashtable with all terms from bucket */ state = 0; while ((term = bucket_next_term(ptr, bucketsize, strategy, &state, &len, &addr, &veclen))) { if (!((term = str_ndup(term, len)) && (chash_ptr_ptr_insert(tmphash, term, (void*) term) == CHASH_OK))) { fprintf(stderr, "%s: failed to init hashtable\n", name); return 0; } } /* now, take terms from file, comparing them with hashtable * entries */ if (fscanf(fp, "%u", &numterms)) { for (i = 0; i < numterms; i++) { if (fscanf(fp, "%65535s %u ", buf, &veclen)) { if (params.verbose) { printf("%s: ls checking %s\n", name, buf); } if ((addr = bucket_find(ptr, bucketsize, strategy, buf, str_len(buf), &veclen2, NULL)) /* remove it from hashtable */ && chash_ptr_ptr_find(tmphash, buf, &tmpptr) == CHASH_OK && chash_ptr_ptr_remove(tmphash, *tmpptr, &tmp) == CHASH_OK && (free(tmp), 1) && (veclen <= 65535) && (veclen2 == veclen) && fread(buf, veclen, 1, fp) && ((buf[veclen] = '\0'), 1) && (!params.verbose || printf("%s: ls check read '%s'\n", name, buf)) && !memcmp(buf, addr, veclen)) { /* do nothing */ } else { unsigned int j; fprintf(stderr, "%s: ls failed cmp '%s' with '", name, buf); for (j = 0; j < veclen; j++) { putc(((char *) addr)[j], stderr); } fprintf(stderr, "'\n"); return 0; } } else { fprintf(stderr, "%s: ls failed\n", name); return 0; } } if (chash_size(tmphash)) { fprintf(stderr, "%s: ls failed\n", name); return 0; } } else { fprintf(stderr, "%s: ls failed\n", name); return 0; } chash_delete(tmphash); if (params.verbose) { printf("%s: matched all (%u) entries\n", name, numterms); } } else if (!str_casecmp(pos, "set")) { /* setting 
the vector for a term in the bucket */ unsigned int veclen, reallen; void *addr; if (!ptr) { return 0; } /* read parameters */ if ((fscanf(fp, "%65535s %u ", buf, &veclen) == 2) && (veclen <= 65535)) { addr = bucket_find(ptr, bucketsize, strategy, buf, str_len(buf), &reallen, NULL); if (addr && (reallen == veclen) && fread(addr, 1, veclen, fp)) { /* do nothing */ if (params.verbose) { unsigned int j; printf("%s: set term '%s' to '", name, buf); for (j = 0; j < reallen; j++) { putc(((char *) addr)[j], stdout); } printf("'\n"); } } else { fprintf(stderr, "%s: failed to set!\n", name); return 0; } } else { fprintf(stderr, "%s: failed to set\n", name); return 0; } } else if (!str_casecmp(pos, "realloc")) { /* reallocating a term in the bucket */ unsigned int veclen, succeed; int toobig; if (!ptr) { return 0; } /* read parameters */ if ((fscanf(fp, "%65535s %u %u", buf, &veclen, &succeed) == 3) && (veclen <= 65535)) { if (!bucket_realloc(ptr, bucketsize, strategy, buf, str_len(buf), veclen, &toobig)) { fprintf(stderr, "%s: failed to realloc!\n", name); return 0; } } else { fprintf(stderr, "%s: failed to realloc\n", name); return 0; } if (params.verbose) { printf("%s: realloc'd term '%s'\n", name, buf); } } else if (!str_casecmp(pos, "rm")) { /* removing something from the bucket */ unsigned int succeed; if (!ptr) { return 0; } if (fscanf(fp, "%65535s %u", buf, &succeed) == 2) { if (succeed) { if (!(bucket_remove(ptr, bucketsize, strategy, buf, str_len(buf)))) { fprintf(stderr, "%s: failed to rm '%s'\n", name, buf); return 0; } else if (params.verbose) { printf("%s: rm term '%s'\n", name, buf); } } else if (succeed) { fprintf(stderr, "%s: failed to rm\n", name); return 0; } } else { fprintf(stderr, "%s: failed to rm\n", name); return 0; } } else if (!str_casecmp(pos, "print")) { /* printing out the bucket contents */ unsigned int state = 0, len, veclen; const char *term; char format[100]; void *addr; if (!ptr) { printf("can't print, no bucket\n"); } else { do { term = 
bucket_next_term(ptr, bucketsize, strategy, &state, &len, &addr, &veclen); } while (term && memcpy(buf, term, len) && ((buf[len] = '\0') || 1) && snprintf(format, 100, "%%.%us (%%u): '%%.%us' (%%u) " "(off %%u)\n", len, veclen) && printf(format, term, len, (char*) addr, veclen, ((char *) addr) - (char *) ptr)); if (!state) { printf("(empty)\n"); } printf("%u entries, %u data, %u string, %u overhead, %u free\n", bucket_entries(ptr, bucketsize, strategy), bucket_utilised(ptr, bucketsize, strategy), bucket_string(ptr, bucketsize, strategy), bucket_overhead(ptr, bucketsize, strategy), bucket_unused(ptr, bucketsize, strategy)); } } else if (!str_casecmp(pos, "match")) { unsigned int veclen, veclen2; void *addr; if (fscanf(fp, "%65535s %u ", buf, &veclen)) { if ((addr = bucket_find(ptr, bucketsize, strategy, buf, str_len(buf), &veclen2, NULL)) && (veclen <= 65535) && (veclen2 >= veclen) && (!params.verbose || printf("%s: match on '%s' ", name, buf)) && fread(buf, veclen, 1, fp) && !memcmp(buf, addr, veclen)) { if (params.verbose) { printf("content succeeded\n"); } } else { fprintf(stderr, "%s: match failed (%s vs %s)\n", name, buf, (char *) addr); return 0; } } else { fprintf(stderr, "%s: match failed\n", name); return 0; } } else if ((*pos != '#') && str_len(pos)) { fprintf(stderr, "%s: unknown command '%s'\n", name, pos); return 0; } } if (ptr) { chash_delete(hash); free(ptr); } return 1; }
/** Find or create the sysinfo item addressed by a dotted path
 *
 * The first component of the path (the text up to the first '.') is looked
 * up among the siblings starting at *psubtree and created when missing;
 * remaining components are handled by recursing into the item's table
 * subtree.
 *
 * Should be called with sysinfo_lock held.
 *
 * @param name     Remaining part of the sysinfo path.
 * @param psubtree Pointer to the first sibling of the current level, or
 *                 to the place where the first item of a new level is
 *                 to be stored.
 *
 * @return Existing or newly allocated item for the whole path, or NULL
 *         if a component on the way has a subtree that is neither
 *         empty nor a table.
 *
 */
NO_TRACE static sysinfo_item_t *sysinfo_create_path(const char *name,
    sysinfo_item_t **psubtree)
{
	ASSERT(psubtree != NULL);
	
	if (*psubtree == NULL) {
		/* Empty level: the first component becomes its first item */
		size_t len;
		
		for (len = 0; (name[len] != 0) && (name[len] != '.'); len++)
			;
		
		*psubtree =
		    (sysinfo_item_t *) slab_alloc(sysinfo_item_slab, 0);
		ASSERT(*psubtree);
		
		(*psubtree)->name = str_ndup(name, len);
		ASSERT((*psubtree)->name);
		
		/* Last component, nothing below it */
		if (name[len] != '.')
			return *psubtree;
		
		/* Descend for the rest of the path */
		(*psubtree)->subtree_type = SYSINFO_SUBTREE_TABLE;
		return sysinfo_create_path(name + len + 1,
		    &((*psubtree)->subtree.table));
	}
	
	for (sysinfo_item_t *sibling = *psubtree; sibling != NULL;
	    sibling = sibling->next) {
		/* Length of the common prefix of path and sibling name */
		size_t pos = 0;
		
		while ((sibling->name[pos] != 0) &&
		    (name[pos] == sibling->name[pos]))
			pos++;
		
		if (sibling->name[pos] == 0) {
			/* Whole path equals the sibling name -> already there */
			if (name[pos] == 0)
				return sibling;
			
			/* Sibling name is the first component of the path */
			if (name[pos] == '.') {
				if (sibling->subtree_type == SYSINFO_SUBTREE_NONE) {
					/* No subtree yet, start a table */
					sibling->subtree_type = SYSINFO_SUBTREE_TABLE;
				} else if (sibling->subtree_type !=
				    SYSINFO_SUBTREE_TABLE) {
					/*
					 * Subtree is provided by a function,
					 * constant items cannot be put under it.
					 */
					return NULL;
				}
				
				return sysinfo_create_path(name + pos + 1,
				    &(sibling->subtree.table));
			}
		}
		
		if (sibling->next == NULL) {
			/* Nothing matched: append a new item after the last one */
			size_t len;
			
			for (len = 0; (name[len] != 0) && (name[len] != '.'); len++)
				;
			
			sysinfo_item_t *created =
			    (sysinfo_item_t *) slab_alloc(sysinfo_item_slab, 0);
			ASSERT(created);
			
			sibling->next = created;
			
			created->name = str_ndup(name, len);
			ASSERT(created->name);
			
			if (name[len] != '.')
				return created;
			
			created->subtree_type = SYSINFO_SUBTREE_TABLE;
			return sysinfo_create_path(name + len + 1,
			    &(created->subtree.table));
		}
	}
	
	/* Unreachable */
	ASSERT(false);
	return NULL;
}
/*
 * Download the RSS feed at `path` for `board` and queue its unseen items.
 *
 * The document is converted to UTF-8 according to its <?xml encoding=...>
 * attribute (UTF-8 if none is given), the channel title is stored in
 * board->rss_title, and the refresh hints (<ttl>, updatePeriod /
 * updateFrequency) may raise board_check_delay. Every <item> is then turned
 * into a message; items whose md5 is not known to the board yet are handed
 * to prelog_add().
 *
 * Returns 0 on success (an empty body counts as success) and 1 if the
 * request failed or no channel title could be found.
 */
int
rss_board_update(Board *board, char *path)
{
  HttpRequest r;
  char *rsstxt = NULL, *p;
  char *rss_title = NULL;
  XMLBlock xmlb;
  int pos, refresh_request = -1, rest_already_known = 0;
  time_t temps_debut = time(NULL), temps_last_modified;
  int status = 1; /* failure until the feed has been processed */

  prelog_clear();
  clear_XMLBlock(&xmlb);
  wmcc_init_http_request(&r, board->site->prefs, path);
  /* An "Accept: application/rss+xml" header is deliberately not sent:
     too many servers do not understand that type. */
  if (board->site->prefs->use_if_modified_since) { r.p_last_modified = &board->last_modified; }
  http_request_send(&r);
  if (!http_is_ok(&r)) { http_request_close(&r); return 1; }
  wmcc_log_http_request(board->site, &r);
  rsstxt = http_read_all(&r, path);
  http_request_close(&r);
  if (!http_is_ok(&r)) goto done;
  if (!rsstxt || !http_is_ok(&r)) return 1;

  /* "not modified" */
  if (strlen(rsstxt) == 0) { status = 0; goto done; }

  /* try to convert the document to utf8 */
  if ((pos = get_XMLBlock(rsstxt, strlen(rsstxt), "?xml", &xmlb)) >= 0) {
    XMLAttr *a;
    int found = 0;
    if (board->encoding) { free(board->encoding); board->encoding = NULL; }
    for (a = xmlb.attr; a; a = a->next) {
      if (str_case_startswith(a->name, "encoding")) {
        board->encoding = str_ndup(a->value, a->value_len);
        BLAHBLAH(1,printf("%s: found encoding: value = '%s'\n", board->site->prefs->site_name, board->encoding));
        found = 1;
        break;
      }
    }
    if (!found) board->encoding = strdup("UTF-8"); /* default when no encoding is given */
    convert_to_utf8(board->encoding, &rsstxt);
  }

  pos = get_XMLBlock(rsstxt, strlen(rsstxt), "title", &xmlb);
  if (pos < 0 || xmlb.content_len == 0) goto done;
  rss_title = str_ndup(xmlb.content, xmlb.content_len);
  BLAHBLAH(1, myprintf("got TITLE: '%<YEL %s>'\n", rss_title));

  if (board->rss_title) {
    free(board->rss_title);
  }
  board->rss_title = str_ndup(rss_title, 100);

  if (get_XMLBlock(rsstxt, strlen(rsstxt), "ttl", &xmlb) >= 0) {
    refresh_request = atoi(xmlb.content) * 60; /* ttl is given in minutes */
  }
  if (get_XMLBlock(rsstxt, strlen(rsstxt), "*:updatePeriod", &xmlb) >= 0) {
    int period = 1;
    if (str_case_startswith(xmlb.content, "hour")) period = 3600;
    else if (str_case_startswith(xmlb.content, "min")) period = 60;
    if (get_XMLBlock(rsstxt, strlen(rsstxt), "*:updateFrequency", &xmlb) >= 0) {
      refresh_request = period * atoi(xmlb.content);
    }
  }

  /* the feed may only slow the polling down, never speed it up */
  if (refresh_request != -1 && board->site->prefs->board_check_delay < refresh_request) {
    BLAHBLAH(0, myprintf("Changing update frequency for %<grn %s> to %<MAG %d> sec.\n", rss_title, refresh_request));
    board->site->prefs->board_check_delay = refresh_request;
  }

  p = rsstxt;

  temps_last_modified = temps_debut;
  if (board->last_modified) {
    str_to_time_t(board->last_modified, &temps_last_modified);
  }

  do {
    int pos_next_item;
    XMLBlock b2;
    char *title, *link, *description, *msg, *author, *comments_url, *pubdate, *fake_ua;
    char msgd[BOARD_MSG_MAX_LEN];
    char stimestamp[15];
    time_t timestamp;

    pos_next_item = get_XMLBlock(p, strlen(p), "item", &xmlb);
    if (pos_next_item < 0) {
      BLAHBLAH(1,printf("fin de '%s'\n", rss_title));
      break;
    }

    timestamp = time(NULL);
    title = link = description = msg = author = comments_url = pubdate = fake_ua = NULL;

    clear_XMLBlock(&b2);
    /* get_XMLBlock() reports "not found" with a negative value, as the
       calls above rely on; a plain truth test would accept -1 and reject
       a block located at offset 0. */
    if (get_XMLBlock(xmlb.content, xmlb.content_len, "title", &b2) >= 0 && b2.content_len) {
      title = str_ndup(b2.content, b2.content_len);
    }
    if (get_XMLBlock(xmlb.content, xmlb.content_len, "link", &b2) >= 0 && b2.content_len) {
      link = str_ndup(b2.content, b2.content_len);
    }
    if (!board->site->prefs->rss_ignore_description &&
        get_XMLBlock(xmlb.content, xmlb.content_len, "description", &b2) >= 0 && b2.content_len) {
      description = str_ndup(b2.content, b2.content_len);
    }
    if (get_XMLBlock(xmlb.content, xmlb.content_len, "author", &b2) >= 0 && b2.content_len) {
      author = str_ndup(b2.content, b2.content_len);
    }
    if (get_XMLBlock(xmlb.content, xmlb.content_len, "comments", &b2) >= 0 && b2.content_len) {
      comments_url = str_ndup(b2.content, b2.content_len);
    }

    /* date format: http://www.w3.org/TR/NOTE-datetime */
    if (get_XMLBlock(xmlb.content, xmlb.content_len, "pubDate", &b2) >= 0 && b2.content_len) {
      pubdate = str_ndup(b2.content, b2.content_len);
    }
    if (pubdate == NULL && get_XMLBlock(xmlb.content, xmlb.content_len, "*:date", &b2) >= 0 && b2.content_len) {
      pubdate = str_ndup(b2.content, b2.content_len);
    }

    /* How the publication date of an item is chosen:
         - <pubDate>rfc 822 date</pubDate>
         - <dc:date>iso 8601 date</dc:date>
         - otherwise:
           . if the item was already known from a previous run (md5 cache),
             its recorded date is reused;
           . else the current time is used;
         - the result never exceeds the start time of this update nor the
           Last-Modified value kept for the board.
       Time zones make all of this approximate. */
    if (pubdate) {
      if (str_to_time_t(pubdate, &timestamp)) {
        time_t_to_tstamp(timestamp, stimestamp);
        BLAHBLAH(3,myprintf("converted %<YEL %s> to %<YEL %s> !\n", pubdate, stimestamp));
      } else {
        BLAHBLAH(0, printf("could not convert '%s' to a valid date..\n", pubdate));
      }
    }

    timestamp = MIN(timestamp, temps_debut);
    timestamp = MIN(timestamp, temps_last_modified);
    time_t_to_tstamp(timestamp, stimestamp);

    destroy_XMLBlock(&b2);

    /* description is NULL when absent or ignored by preference */
    if (description) str_trunc_nice(description, 512);
    if (link) {
      /* strip the redirect prefix found in yahoo links */
      char *star = strstr(link, "*http://");
      if (star) {
        star++;
        memmove(link, star, strlen(star)+1);
      }
    }

    msg = NULL;
    /* NOTE(review): the "</b;>" in the title-only format is kept exactly as
       found; it looks like a typo for "</b>" - confirm before changing. */
    if (title && link) msg = str_cat_printf(msg, "{<a href=\"%s\"><u><b>%s</b></u></a>}", link, title);
    else if (title) msg = str_cat_printf(msg, "{<b>%s</b;>}", title);
    else if (link) msg = str_cat_printf(msg, "{<a href=\"%s\">[News]</a>}", link);
    if (description) msg = str_cat_printf(msg, " %s", description);
    if (comments_url) msg = str_cat_printf(msg, " <a href=\"%s\">[comments]</a>", comments_url);

    if (msg) {
      md5_byte_t md5[16];
      md5_state_t ms;
      int was_already_viewed = 0;

      /* an item is identified by the md5 of title + link + description */
      md5_init(&ms);
      if (title) md5_append(&ms, title, strlen(title));
      if (link) md5_append(&ms, link, strlen(link));
      if (description) md5_append(&ms, description, strlen(description));
      md5_finish(&ms,md5);

      /* look the item up in the cache (only on the first download) */
      if (board->oldmd5) {
        md5_and_time *m = find_md5_in_md5_array(md5,board->oldmd5);
        if (m && strlen(m->tstamp) == 14) {
          was_already_viewed = m->viewed;
          strcpy(stimestamp, m->tstamp);
          str_to_time_t(stimestamp, &timestamp);
          BLAHBLAH(1, myprintf("the news '%<GRN %s>' was found in the cache!\n", title));
        }
      }

      /* look it up among the items already read (after the first download) */
      if (board_find_md5(board, md5)) {
        BLAHBLAH(1,myprintf("the news %<MAG %s>/%<CYA %s> is already known\n", rss_title, md5txt(md5)));
        /* the loop could stop here if feeds were always filled from the
           top; that is not assumed, so rest_already_known stays 0 */
      } else {
        /* blank out control codes (< 32) in the message */
        {
          int i;
          for (i=0; i < BOARD_MSG_MAX_LEN && msg[i]; ++i)
            if ((unsigned char)msg[i] < ' ') msg[i] = ' ';
        }

        fake_ua = str_printf("%s", rss_title ? rss_title : "?");
        if (pubdate) {
          fake_ua = str_cat_printf(fake_ua, " pubDate: %s", pubdate);
        }

        /* beware: '<' becomes '\t<' and '&lt;' becomes '<' */
        board_decode_message(board, msgd, msg);
        {
          char *soupe = rss_nettoie_la_soupe_de_tags(msgd);
          strncpy(msgd, soupe, sizeof msgd); free(soupe); msgd[(sizeof msgd) - 1] = 0;
        }
        if (author && strlen(author)) {
          author = str_cat_printf(author, "@%s", rss_title);
        } else {
          FREE_STRING(author);
          author = strdup(rss_title);
        }
        {
          char author_tmp[1024];
          convert_to_ascii(author_tmp, author, sizeof author_tmp);
          FREE_STRING(author);
          author = strdup(author_tmp);
        }
        prelog_add(fake_ua, author, timestamp, msgd, link, md5, was_already_viewed);
        board->nb_msg_at_last_check++;
        if (!was_already_viewed) board->nb_msg_since_last_viewed++;
      }
    }
    FREE_STRING(title); FREE_STRING(link); FREE_STRING(description); FREE_STRING(author);
    FREE_STRING(comments_url); FREE_STRING(msg); FREE_STRING(pubdate); FREE_STRING(fake_ua);

    p += pos_next_item;
  } while (!rest_already_known);

  status = 0;

 done:
  if (board->oldmd5 && board->last_post_id > 0) release_md5_array(board);
  destroy_XMLBlock(&xmlb);
  FREE_STRING(rss_title);
  FREE_STRING(rsstxt);
  prelog_commit(board);
  return status;
}