/* Run one joust round: blink the chosen light, drive both motors through the
 * charge and crash phases with matching sound effects, light the lamp of a
 * randomly chosen winner, play the win/lose fanfare depending on whether the
 * player's pick matched, then return the rig to its start position.
 *
 * picked_winner_as_vp: the pin the player bet on, smuggled through a void *
 *                      (callback-style argument).
 * l:                   light bank used for the blink effect.
 * pin:                 which light to blink while the joust runs.
 */
static void joust(void *picked_winner_as_vp, lights_t *l, unsigned pin) {
    /* Recover the integer from the void *. Go through `unsigned long`
     * (at least pointer-width on common ABIs) so the conversion is not a
     * truncating pointer-to-int cast on 64-bit builds. */
    unsigned picked_winner = (unsigned) (unsigned long) picked_winner_as_vp;
    unsigned want_winner = picked_winner == WIN_2_PIN;
    unsigned winner_id = random_number_in_range(0, 1);

    stop_stop(pick_stop);
    fprintf(stderr, "Starting joust\n");
    lights_blink_one(l, pin);

    /* Charge phase: motors full on while the jousting track plays. */
    track_play_asynchronously(jousting, stop);
    motor_forward(0, 1);
    motor_forward(1, 1);
    ms_sleep(WIN_MS);
    stop_stop(stop);

    /* Crash phase: slower trot duty cycle while the crash sound plays. */
    track_play_asynchronously(crash, stop);
    motor_forward(0, TROT_DUTY);
    motor_forward(1, TROT_DUTY);
    ms_sleep(TROT_MS);
    stop_stop(stop);
    motor_stop(0);
    motor_stop(1);

    /* %u matches the unsigned arguments (was %d — a format mismatch). */
    fprintf(stderr, "wanted %u got %u\n", want_winner, winner_id);

    /* Light the winning knight's lamp. */
    if (winner_id == 0) {
        wb_set(MOTOR_BANK, WINNER_LIGHT_1, 1);
    } else {
        wb_set(MOTOR_BANK, WINNER_LIGHT_2, 1);
    }

    /* Fanfare depends on whether the player's bet paid off. */
    if (winner_id == want_winner) {
        track_play(winner);
    } else {
        track_play(loser);
    }

    ms_sleep(1000);
    wb_set(MOTOR_BANK, WINNER_LIGHT_1, 0);
    wb_set(MOTOR_BANK, WINNER_LIGHT_2, 0);
    lights_off(l);

    /* Reverse back to the start position with warning beeps. */
    track_play_asynchronously(beeping, stop);
    go_to_start_position();
    stop_stop(stop);
}
/* Look up `term` in the in-memory postings hash table.
 *
 * Returns a pointer to the term's postings vector, or NULL when the term is
 * absent from the table or rejected by the stoplist (post->stop, when set).
 */
struct vec *postings_vector(struct postings *post, char *term) {
    unsigned int hash;
    struct postings_node *node;

    /* No stoplist means every term is accepted. */
    if (!post->stop || (stop_stop(post->stop, term) == STOP_OK)) {
        hash = str_hash(term) & bit_lmask(post->tblbits);
        /* Walk the collision chain looking for an exact term match.
         * BUG FIX: the original loop never advanced `node`, so it spun
         * forever whenever the first chain entry was not the sought term
         * (and terms deeper in a chain were unreachable). */
        for (node = post->hash[hash]; node; node = node->next) {
            if (!str_cmp(term, node->term)) {
                return &node->vec;
            }
        }
    }
    return NULL;
}
/* internal function to construct a query structure from a given string (query) * of length len. At most maxterms will be read from the query. */ unsigned int index_querybuild(struct index *idx, struct query *query, const char *querystr, unsigned int len, unsigned int maxterms, unsigned int maxtermlen, int impacts) { struct queryparse *qp; /* structure to parse the query */ struct conjunct *current = NULL; /* pointer to a current conjunction */ char word[TERMLEN_MAX + 1]; /* buffer to hold words */ unsigned int i, /* counter */ wordlen, /* length of word */ words = 0, /* number of words parsed */ currmatch = 0; /* number of matches against current * entry */ /* veclen; */ int state = CONJUNCT_TYPE_WORD, /* state variable, can take on values * from conjunct types enum */ stopped = 0; /* whether the last word was stopped */ /* void *ve; */ /* compressed vocabulary entry for a * word */ enum queryparse_ret parse_ret; /* value returned by parser */ /* last modifier seen; also, are we in a modifier */ enum { MODIFIER_NONE, MODIFIER_SLOPPY, MODIFIER_CUTOFF } modifier = MODIFIER_NONE; void (*stem)(void *, char *) = index_stemmer(idx); assert(maxtermlen <= TERMLEN_MAX); if (!(qp = queryparse_new(maxtermlen, querystr, len))) { return 0; } query->terms = 0; printf("SENGOR: "); /* This bit of code builds a structure that represents a query from an * array, where the array of words will be filled from 0 upwards, and * conjunctions that require a linked list of words (phrase, AND) take * additional words from the end of the array. 
*/ do { struct vocab_vector entry; int retval; char vec_buf[MAX_VOCAB_VECTOR_LEN]; parse_ret = queryparse_parse(qp, word, &wordlen); switch (parse_ret) { case QUERYPARSE_WORD_EXCLUDE: /* this functionality not included yet, just ignore the word for * now */ /* look up word in vocab */ /* ve = hFetch(word, idx->vocab, NULL); */ /* OPTIMIZE: search existing conjunctions for word and remove * them */ /* FIXME: stop word */ /* if (ve) { conjunct_add(query, ve, CONJUNCT_TYPE_EXCLUDE, &maxterms); } */ current = NULL; /* this can't be the start of a conjunction */ words++; break; case QUERYPARSE_WORD_NOSTOP: case QUERYPARSE_WORD: if (modifier != MODIFIER_NONE) { /* this is not a query term, but an argument to a modifier */ switch (modifier) { case MODIFIER_SLOPPY: if (query->terms > 0) query->term[query->terms - 1].sloppiness = atoi(word); break; case MODIFIER_CUTOFF: if (query->terms > 0) query->term[query->terms - 1].cutoff = atoi(word); break; default: /* FIXME WARN */ break; } break; } /* look up word in vocab */ /* FIXME: word needs to be looked up in in-memory postings as * well */ if (stem) { word[wordlen] = '\0'; stem(idx->stem, word); wordlen = str_len(word); printf(" %s ", word); } retval = get_vocab_vector(idx->vocab, &entry, word, wordlen, vec_buf, sizeof(vec_buf), impacts); if (retval < 0) { return 0; } else if (retval == 0) { stopped = 0; /* so we know that this term wasn't stopped */ currmatch = 1; /* so that we know that phrases have started */ if (current && (state == CONJUNCT_TYPE_AND)) { /* need to remove current conjunction, as it contains a word * that isn't in the collection */ if (current->f_qt > 1) { current->f_qt--; } else { state = CONJUNCT_TYPE_WORD; /* stop AND condition */ maxterms += current->terms - 1; query->terms--; } } else if (current && (state == CONJUNCT_TYPE_PHRASE)) { /* same, except phrase continues until end-phrase */ if (current->f_qt > 1) { current->f_qt--; } else { maxterms += current->terms - 1; query->terms--; } } current = 
NULL; } else if (state == CONJUNCT_TYPE_PHRASE) { /* OPTIMIZE: check word against excluded terms */ struct term *currterm; /* processing a phrase */ if (!currmatch) { /* first word in phrase, match or add a conjunction */ current = conjunct_add(query, &entry, /* ve, veclen, */ word, wordlen, CONJUNCT_TYPE_PHRASE, &maxterms); currmatch = 1; } else if (current && (current->f_qt > 1)) { /* we're matching an existing phrase */ /* iterate to next term we need to match */ for (i = 0, currterm = ¤t->term; i < currmatch; i++, currterm = currterm->next) ; if (currterm && !str_cmp(currterm->term, word)) { /* matched */ currmatch++; } else { /* didn't match, copy non-matching phrase */ current->f_qt--; current = conjunct_copy(query, current, currmatch, &entry, word, wordlen, &maxterms); currmatch++; } } else if (current) { /* we're building a new phrase, add next word on */ conjunct_append(query, current, &entry, /* ve, veclen, */ word, wordlen, &maxterms); currmatch++; } /* otherwise we're ignoring this phrase (because it contains a * word thats not in the vocab) */ } else if (state == CONJUNCT_TYPE_AND) { /* we are constructing an AND conjunction */ /* FIXME: stop word stopped = 1; current = NULL; */ /* OPTIMIZE: check word against excluded terms */ if (current) { if ((current->type == CONJUNCT_TYPE_AND) || (current->f_qt == 1)) { /* add to current conjunct */ conjunct_append(query, current, &entry, word, wordlen, &maxterms); current->type = CONJUNCT_TYPE_AND; } else { /* copy matched word to new location for AND conjunct */ current->f_qt--; current = conjunct_copy(query, current, 1, &entry, word, wordlen, &maxterms); current->type = CONJUNCT_TYPE_AND; } } else if (stopped) { /* first word(s) in conjunct was stopped, so start a new * one */ current = conjunct_add(query, &entry, word, wordlen, CONJUNCT_TYPE_WORD, &maxterms); } state = CONJUNCT_TYPE_WORD; /* stop AND condition */ } else if (state == CONJUNCT_TYPE_WORD) { /* its a single word */ stopped = 0; if (parse_ret != 
QUERYPARSE_WORD_NOSTOP) { word[wordlen] = '\0'; if (idx->qstop && stop_stop(idx->qstop, word) == STOP_STOPPED) { /* it is a stopword */ stopped = 1; current = NULL; } } if (!stopped) { current = conjunct_add(query, &entry, /* ve, veclen, */ word, wordlen, CONJUNCT_TYPE_WORD, &maxterms); currmatch = 1; } } words++; break; case QUERYPARSE_OR: state = CONJUNCT_TYPE_WORD; /* or is the default mode anyway */ break; case QUERYPARSE_AND: state = CONJUNCT_TYPE_AND; break; case QUERYPARSE_START_PHRASE: /* phrase starts */ state = CONJUNCT_TYPE_PHRASE; current = NULL; currmatch = 0; break; case QUERYPARSE_END_PHRASE: if (current && (current->terms != currmatch)) { /* partial match, need to copy phrase */ current->f_qt--; current = conjunct_copy(query, current, currmatch, NULL, NULL, 0, &maxterms); } /* treat single-word phrases as, well, words */ if (current && (current->terms == 1)) { struct conjunct *ret; /* see if this single-word occurred previously */ ret = conjunct_find(query, ¤t->term.vocab, current->term.term, str_len(current->term.term), CONJUNCT_TYPE_WORD); if (ret == NULL) { /* ok, this is the first occurence */ current->type = CONJUNCT_TYPE_WORD; } else { /* there was a previous occurence: increment its f_qt, and free this one */ ret->f_qt++; assert(current == &query->term[query->terms - 1]); free(current->term.term); if (current->term.vocab.location == VOCAB_LOCATION_VOCAB) { free(current->term.vecmem); } query->terms--; } } current = NULL; state = CONJUNCT_TYPE_WORD; break; case QUERYPARSE_END_SENTENCE: /* we're ignoring this at the moment - it might be used later if * we don't want to match phrases across sentence boundaries */ break; case QUERYPARSE_END_MODIFIER: modifier = MODIFIER_NONE; break; case QUERYPARSE_START_MODIFIER: if (str_casecmp(word, "sloppy") == 0) modifier = MODIFIER_SLOPPY; else if (str_casecmp(word, "cutoff") == 0) modifier = MODIFIER_CUTOFF; else /* FIXME WARN */ modifier = MODIFIER_NONE; break; case QUERYPARSE_EOF: break; /* this will 
finish the parse */ default: /* unexpected return code, error */ queryparse_delete(qp); return 0; } } while ((parse_ret != QUERYPARSE_EOF) && (query->terms < maxterms)); /* FIXME: temporary stopping condition */ queryparse_delete(qp); printf("\n"); /* returning word count confuses errors with empty queries. */ /* return words; */ return 1; }
int postings_dump(struct postings* post, void *buf, unsigned int bufsize, int fd) { unsigned int i, j, stopped = 0, pos, /* position in current vector */ len, /* length of current term */ wlen, /* length of last write */ dbufsz; /* size of dbuf */ struct postings_node* node, /* current node */ ** arr; /* array of postings nodes */ char *dbuf, /* dumping buffer */ *dbufpos; /* position in dumping buffer */ struct vec v; /* FIXME: note, this should assert !post->update_required, but due to the * way that TREC documents are parsed (basically under the assumption that * another one is always coming) we end up with an empty document at the * end */ assert(!post->update); /* XXX: hack, allocate a big array of postings and then sort them by term. * This is so that postings go out sorted by term instead of hash value. */ if (!(arr = malloc(sizeof(*arr) * post->dterms))) { return 0; } /* the provided buffer is used to dump the postings */ dbuf = buf; dbufsz = bufsize; /* copy nodes into array */ j = 0; for (i = 0; i < post->tblsize; i++) { node = post->hash[i]; while (node) { /* perform stopping. Ideally we'd like to stop terms before * stemming them, and before they make their way into the postings. * However, this means that we have to call the stoplist * once-per-term, which makes it a big bottleneck. We stop here to * minimise the performance impact on the most common case, no * stopping. Note that if we really wanted to make stopping * (when actually used) go faster, it would be better to have a * sorted stoplist as well, and merge against that rather than * doing one hash lookup per term. 
*/ if (!post->stop || stop_stop(post->stop, node->term) == STOP_OK) { arr[j++] = node; } else { assert(++stopped); /* count stopped terms while debugging */ } node = node->next; } /* reset hash node (memory free'd below) */ post->hash[i] = NULL; } assert(j + stopped == post->dterms); stopped = 0; qsort(arr, post->dterms, sizeof(*arr), post_cmp); v.pos = dbuf; v.end = dbuf + dbufsz; for (i = 0; i < j;) { while ((i < post->dterms) && ((len = str_len(arr[i]->term)), 1) && (((unsigned int) VEC_LEN(&v)) >= vec_vbyte_len(len) + len + vec_vbyte_len(arr[i]->docs) + vec_vbyte_len(arr[i]->occurs) + vec_vbyte_len(arr[i]->last_docno) + vec_vbyte_len(arr[i]->vec.pos - arr[i]->vecmem))) { unsigned int bytes; assert(len); assert(dbufsz > vec_vbyte_len(len) + len + vec_vbyte_len(arr[i]->docs) + vec_vbyte_len(arr[i]->occurs) + vec_vbyte_len(arr[i]->last_docno) + vec_vbyte_len(arr[i]->vec.pos - arr[i]->vecmem)); /* have enough space, copy stuff into buffer */ bytes = vec_vbyte_write(&v, len); assert(bytes); bytes = vec_byte_write(&v, arr[i]->term, len); assert(bytes == len); bytes = vec_vbyte_write(&v, arr[i]->docs); assert(bytes); bytes = vec_vbyte_write(&v, arr[i]->occurs); assert(bytes); bytes = vec_vbyte_write(&v, arr[i]->last_docno); assert(bytes); bytes = vec_vbyte_write(&v, arr[i]->vec.pos - arr[i]->vecmem); assert(bytes); /* copy the inverted list in */ pos = 0; while (((unsigned int) VEC_LEN(&v)) < (arr[i]->vec.pos - arr[i]->vecmem) - pos) { /* copy last bit we can in */ pos += vec_byte_write(&v, arr[i]->vecmem + pos, VEC_LEN(&v)); /* write the buffer out */ len = v.pos - dbuf; dbufpos = dbuf; while (len && ((wlen = write(fd, dbufpos, len)) >= 0)) { len -= wlen; dbufpos += wlen; } if (len) { free(arr); return 0; } v.pos = dbuf; v.end = dbuf + dbufsz; } /* copy last bit of inverted list in */ pos += vec_byte_write(&v, arr[i]->vecmem + pos, (arr[i]->vec.pos - arr[i]->vecmem) - pos); assert(arr[i]->vecmem + pos == arr[i]->vec.pos); free(arr[i]->vecmem); i++; } /* write the 
buffer out */ len = v.pos - dbuf; dbufpos = dbuf; while (len && ((wlen = write(fd, dbufpos, len)) >= 0)) { len -= wlen; dbufpos += wlen; } if (len) { free(arr); return 0; } v.pos = dbuf; v.end = dbuf + dbufsz; } /* reinitialise hash table */ post->size = 0; post->dterms = 0; post->terms = 0; post->docs = 0; poolalloc_clear(post->string_mem); objalloc_clear(post->node_mem); free(arr); return 1; }