Exemple #1
0
/*
 * Set up the tokenizer's global state.
 *
 * yyinit() is called on every invocation.  The first invocation also sizes
 * and allocates the yylval text buffer, creates the per-message words
 * (address, message id, queue id, saved ip), the fixed prefix words and
 * the multi-word token array.  Every later invocation only calls
 * token_clear().
 */
void token_init(void)
{
    static bool initialized = false;

    yyinit();

    /* One-time setup already done: just clear and leave. */
    if (initialized) {
	token_clear();
	return;
    }

    initialized = true;

    /* Derive the multi-token length only when it has not been set. */
    if (max_multi_token_len == 0)
	max_multi_token_len = (max_token_len+1) * multi_token_count + MAX_PREFIX_LEN;

    yylval_text_size = max_multi_token_len + MSG_COUNT_PADDING;
    yylval_text = (byte *) malloc( yylval_text_size+D );

    yylval.leng = 0;
    yylval.u.text = yylval_text;

    /* First IP address in a Received: statement */
    msg_addr = word_new( NULL, max_token_len );

    /* Message ID (three times the normal token length) */
    msg_id = word_new( NULL, max_token_len * 3 );

    /* Message's first queue ID */
    queue_id = word_new( NULL, max_token_len );

    ipsave = word_new( NULL, max_token_len );

    /* Prefix words, built with word_news() to avoid compiler complaints */
    w_to   = word_news("to:");		/* To:          */
    w_from = word_news("from:");	/* From:        */
    w_rtrn = word_news("rtrn:");	/* Return-Path: */
    w_subj = word_news("subj:");	/* Subject:     */
    w_recv = word_news("rcvd:");	/* Received:    */
    w_head = word_news("head:");	/* Header:      */
    w_mime = word_news("mime:");	/* Mime:        */
    w_ip   = word_news("ip:");		/* ip:          */
    w_url  = word_news("url:");		/* url:         */
    nonblank_line = word_news(NONBLANK);

    /* Multi-word token initialization */
    init_token_array();
}
Exemple #2
0
/*
 * Cut self->sentence at spaces, commas and hyphens and append a word created
 * from every piece to self->words.  strtok() is used, so the sentence
 * buffer is modified in place.
 */
void sentence_split(sentence_t * self){
    const char *separators = " ,-";
    char *piece;

    for (piece = strtok(self->sentence, separators);
         piece != NULL;
         piece = strtok(NULL, separators)) {
        list_add(self->words, word_new(piece));
    }
}
Exemple #3
0
/*
 * Tokenize self->sentence on spaces, commas and hyphens, pushing a word built
 * from each token onto the back of self->wordsList.  The sentence buffer is
 * altered by strtok() while this runs.
 */
void sentence_divide(sentence_t * self){
    const char *delimiters = " ,-";
    char *token = strtok(self->sentence, delimiters);

    for (; token != NULL; token = strtok(NULL, delimiters))
        list_push_back(self->wordsList, word_new(token));
}
Exemple #4
0
/*
 * Read "text.txt" one character at a time and build a text made of sentences
 * made of words, then print it with fprint() and release it.
 *
 *  - '.', '!' and '?' finish the current word and the current sentence
 *    (ignored while no word characters are pending);
 *  - any other punctuation character is skipped;
 *  - whitespace finishes the current word;
 *  - everything else is appended to the current word.
 *
 * Words are limited to 24 characters; extra characters of a longer word are
 * dropped instead of being written past the end of the buffer.
 *
 * Returns 1 when the file cannot be opened, 0 otherwise.
 *
 * NOTE(review): a word still pending at end of file is not added, and the
 * sentence under construction at that point is neither added to the text nor
 * released here - confirm whether that is intended.
 */
int main(){
    const char name[] = "text.txt";
    FILE * file = fopen(name, "r");
    if(NULL == file)
        return 1;

    int c;                      /* int, not char: must be able to represent EOF */
    size_t len = 0;
    char word[25] = "";
    text_t * text = text_new();
    sen_t * sen = sen_new();

    while((c = fgetc(file)) != EOF){
        if(ispunct(c)){
            if(c == '.' || c == '!' || c == '?'){
                if(len == 0)
                    continue;
                addWord(sen, word_new(word));
                addSen(text, sen);
                sen = sen_new();
                memset(word, 0, sizeof word);
                len = 0;
            }
            continue;
        }
        else if(isspace(c)){
            if(len == 0)
                continue;
            addWord(sen, word_new(word));
            memset(word, 0, sizeof word);
            len = 0;
        }
        else if(len < sizeof word - 1){
            /* Always keep the last byte as the terminating NUL. */
            word[len] = (char)c;
            len++;
        }
    }

    fclose(file);
    fprint(text);
    text_free(text);
    return 0;
}
Exemple #5
0
void prelude_initialize() {
    int i, n;
    n = sizeof(entries) / sizeof(struct entry);
    prelude = list_nil;
    for (i = 0; i < n; ++i) {
        object_t word;
        word = word_new(string_new(entries[i].name),
                        primitive_new(entries[i].definition),
                        entries[i].parsing_p ? boolean_t : boolean_f);
        prelude = dictionary_insert(prelude, word);
    }
}
Exemple #6
0
/*
 * Count stop words in a text.
 *
 * Phase 1 reads "text.txt" character by character.  Letters are lower-cased
 * and appended to the current word; whitespace, ',', '.', '!', '?', ';' and
 * end of file finish the current word; '.', '!', '?' and end of file also
 * finish the current sentence, which is added to the text.
 *
 * Phase 2 reads a count followed by that many words from "stopwords.txt" and
 * writes "<word>: <text_find result>" lines to "result.txt".  Stop words are
 * read with a 14-character limit so they cannot overflow the local buffer;
 * a longer word is split across reads.
 *
 * Returns 0 on success and 1 when a file cannot be opened, the count cannot
 * be parsed, or the output file fails to close.
 *
 * NOTE(review): word_free()/sentence_free() are called right after
 * sentence_add()/text_add(), which presumably copy their argument - confirm.
 */
int main (){
    FILE *input = fopen("text.txt", "r");
    if (input == NULL) {
        fprintf (stderr, "cannot open %s\n", "text.txt");
        return 1;
    }

    char *str = strnew();

    sentence_t *currSentence = sentence_create();
    text_t *text = text_create();

    while (1) {
        /* int, not char: EOF must stay distinguishable from every byte value */
        int c = fgetc(input);
        if (c == EOF || isspace(c) || c == ',' || c == '.' || c == '!' || c == '?' || c == ';') {
            if (strlen(str) != 0) {
                word_t *word = word_new(str);
                sentence_add(currSentence, word);
                free (str);
                word_free (word);
                str = strnew();
            }
        }
        if (c == EOF || c == '.' || c == '!' || c == '?') {
            text_add (text, currSentence);
            sentence_free(currSentence);
            currSentence = sentence_create();
        }
        if (c == EOF) break;
        if (isalpha(c)) {
            char *w = stradd(str, (char) tolower(c));
            free (str);
            str = w;
        }
    }
    fclose (input);

    int status = 1;
    FILE *output = fopen("result.txt", "w");
    if (output == NULL) {
        fprintf (stderr, "cannot open %s\n", "result.txt");
    } else {
        input = fopen ("stopwords.txt", "r");
        if (input == NULL) {
            fprintf (stderr, "cannot open %s\n", "stopwords.txt");
        } else {
            int stopCount = 0;
            if (fscanf (input, "%d", &stopCount) == 1) {
                status = 0;
                for (int i = 0; i < stopCount; i++) {
                    char s[15];
                    /* Width keeps the read inside s; stop when words run out. */
                    if (fscanf (input, "%14s", s) != 1)
                        break;
                    fprintf (output, "%s: %d\n", s, text_find (text, s));
                }
            } else {
                fprintf (stderr, "cannot read stop word count\n");
            }
            fclose (input);
        }
        if (fclose (output) != 0)
            status = 1;
    }

    free (str);
    sentence_free(currSentence);
    text_free(text);
    return status;
}
Exemple #7
0
/*
 * Create a sentence from a text buffer.
 *
 * The sentence starts with a placeholder word holding " " in fword; the real
 * words are chained after it through their next pointers, and count holds
 * the number of real words.  contents is first passed through clear_odd()
 * and then split on spaces with strtok(), so the buffer is modified.
 *
 * Returns the new sentence, or NULL when the sentence or its placeholder
 * word cannot be allocated.  If creating a later word fails, the words
 * collected so far are kept and splitting stops.
 */
sent_t *sentense_new(char *contents){
    sent_t *asent = malloc(sizeof *asent);
    if(!asent)
        return NULL;

    asent->prev = NULL;
    asent->next = NULL;
    asent->count = 0;
    asent->fword = word_new(" ");
    if(!asent->fword){
        free(asent);
        return NULL;
    }

    contents = clear_odd(contents, strlen(contents));

    /* tail always points at the last word of the chain, starting at the placeholder. */
    word_t *tail = asent->fword;
    for(char *p = strtok(contents, " "); p; p = strtok(NULL, " ")){
        word_t *nword = word_new(p);
        if(!nword)
            break;
        tail->next = nword;
        tail = nword;
        asent->count++;
    }
    return asent;
}
Exemple #8
0
/* Produce a deeper copy of a boxed reference.
 *
 * FLOAT, INTEGER and WORD references yield a new box built from the source
 * value.  A QUOTATION yields a new empty quotation to which the source is
 * appended; per the original author's note, references inside quotations are
 * cloned with boxed_copy() rather than boxed_clone().
 *
 * Returns NULL for a NULL reference or an unhandled type. */
Boxed boxed_clone(Boxed reference) {
  Boxed clone = NULL;

  trace("boxed_clone(%p)\n", reference);

  if (reference) {
    switch (boxed_type(reference)) {
    case FLOAT:
      clone = float_new(float_value(reference));
      break;
    case INTEGER:
      clone = integer_new(integer_value(reference));
      break;
    case QUOTATION:
      clone = quotation_new(0);
      quotation_append(clone, reference);
      break;
    case WORD:
      clone = word_new(word_value(reference));
      break;
    }
  }

  return clone;
}
Exemple #9
0
/*
 * Decode quoted-printable data from stdin to stdout.
 *
 * The whole of stdin is read into a word sized from the stream length (so
 * stdin must be seekable), decoded in place with qp_decode(), and the
 * decoded bytes are written to stdout.
 *
 * argv[1] selects the mode: "rfc2047" or "rfc-2047" (case-insensitive)
 * selects RFC2047; anything else, or no argument, keeps RFC2045.
 *
 * Any I/O failure ends the program through die().
 */
int main(int argc, char **argv) {
    size_t size;
    long end;
    qp_mode mode = RFC2045;
    word_t *w;

    /* strcasecmp() returns 0 on a match, so the result must be compared to 0. */
    if (argc > 1 &&
        (strcasecmp(argv[1], "rfc2047") == 0 ||
         strcasecmp(argv[1], "rfc-2047") == 0))
        mode = RFC2047;

    if (fseek(stdin, 0, SEEK_END)) die();
    end = ftell(stdin);
    /* ftell() reports failure as -1; do not let that turn into a huge size_t. */
    if (end < 0) die();
    size = (size_t) end;
    if (fseek(stdin, 0, SEEK_SET)) die();
    w = word_new(NULL, size);
    if (fread(w->u.text, 1, w->leng, stdin) != w->leng) die();
    size = qp_decode(w, mode);
    if (fwrite(w->u.text, 1, size, stdout) != size) die();
    word_free(w);
    if (fflush(stdout)) die();
    if (fclose(stdout)) die();
    return EXIT_SUCCESS;
}
Exemple #10
0
/*
 * Split a space-separated sentence into words and collect them in a new
 * word_t.
 *
 * str is tokenized with strtok(), so it is modified in place.  Each word is
 * copied to its own heap buffer and handed to word_add().  The copies are not
 * freed here because it is not visible whether word_add() keeps the pointer.
 *
 * The number of words found is printed to stdout, as before.
 *
 * Returns the new word_t, or NULL when str is empty (a message is printed)
 * or memory runs out.  The process exits with status 1 when str contains no
 * words at all (only spaces).
 */
word_t * senToWords(char * str)
{
    if (strlen(str) == 0)
    {
        printf("string is empty\n");
        return NULL;
    }

    int wordscount = 1;
    int flag = 0; /* non-space seen since the last counted gap; a leading space is not counted */
    const size_t length = strlen(str);
    for (size_t i = 0; i < length; i++)
    {
        if (str[i] == ' ' && flag)
        {
            flag = 0; /* runs of spaces must not be counted as extra words */
            wordscount++;
        }
        if (str[i] != ' ')
            flag = 1; /* current character is not a space */
        if (str[i] == ' ' && str[i + 1] == '\0') /* the last character is a space */
            wordscount--; /* drop the word counted for it */
    }
    printf("%i\n", wordscount);

    /* Array of word pointers plus a terminating NULL slot. */
    char ** words = malloc(sizeof *words * ((size_t)wordscount + 1));
    if (words == NULL)
        return NULL;

    int stored = 0;
    char * word = strtok(str, " "); /* split the string into substrings */
    while (stored < wordscount && word != NULL)
    {
        words[stored] = malloc(strlen(word) + 1); /* +1 for the terminating NUL */
        if (words[stored] == NULL)
        {
            while (stored > 0)
                free(words[--stored]);
            free(words);
            return NULL;
        }
        strcpy(words[stored], word);
        stored++;
        /* Do not call strtok() again once the last expected word is stored. */
        word = (stored < wordscount) ? strtok(NULL, " ") : NULL;
    }
    words[stored] = NULL;

    if (words[0] == NULL)
        exit(1);

    word_t * self = word_new();
    for (int i = 0; i < stored; i++)
        word_add(self, words[i]);

    /* Only the pointer array is released; the strings were passed to word_add(). */
    free(words);
    return self;
}
Exemple #11
0
/*
 * Load tokens from the text stream fpin into the wordlist opened from bfp.
 *
 * Each input line is split with spanword() into a token followed by three
 * numeric fields parsed with atoi(), in this order: spam count, good count,
 * date (YYYYMMDD).  Lines shorter than 4 characters and tokens longer than
 * max_token_len (when that limit is non-zero) are skipped.  Negative counts
 * are clamped to 0 and a zero date is replaced by the value `today` had on
 * entry.  For each accepted token the existing record is read, the parsed
 * counts are added to it, and the record is written back.
 *
 * All reads and writes happen inside one transaction: it is aborted when an
 * error was recorded and committed otherwise.
 *
 * Returns 0 on success, 1 on malformed input or a datastore read/write
 * failure, 2 on a read error on fpin.  The process exits with EX_ERROR when
 * the transaction cannot be started or the commit fails; ds_open_failure()
 * is called when the datastore cannot be opened.
 *
 * NOTE(review): spamcount and goodcount are not declared in this function,
 * so they are defined elsewhere; count[] is declared but never used here.
 */
static int load_wordlist(bfpath *bfp)
{
    void *dsh;
    byte buf[BUFSIZE];
    byte *p;
    int rv = 0;
    size_t len;
    int load_count = 0;
    unsigned long line = 0;
    unsigned long count[IX_SIZE], date;
    /* Remember today's date for entries that carry no date of their own. */
    YYYYMMDD today_save = today;

    void *dbe = ds_init(bfp);

    dsh = ds_open(dbe, bfp, (dbmode_t)(DS_WRITE | DS_LOAD));
    if (dsh == NULL)
	/* print error, cleanup, and exit */
	ds_open_failure(bfp, dbe);

    memset(buf, '\0', BUFSIZE);

    if (DST_OK != ds_txn_begin(dsh))
	exit(EX_ERROR);

    for (;;) {
	dsv_t data;
	word_t *token;
	/* NULL from fgets() means end of input or a read error; only the latter sets rv. */
	if (fgets((char *)buf, BUFSIZE, fpin) == NULL) {
	    if (ferror(fpin)) {
		perror(progname);
		rv = 2;
	    }
	    break;
	}

	line++;

	len = strlen((char *)buf);

	/* too short. */
	if (len < 4)
	    continue;

	/* spanword() presumably NUL-terminates the token and returns the start
	 * of the next field, which is why the length is measured again. */
	p = spanword(buf);
	len = strlen((const char *)buf);

	if (max_token_len != 0 &&
	    len > max_token_len)
	    continue;		/* too long - discard */

	/* First numeric field: spam count; negative values become 0. */
	spamcount = (uint) atoi((const char *)p);
	if ((int) spamcount < 0)
	    spamcount = 0;
	p = spanword(p);

	/* Second numeric field: good count; negative values become 0. */
	goodcount = (uint) atoi((const char *)p);
	if ((int) goodcount < 0)
	    goodcount = 0;
	p = spanword(p);

	/* Third numeric field: date. */
	date = (uint) atoi((const char *)p);
	p = spanword(p);

	/* Anything left after the third field is treated as malformed input. */
	if (*p != '\0') {
	    fprintf(stderr,
		    "%s: Unexpected input [%s] on line %lu. "
		    "Expecting whitespace before count.\n",
		    progname, buf, line);
	    rv = 1;
	    break;
	}

	if (date == 0)				/* date as YYYYMMDD */
	    date = today_save;

	if (replace_nonascii_characters)
	    do_replace_nonascii_characters(buf, len);
 
 	token = word_new(buf, len);
	data.goodcount = goodcount;
	data.spamcount = spamcount;
	data.date = date;

	/* Store the token only when is_count() accepts it and, in maintain
	 * mode, discard_token() does not reject it. */
	if (is_count((const char *)buf)
		&& !(maintain && discard_token(token, &data))) {
	    load_count += 1;
	    /* Slower, but allows multiple lists to be concatenated */
	    set_date(date);
	    /* ds_read() results 0 and 1 are both accepted; anything else is an error. */
	    switch (ds_read(dsh, token, &data)) {
		case 0:
		case 1:
		    break;
		default:
		    rv = 1;
	    }
	    /* Add the parsed counts to whatever ds_read() left in data
	     * (presumably the stored counts when the token already exists). */
	    data.spamcount += spamcount;
	    data.goodcount += goodcount;
	    if (ds_write(dsh, token, &data)) rv = 1;
	}
	word_free(token);
    }

    /* Any recorded error aborts the whole transaction; otherwise commit it. */
    if (rv) {
	fprintf(stderr, "read or write error, aborting.\n");
	ds_txn_abort(dsh);
    } else {
	switch (ds_txn_commit(dsh)) {
	    case DST_FAILURE:
	    case DST_TEMPFAIL:
		fprintf(stderr, "commit failed\n");
		exit(EX_ERROR);
	    case DST_OK:
		break;
	}
    }

    ds_close(dsh);

    ds_cleanup(dbe);

    if (verbose)
	fprintf(dbgout, "%d tokens loaded\n", load_count);

    return rv;
}