void token_init(void) { static bool fTokenInit = false; yyinit(); if ( fTokenInit) { token_clear(); } else { fTokenInit = true; if (max_multi_token_len == 0) max_multi_token_len = (max_token_len+1) * multi_token_count + MAX_PREFIX_LEN; yylval_text_size = max_multi_token_len + MSG_COUNT_PADDING; yylval_text = (byte *) malloc( yylval_text_size+D ); yylval.leng = 0; yylval.u.text = yylval_text; /* First IP Address in Received: statement */ msg_addr = word_new( NULL, max_token_len ); /* Message ID */ msg_id = word_new( NULL, max_token_len * 3 ); /* Message's first queue ID */ queue_id = word_new( NULL, max_token_len ); ipsave = word_new( NULL, max_token_len ); /* word_new() used to avoid compiler complaints */ w_to = word_news("to:"); /* To: */ w_from = word_news("from:"); /* From: */ w_rtrn = word_news("rtrn:"); /* Return-Path: */ w_subj = word_news("subj:"); /* Subject: */ w_recv = word_news("rcvd:"); /* Received: */ w_head = word_news("head:"); /* Header: */ w_mime = word_news("mime:"); /* Mime: */ w_ip = word_news("ip:"); /* ip: */ w_url = word_news("url:"); /* url: */ nonblank_line = word_news(NONBLANK); /* do multi-word token initializations */ init_token_array(); } return; }
/*
 * Tokenize self->sentence in place on spaces, commas and hyphens and
 * append a word_new() object for every token to self->words.
 *
 * strtok() overwrites delimiters inside self->sentence and keeps static
 * state between calls.
 */
void sentence_split(sentence_t * self)
{
    static const char delimiters[] = " ,-";
    char *token;

    for (token = strtok(self->sentence, delimiters);
         token != NULL;
         token = strtok(NULL, delimiters)) {
        list_add(self->words, word_new(token));
    }
}
/*
 * Break self->sentence into pieces separated by ' ', ',' or '-' and push
 * a word_new() object for each piece onto the back of self->wordsList.
 *
 * The sentence buffer is modified by strtok() during the split.
 */
void sentence_divide(sentence_t * self)
{
    const char *const separators = " ,-";
    char *piece = strtok(self->sentence, separators);

    /* Nothing but separators (or an empty string): no words to add. */
    if (piece == NULL)
        return;

    do {
        list_push_back(self->wordsList, word_new(piece));
        piece = strtok(NULL, separators);
    } while (piece != NULL);
}
/*
 * Read "text.txt" character by character, build words and sentences from
 * it, print the resulting text with fprint() and release it.
 *
 * - '.', '!' and '?' finish the current word and the current sentence
 *   (only when a word is pending); every other punctuation character is
 *   skipped without ending the word.
 * - Whitespace finishes the pending word, if any.
 * - Any other character is appended to the pending word.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(void)
{
    /* Capacity of the word buffer, including the terminating NUL. */
    enum { WORD_CAP = 25 };

    const char name[] = "text.txt";
    FILE *file = fopen(name, "r");
    if (NULL == file)
        return 1;

    /* fgetc() returns int: a char cannot represent EOF distinctly from
     * every valid byte, and a negative char is not a valid argument for
     * the <ctype.h> functions. */
    int c;
    size_t len = 0;
    char word[WORD_CAP] = "";
    text_t *text = text_new();
    sen_t *sen = sen_new();

    while ((c = fgetc(file)) != EOF) {
        //printf("%c", c);
        if (ispunct(c)) {
            if (c == '.' || c == '!' || c == '?') {
                if (len == 0)
                    continue;
                addWord(sen, word_new(word));
                addSen(text, sen);
                sen = sen_new();
                memset(word, 0, sizeof word);
                len = 0;
            }
            continue;
        }

        if (isspace(c)) {
            if (len == 0)
                continue;
            addWord(sen, word_new(word));
            memset(word, 0, sizeof word);
            len = 0;
            continue;
        }

        /* Keep one byte for the terminator; characters beyond the buffer
         * capacity are dropped instead of written past the array. */
        if (len < WORD_CAP - 1)
            word[len++] = (char)c;
    }

    /* NOTE(review): a word/sentence still pending at EOF is not added to
     * the text, and the last sen object is not released here; this is
     * unchanged from the previous behavior - confirm whether intended. */
    fclose(file);

    fprint(text);
    text_free(text);
    return 0;
}
/*
 * Build the prelude dictionary from the static `entries` table.
 *
 * Starts from list_nil and, for each table entry in order, creates a word
 * from the entry's name, its definition wrapped by primitive_new(), and a
 * boolean chosen from the entry's parsing_p flag, then inserts it with
 * dictionary_insert().
 */
void prelude_initialize()
{
    int total = sizeof(entries) / sizeof(struct entry);
    int idx = 0;

    prelude = list_nil;

    while (idx < total) {
        object_t word = word_new(string_new(entries[idx].name),
                                 primitive_new(entries[idx].definition),
                                 entries[idx].parsing_p ? boolean_t : boolean_f);

        prelude = dictionary_insert(prelude, word);
        ++idx;
    }
}
/*
 * Count occurrences of stop words in a text.
 *
 * Phase 1: read "text.txt" one character at a time.  Letters are
 * lower-cased and appended to the current word; whitespace, ',', '.',
 * '!', '?', ';' and EOF finish the current word (if non-empty); '.', '!',
 * '?' and EOF additionally finish the current sentence, which is added to
 * the text.
 *
 * Phase 2: read a count followed by that many words from "stopwords.txt"
 * and write "<word>: <text_find result>" lines to "result.txt".
 *
 * Returns 0 on success, 1 if any of the three files cannot be opened.
 */
int main(void)
{
    int status = 0;

    FILE *input = fopen("text.txt", "r");
    if (input == NULL) {
        fprintf(stderr, "cannot open text.txt\n");
        return 1;
    }

    char *str = strnew();
    sentence_t *currSentence = sentence_create();
    text_t *text = text_create();

    for (;;) {
        /* int, not char: EOF must stay distinguishable from real bytes
         * and the <ctype.h> functions require a non-negative value. */
        int c = fgetc(input);

        if (c == EOF || isspace(c) || c == ',' || c == '.' || c == '!' ||
            c == '?' || c == ';') {
            if (strlen(str) != 0) {
                word_t *word = word_new(str);
                sentence_add(currSentence, word);
                free(str);
                word_free(word);
                str = strnew();
            }
        }

        if (c == EOF || c == '.' || c == '!' || c == '?') {
            text_add(text, currSentence);
            sentence_free(currSentence);
            currSentence = sentence_create();
        }

        if (c == EOF)
            break;

        if (isalpha(c)) {
            char *w = stradd(str, (char)tolower(c));
            free(str);
            str = w;
        }
    }
    fclose(input);

    FILE *output = fopen("result.txt", "w");
    input = fopen("stopwords.txt", "r");

    if (output == NULL || input == NULL) {
        fprintf(stderr, "cannot open result.txt or stopwords.txt\n");
        status = 1;
    } else {
        int stopCount = 0;

        /* A missing or malformed count means there is nothing to look up. */
        if (fscanf(input, "%d", &stopCount) != 1)
            stopCount = 0;

        for (int i = 0; i < stopCount; i++) {
            char s[15];

            /* Width limit keeps the read inside s (14 chars + NUL). */
            if (fscanf(input, "%14s", s) != 1)
                break;
            fprintf(output, "%s: %d\n", s, text_find(text, s));
        }
    }

    if (input != NULL)
        fclose(input);
    if (output != NULL)
        fclose(output);

    free(str);
    sentence_free(currSentence);
    text_free(text);
    return status;
}
/*
 * Create a sentence from `contents`.
 *
 * `contents` is first passed through clear_odd() and the result is split
 * in place on spaces with strtok().  The sentence's word list starts with
 * a placeholder word created from " " (fword); one word per token is
 * linked after it through `next`, and `count` holds the number of tokens.
 *
 * Returns the new sentence, or NULL if the sentence itself cannot be
 * allocated.
 */
sent_t *sentense_new(char *contents)
{
    sent_t *asent = malloc(sizeof *asent);
    if (asent == NULL)
        return NULL;

    asent->prev = NULL;
    asent->next = NULL;
    asent->count = 0;
    asent->fword = word_new(" ");

    contents = clear_odd(contents, strlen(contents));

    /* Append each token after the current tail, starting at the
     * placeholder, so the first and later tokens share one code path. */
    word_t *tail = asent->fword;
    for (char *p = strtok(contents, " "); p != NULL; p = strtok(NULL, " ")) {
        tail->next = word_new(p);
        tail = tail->next;
        asent->count++;
    }

    return asent;
}
/*
 * Produce a deeper copy of a boxed reference.
 *
 * Floats, integers and words are rebuilt from their values.  A quotation
 * becomes a fresh empty quotation to which the original is appended.
 * References within quotations are cloned using boxed_copy() rather than
 * boxed_clone().
 *
 * Returns NULL for a NULL reference or a type not handled here.
 */
Boxed boxed_clone(Boxed reference)
{
    Boxed copy = NULL;

    trace("boxed_clone(%p)\n", reference);

    if (reference) {
        switch (boxed_type(reference)) {
        case FLOAT:
            copy = float_new(float_value(reference));
            break;
        case INTEGER:
            copy = integer_new(integer_value(reference));
            break;
        case QUOTATION:
            copy = quotation_new(0);
            quotation_append(copy, reference);
            break;
        case WORD:
            copy = word_new(word_value(reference));
            break;
        default:
            break;
        }
    }

    return copy;
}
/*
 * Decode all of stdin with qp_decode() and write the result to stdout.
 *
 * The decoding mode defaults to RFC2045.  Passing "rfc2047" or "rfc-2047"
 * (compared case-insensitively) as the first argument selects RFC2047.
 *
 * stdin must be seekable: its size is determined with fseek()/ftell().
 * Every I/O failure is reported through die().
 */
int main(int argc, char **argv)
{
    size_t size;
    long end;
    qp_mode mode = RFC2045;
    word_t *w;

    /* strcasecmp() returns 0 on a match, so compare against 0 explicitly;
     * testing the raw result would select RFC2047 for every argument. */
    if (argc > 1 &&
        (strcasecmp(argv[1], "rfc2047") == 0 ||
         strcasecmp(argv[1], "rfc-2047") == 0))
        mode = RFC2047;

    if (fseek(stdin, 0, SEEK_END))
        die();

    /* ftell() reports failure as -1; check before converting to size_t. */
    end = ftell(stdin);
    if (end < 0)
        die();
    size = (size_t)end;

    if (fseek(stdin, 0, SEEK_SET))
        die();

    w = word_new(NULL, size);
    if (fread(w->u.text, 1, w->leng, stdin) != w->leng)
        die();

    size = qp_decode(w, mode);
    if (fwrite(w->u.text, 1, size, stdout) != size)
        die();

    word_free(w);

    if (fflush(stdout))
        die();
    if (fclose(stdout))
        die();

    return EXIT_SUCCESS;
}
/*
 * Split a sentence into space-separated words and collect them in a new
 * word_t built with word_new() / word_add().
 *
 * `str` is tokenized in place by strtok(), so it is modified.  The number
 * of words found is printed to stdout before tokenizing.
 *
 * Returns the new word_t, or NULL when `str` is empty or memory cannot be
 * allocated.  Terminates the process with exit(1) when `str` contains no
 * words at all (only spaces).
 */
word_t * senToWords(char * str)
{
    const size_t length = strlen(str);

    if (length == 0)
    {
        printf("string is empty\n");
        return NULL;
    }

    /* Count the words.  `flag` is set once a non-space character has been
     * seen, so a leading space does not count as a word boundary. */
    int wordscount = 1;
    int flag = 0;
    for (size_t i = 0; i < length; i++)
    {
        if (str[i] == ' ' && flag)
        {
            /* Only the first space after a word counts; extra spaces in
             * a run are ignored so they are not counted as words. */
            flag = 0;
            wordscount++;
        }
        if (str[i] != ' ')
            flag = 1; /* current character is not a space */
        if (str[i] == ' ' && str[i + 1] == '\0')
            wordscount--; /* trailing space: drop the word it announced */
    }
    printf("%i\n", wordscount);

    /* Array of word copies, plus one slot for a NULL terminator. */
    char ** words = malloc(sizeof *words * ((size_t)wordscount + 1));
    if (words == NULL)
        return NULL;

    /* Split the string into substrings and copy each one. */
    int found = 0;
    char * word = strtok(str, " ");
    while (word != NULL && found < wordscount)
    {
        const size_t bytes = strlen(word) + 1; /* include the NUL */

        words[found] = malloc(bytes);
        if (words[found] == NULL)
        {
            while (found > 0)
                free(words[--found]);
            free(words);
            return NULL;
        }
        memcpy(words[found], word, bytes);
        found++;

        /* Do not call strtok() again once the last word was taken. */
        if (found == wordscount)
            break;
        word = strtok(NULL, " ");
    }
    words[found] = NULL;

    if (found == 0)
    {
        free(words);
        exit(1);
    }

    word_t * self = word_new();
    for (int i = 0; i < found; i++)
    {
        /* NOTE(review): the copies are handed to word_add() and not freed
         * here, as before - confirm whether word_add() takes ownership. */
        word_add(self, words[i]);
    }

    /* Only the pointer array is released; the strings were passed on. */
    free(words);
    return self;
}
/*
 * Load a word list from fpin into the datastore identified by bfp.
 *
 * Each input line holds a token followed by three numeric fields that are
 * parsed with atoi(): spam count, good count and date.  Negative counts
 * are clamped to 0 and a date of 0 is replaced by the value `today` had
 * on entry.  For every accepted line the record already stored for the
 * token is read, the counts from the line are added to it, and the result
 * is written back.  All updates happen inside one transaction, which is
 * committed when no error occurred and aborted otherwise.
 *
 * Returns 0 on success, 1 after malformed input or a datastore read/write
 * error, 2 after a read error on fpin.  Exits the process when the
 * transaction cannot be started or committed.
 */
static int load_wordlist(bfpath *bfp)
{
    void *dsh;
    byte buf[BUFSIZE];
    byte *p;
    int rv = 0;
    size_t len;
    int load_count = 0;
    unsigned long line = 0;
    /* NOTE(review): count[] is not referenced by name below; presumably
     * spamcount/goodcount are macros that index it - confirm before
     * removing. */
    unsigned long count[IX_SIZE], date;
    YYYYMMDD today_save = today;

    void *dbe = ds_init(bfp);

    dsh = ds_open(dbe, bfp, (dbmode_t)(DS_WRITE | DS_LOAD));
    if (dsh == NULL)
        /* print error, cleanup, and exit */
        ds_open_failure(bfp, dbe);

    memset(buf, '\0', BUFSIZE);

    if (DST_OK != ds_txn_begin(dsh))
        exit(EX_ERROR);

    for (;;) {
        dsv_t data;
        word_t *token;

        if (fgets((char *)buf, BUFSIZE, fpin) == NULL) {
            /* NULL means end of input unless the stream reports an error. */
            if (ferror(fpin)) {
                perror(progname);
                rv = 2;
            }
            break;
        }

        line++;

        len = strlen((char *)buf);

        /* too short. */
        if (len < 4)
            continue;

        p = spanword(buf);
        /* Re-measured after spanword(); presumably spanword() terminates
         * the token inside buf so this is the token length - confirm. */
        len = strlen((const char *)buf);

        if (max_token_len != 0 &&
            len > max_token_len)
            continue;           /* too long - discard */

        spamcount = (uint) atoi((const char *)p);
        if ((int) spamcount < 0)
            spamcount = 0;
        p = spanword(p);

        goodcount = (uint) atoi((const char *)p);
        if ((int) goodcount < 0)
            goodcount = 0;
        p = spanword(p);

        date = (uint) atoi((const char *)p);
        p = spanword(p);

        /* Anything left after the third field is malformed input. */
        if (*p != '\0') {
            fprintf(stderr,
                    "%s: Unexpected input [%s] on line %lu. \n"
                    "Expecting whitespace before count.\n",
                    progname, buf, line);
            rv = 1;
            break;
        }

        if (date == 0)          /* date as YYYYMMDD */
            date = today_save;

        if (replace_nonascii_characters)
            do_replace_nonascii_characters(buf, len);

        token = word_new(buf, len);
        data.goodcount = goodcount;
        data.spamcount = spamcount;
        data.date = date;

        if (is_count((const char *)buf)
            && !(maintain && discard_token(token, &data))) {
            load_count += 1;
            /* Slower, but allows multiple lists to be concatenated */
            set_date(date);
            switch (ds_read(dsh, token, &data)) {
            case 0:
            case 1:
                break;
            default:
                rv = 1;
            }
            /* Add this line's counts to whatever ds_read() left in data. */
            data.spamcount += spamcount;
            data.goodcount += goodcount;
            if (ds_write(dsh, token, &data))
                rv = 1;
        }
        word_free(token);
    }

    if (rv) {
        fprintf(stderr, "read or write error, aborting.\n");
        ds_txn_abort(dsh);
    } else {
        switch (ds_txn_commit(dsh)) {
        case DST_FAILURE:
        case DST_TEMPFAIL:
            fprintf(stderr, "commit failed\n");
            exit(EX_ERROR);
        case DST_OK:
            break;
        }
    }

    ds_close(dsh);
    ds_cleanup(dbe);

    if (verbose)
        fprintf(dbgout, "%d tokens loaded\n", load_count);

    return rv;
}