/*
    CONCAT()
    --------
    Build a new string holding foo followed by bar.

    Returns NULL when both inputs are NULL. When exactly one input is NULL the
    result is whatever strnnew() returns for the other input over its full
    length. When both are present the result is a malloc()ed buffer, or NULL if
    malloc() fails.

    NOTE(review): the two-string path allocates with malloc() while the
    single-string paths go through strnnew(); confirm callers release both
    kinds of result the same way.
*/
char *concat(char *foo, char *bar)
{
    size_t foo_length, bar_length;
    char *joined;

    if (foo == NULL && bar == NULL)
        return NULL;
    if (foo == NULL)
        return strnnew(bar, strlen(bar));
    if (bar == NULL)
        return strnnew(foo, strlen(foo));

    foo_length = strlen(foo);
    bar_length = strlen(bar);

    joined = (char *) malloc(foo_length + bar_length + 1);
    if (joined != NULL) {
        memcpy(joined, foo, foo_length);
        /* The +1 carries bar's terminating '\0' across with it. */
        memcpy(joined + foo_length, bar, bar_length + 1);
    }

    return joined;
}
/*
	READ_INDEX()
	------------
	Load the term list stored in the text file `filename`.

	The first line of the file holds the number of terms.  Each following line
	supplies one term: the text before the last ':' on the line.  Lines without
	a ':' are skipped, and no more than the announced number of terms is read.

	filename            - path of the index file (opened in "rb" mode)
	terms_in_collection - receives the number of entries filled in the result

	Returns an array allocated with new [] whose first *terms_in_collection
	entries have term set from the file, docs_containing_term and
	total_occurences zeroed, and last_docid set to -1.

	Exits the process (after printing a message) if the file cannot be opened
	or the term count on the first line cannot be read.
*/
ANT_link_extract_term *read_index(char *filename, long *terms_in_collection)
{
const int buffer_size = 1024 * 1024;
FILE *fp;
ANT_link_extract_term *all_terms, *term;
long unique_terms, terms_read;
char *buffer, *term_end;

if ((fp = fopen(filename, "rb")) == NULL)
	exit(printf("Cannot index file:%s\n", filename));

/* A 1 MiB line buffer is too large to place safely on the stack. */
buffer = new char [buffer_size];

/* unique_terms is a long, so the conversion must be %ld (not %d). */
unique_terms = 0;
if (fgets(buffer, buffer_size, fp) == NULL || sscanf(buffer, "%ld", &unique_terms) != 1 || unique_terms < 0)
	exit(printf("Cannot read term count from index file:%s\n", filename));

all_terms = new ANT_link_extract_term [unique_terms];

/* Stop at the announced count so a long file cannot overrun the array. */
terms_read = 0;
while (terms_read < unique_terms && fgets(buffer, buffer_size, fp) != NULL)
	{
	if ((term_end = strrchr(buffer, ':')) == NULL)
		continue;			/* no separator: nothing to extract from this line */

	term = all_terms + terms_read;
	term->term = strnnew(buffer, term_end - buffer);
	term->docs_containing_term = 0;
	term->total_occurences = 0;
	term->last_docid = -1;
	terms_read++;
	}

delete [] buffer;
fclose(fp);

/* Report only the entries actually populated (equals the header count for a well-formed file). */
*terms_in_collection = terms_read;

return all_terms;
}
/*
	ANT_DIRECTORY_ITERATOR_FILE::NEXT()
	-----------------------------------
	Find the next "<DOC" element after document_end and describe it in `object`.

	Two layouts are recognised:
	  "<DOC>"      - the identifier is the text between <DOCNO> and </DOCNO>
	  "<DOC ...>"  - the identifier is the value of the id="..." attribute, and
	                 document_start is moved to just after the closing '>' of
	                 the opening tag

	On success object->filename is set to a strnnew() copy of the identifier,
	document_end is moved to just past the matching "</DOC>", read_entire_file()
	is called when get_file is set, and `object` is returned.

	If no complete document (start tag, identifier and "</DOC>") is found in the
	remainder of the current file, get_next_file() is called and the search is
	retried there.  Returns NULL once get_next_file() returns NULL.
*/
ANT_directory_iterator_object *ANT_directory_iterator_file::next(ANT_directory_iterator_object *object)
{
char *document_id_start = NULL, *document_id_end = NULL;
char *tag_end;

if (document_end == NULL)
	if (get_next_file() == NULL)
		return NULL;

if ((document_start = strstr(document_end, "<DOC")) != NULL)
	{
	if (*(document_start + 4) == '>')
		{
		/* Only step past "<DOCNO>" if it was actually found. */
		if ((document_id_start = strstr(document_start, "<DOCNO>")) != NULL)
			document_id_end = strstr(document_id_start += 7, "</DOCNO>");
		}
	else
		{
		/* Only step past id=" if it was actually found. */
		if ((document_id_start = strstr(document_start, "id=\"")) != NULL)
			document_id_end = strchr(document_id_start += 4, '"');

		if (document_id_end != NULL)
			{
			if ((tag_end = strchr(document_id_end, '>')) != NULL)
				document_start = tag_end + 1;
			else
				document_id_end = NULL;		/* unterminated start tag: treat as no document */
			}
		}

	if (document_id_end != NULL && (document_end = strstr(document_id_end, "</DOC>")) != NULL)
		{
		document_end += 6;			/* strlen("</DOC>") */
		object->filename = strnnew(document_id_start, document_id_end - document_id_start);
		if (get_file)
			read_entire_file(object);
		return object;
		}
	}

/* Nothing (more) usable in this file: move on to the next one. */
if (get_next_file() == NULL)
	return NULL;

return next(object);
}
// Extract the next identifier from tb, starting the search at index beg.
//
// Skips forward from beg to the first character accepted by IsValidChar(),
// then extends the identifier over following characters that are either
// accepted by IsValidChar() or are decimal digits.
//
// Returns a strnnew() copy of the identifier and sets beg to the index of the
// character that ended it.  Returns nil, leaving beg untouched, when the scan
// reaches the end of the buffer before finding a terminating character (this
// includes the case where no identifier start is found at all).
char* ClassBuffer::Identifier (TextBuffer* tb, int& beg) {
    int i, j;
    const char* text = tb->Text();
    char* string = nil;

    // Locate the first character of the identifier.
    for (i = beg; i < tb->Length(); ++i) {
        if (IsValidChar(text[i])) {
            break;
        }
    }

    // Locate the first character past the identifier.
    for (j = i+1; j < tb->Length(); ++j) {
        char c = text[j];

        // isdigit() is undefined for negative values, which a plain char can
        // hold for bytes >= 0x80, so convert to unsigned char first.
        if (!IsValidChar(c) && !isdigit(static_cast<unsigned char>(c))) {
            break;
        }
    }

    if (j < tb->Length()) {
        string = strnnew(&text[i], j-i);
        beg = j;
    }
    return string;
}
/*
	ANT_DIRECTORY_ITERATOR_CSV::NEXT()
	----------------------------------
	Advance to the next line of the loaded text and describe it in `object`.

	Scanning resumes at document_end, skips characters for which ANT_isspace()
	is true, and then takes everything up to (not including) the next '\n' or
	the terminating '\0'.  object->filename is set to a strnnew() copy of that
	line, read_entire_file() is called when get_file is set, and `object` is
	returned.

	Returns NULL when file is NULL or when only the terminating '\0' remains.
*/
ANT_directory_iterator_object *ANT_directory_iterator_csv::next(ANT_directory_iterator_object *object)
{
if (file == NULL)
	return NULL;

/* Pick up where the previous call stopped and step over leading whitespace. */
for (document_start = document_end; ANT_isspace(*document_start); document_start++)
	;

document_end = document_start;
if (*document_start == '\0')
	return NULL;

/* Run to the end of the current line. */
for (; *document_end != '\n' && *document_end != '\0'; document_end++)
	;

object->filename = strnnew(document_start, document_end - document_start);
if (get_file)
	read_entire_file(object);

return object;
}
/*
    STRDUP()
    --------
    Return the result of strnnew() applied to s over its full strlen() length.
    s must not be NULL (it is passed straight to strlen()).
*/
char *strdup(char *s)
{
    size_t length = strlen(s);

    return strnnew(s, length);
}