/*
 *  Output a message to fp.
 *
 *  mp     - the message to print.
 *  fp     - buffered output stream the message is written to.
 *  remote - when non-zero, the envelope line is written by
 *           print_remote_header(); otherwise by print_header().
 *  mbox   - when non-zero the body goes out through m_escape(),
 *           otherwise through m_noescape().
 *
 *  Returns -1 as soon as a header write fails; otherwise returns whatever
 *  m_noescape()/m_escape() returns for the body.
 */
extern int
m_print(message *mp, Biobuf *fp, char *remote, int mbox)
{
	String *date, *sender;
	char *f[6];
	int n, rv;

	/* envelope line */
	sender = unescapespecial(s_clone(mp->sender));
	if(remote != 0)
		rv = print_remote_header(fp, s_to_c(sender), s_to_c(mp->date), remote);
	else
		rv = print_header(fp, s_to_c(sender), s_to_c(mp->date));
	s_free(sender);
	if(rv < 0)
		return -1;

	if(!rmail && !mp->havedate){
		/* add a date: line Date: Sun, 19 Apr 1998 12:27:52 -0400 */
		date = s_copy(s_to_c(mp->date));
		n = getfields(s_to_c(date), f, 6, 1, " \t");
		rv = 0;
		if(n == 6)
			rv = Bprint(fp, "Date: %s, %s %s %s %s %s\n",
				f[0], f[2], f[1], f[5], f[3], rewritezone(f[4]));
		/*
		 * The fields are taken from the private copy (presumably
		 * getfields() splits it in place), so the copy may only be
		 * released after they have been printed.
		 */
		s_free(date);
		if(rv < 0)
			return -1;
	}

	if(!rmail && !mp->havemime && isutf8(mp->body))
		printutf8mime(fp);

	if(mp->to){
		/* add the to: line */
		if(Bprint(fp, "%s\n", s_to_c(mp->to)) < 0)
			return -1;
		/* add the from: line */
		if(!mp->havefrom && printfrom(mp, fp) < 0)
			return -1;
		/* separate headers from a body that does not start with a blank line */
		if(!mp->rfc822headers && *s_to_c(mp->body) != '\n')
			if(Bprint(fp, "\n") < 0)
				return -1;
	} else if(!rmail){
		/* add the from: line */
		if(!mp->havefrom && printfrom(mp, fp) < 0)
			return -1;
		/* separate headers from a body that does not start with a blank line */
		if(!mp->rfc822headers && *s_to_c(mp->body) != '\n')
			if(Bprint(fp, "\n") < 0)
				return -1;
	}

	if(!mbox)
		return m_noescape(mp, fp);
	return m_escape(mp, fp);
}
void F64ReadLine::TestEnc(void) { if(Args.getSourceCodepage() != NO_CODEPAGE) return; char* buf = new char[MEMPROTECT(32768)]; int size = 0; setpointer(0); ReadFile(hFile, buf, 32768, (LPDWORD)&size, NULL); do { if(size > 2) { // 逆天等级的 bug 发现! 非 unsigned 情况下, 0xFF 和 '\xFF' 不同! if(buf[0] == '\xEF' && buf[1] == '\xBB' && buf[2] == '\xBF') { Args.setSourceCodepage(codepage = 65001); setpointer(3); break; } } if(size > 1) { if(buf[0] == '\xFF' && buf[1] == '\xFE') { Args.setSourceCodepage(codepage = 1200); setpointer(2); break; } if(buf[0] == '\xFE' && buf[1] == '\xFF') { Args.setSourceCodepage(codepage = 1201); setpointer(2); break; } if(TRUE == IsTextUnicode(buf, 32768, NULL)) { Args.setSourceCodepage(codepage = 1200); setpointer(2); break; } } if(isutf8(buf, 32768)) { Args.setSourceCodepage(codepage = 65001); setpointer(0); break; } Args.setSourceCodepage(codepage = GetACP()); setpointer(0); break; } while(0); delete [] buf; }
/*
	CREATE_UTF8_TOKEN_LIST()
	------------------------
	Split the NUL-terminated string s into tokens and store a freshly
	allocated (new char[]) copy of each one in term_list, followed by a
	terminating NULL entry.

	A token is either
		- a single character for which the byte has its high bit set and
		  isutf8() accepts it (its length is taken from utf8_bytes()), or
		- a maximal run of bytes that contains no whitespace and no such
		  character.
	Whitespace separates tokens and is never part of one; leading and
	trailing whitespace produce no tokens.

	term_list must have room for every token plus the NULL terminator (one
	slot per byte of s, plus one, is always enough).  The caller owns the
	token copies.

	Returns the number of tokens stored (not counting the NULL terminator).
*/
int create_utf8_token_list(char *s, char **term_list)
{
char *start, *where_to = s;
char **current = term_list;
long token_len, bytes, term_count = 0;

for (;;)
	{
	/*
		isspace() is only defined for unsigned char values (and EOF), so the
		possibly negative UTF-8 bytes must be converted before the call.
	*/
	while (isspace((unsigned char)*where_to))
		++where_to;

	if (*where_to == '\0')
		break;				// only whitespace was left: do not emit an empty token

	start = where_to;
	if ((*where_to & 0x80) && isutf8(where_to))
		{
		/*
			Defensive: always consume at least one byte and never step over
			the string terminator, whatever length utf8_bytes() reports.
		*/
		bytes = utf8_bytes(where_to);
		token_len = 0;
		while (token_len < bytes && where_to[token_len] != '\0')
			++token_len;
		if (token_len == 0)
			token_len = 1;
		where_to += token_len;
		}
	else
		{
		while (*where_to != '\0' && !isspace((unsigned char)*where_to) && !((*where_to & 0x80) && isutf8(where_to)))
			++where_to;
		token_len = (long)(where_to - start);
		}

	*current = new char[token_len + 1];
	memcpy(*current, start, token_len);
	(*current)[token_len] = '\0';
	++current;
	++term_count;
	}

*current = NULL;

return (int)term_count;
}
/* MAIN() ------ */ int main(int argc, char *argv[]) { static char *seperators = " "; char *file, *token, *where_to, *filename; // *start; char **term_list, **first, **last, **current; ANT_link_extract_term *link_index, *index_term; long terms_in_index, current_docid, param, file_number; long lowercase_only, first_param; long is_utf8_token, cmp, is_substring = FALSE; // token_len char *command; ANT_directory_iterator_object file_object; char buffer[1024 * 1024]; if (argc < 3) exit(printf("Usage:%s [-chinese] [-lowercase] <index> <file_to_link> ...\n", argv[0])); first_param = 1; lowercase_only = FALSE; chinese = FALSE; for (param = 1; param < argc; param++) { if (*argv[param] == '-') { command = argv[param] + 1; if (strcmp(command, "lowercase") == 0) { lowercase_only = TRUE; ++first_param; } else if (strcmp(command, "chinese") == 0) { chinese = TRUE; ++first_param; } else exit(printf("Unknown parameter:%s\n", argv[param])); } } link_index = read_index(argv[first_param], &terms_in_index); file_number = 1; for (param = first_param + 1; param < argc; param++) { ANT_directory_iterator_recursive disk(argv[param]); // make the recursive pattern matching as for default files reading if (disk.first(&file_object) == NULL) file = filename = NULL; else { filename = file_object.filename; file = ANT_disk::read_entire_file(filename); } while (file != NULL) { current_docid = get_doc_id(file); if (current_docid > 0) { // printf("ID:%d\n", current_docid); string_clean(file, lowercase_only, TRUE); current = term_list = new char *[strlen(file)]; // this is the worst case by far if (chinese) create_utf8_token_list(file, term_list); else { for (token = strtok(file, seperators); token != NULL; token = strtok(NULL, seperators)) *current++ = token; *current = NULL; } for (first = term_list; *first != NULL; first++) { // fprintf(stderr, "%s\n", *first); where_to = buffer; for (last = first; *last != NULL; last++) { if (where_to == buffer) { strcpy(buffer, *first); where_to = buffer + 
strlen(buffer); if (chinese) { if ((*first[0] & 0x80) && isutf8(*first)) is_utf8_token = TRUE; else is_utf8_token = FALSE; } } else { if (!chinese) *where_to++ = ' '; strcpy(where_to, *last); where_to += strlen(*last); } *where_to = '\0'; index_term = find_term_in_list(buffer, link_index, terms_in_index); if (index_term == NULL) break; // we're after the last term in the list so can stop because we can't be a substring if (chinese) { is_substring = FALSE; cmp = utf8_token_compare(buffer, index_term->term, &is_substring); } else cmp = string_compare(buffer, index_term->term); if (cmp == 0) // we're a term in the list { index_term->total_occurences++; if (index_term->last_docid != current_docid) { index_term->last_docid = current_docid; index_term->docs_containing_term++; } } else { if (chinese) cmp = is_substring == TRUE ? 0 : 1; else cmp = memcmp(buffer, index_term->term, strlen(buffer)); if (cmp != 0) break; // we're a not a substring so we can't find a longer term } } } if (chinese) free_utf8_token_list(term_list); delete [] term_list; delete [] file; if (file_number % 1000 == 0) fprintf(stderr, "Files processed:%d\n", file_number); file_number++; } else fprintf(stderr, "Error reading file %s\n", filename); //filename = disk.get_next_filename(); if (disk.next(&file_object) == NULL) file = filename = NULL; else { filename = file_object.filename; file = ANT_disk::read_entire_file(filename); } } } print_answer(link_index, terms_in_index); fprintf(stderr, "%s Completed\n", argv[0]); return 0; }