void TextParser::setState(unsigned code, bool bSet) { if (bSet) { if ((m_state & code) == code) return; m_state |= code; } else { if ((m_state & code) == 0) return; m_state &= ~code; } QString tag; switch (code) { case 1: tag = "b"; break; case 2: tag = "i"; break; case 4: tag = "u"; break; default: return; } if (bSet) { push_tag(tag); } else { pop_tag(tag); } }
/*
 * Write the opening tag of an element: "<name attr="value" ...>".
 *
 * writer            - cast to hc_simple_xml_writer_t; its tag_stack is
 *                     replaced by the stack returned from push_tag.
 * element_name      - written right after '<' and pushed on the tag stack.
 * attribute_names,
 * attribute_values  - parallel arrays, n_attributes entries each are read.
 *                     Values are handed to hc_write exactly as given.
 *
 * Every step goes through require_ok(); HCERR_OK is returned once the
 * closing '>' has been written.
 */
hcerr_t start_element (xml_writer *writer, char *element_name, char **attribute_names, char **attribute_values, int n_attributes){
  hc_simple_xml_writer_t *out = (hc_simple_xml_writer_t*) writer;
  hc_tag_stack_t *new_top = 0;
  int attr = 0;

  require_ok(pretty_print(out, TRUE));

  /* Register the element on the tag stack before any markup is written. */
  require_ok(push_tag (&new_top, out->tag_stack, element_name));
  out->tag_stack = new_top;

  require_ok(hc_write(out, "<"));
  require_ok(hc_write(out, element_name));

  /* One ' name="value"' group per attribute. */
  while (attr < n_attributes) {
    require_ok(hc_write(out, " "));
    require_ok(hc_write(out, attribute_names[attr]));
    require_ok(hc_write(out, "=\""));
    require_ok(hc_write(out, attribute_values[attr]));
    require_ok(hc_write(out, "\""));
    attr++;
  }

  require_ok(hc_write(out, ">"));
  return HCERR_OK;
}
static int mime_multipart_related_output_fn(const char* buf, int32_t size, void *stream_closure) { MimeMultipartRelated *relobj = (MimeMultipartRelated *) stream_closure; char* ptr; int32_t delta; int status; while (size > 0) { if (relobj->curtag_length > 0) { ptr = PL_strnchr(buf, '>', size); if (!ptr) { return push_tag(relobj, buf, size); } delta = ptr - buf + 1; status = push_tag(relobj, buf, delta); if (status < 0) return status; status = flush_tag(relobj); if (status < 0) return status; buf += delta; size -= delta; } ptr = PL_strnchr(buf, '<', size); if (ptr && ptr - buf >= size) ptr = 0; if (!ptr) { return real_write(relobj, buf, size); } delta = ptr - buf; status = real_write(relobj, buf, delta); if (status < 0) return status; buf += delta; size -= delta; PR_ASSERT(relobj->curtag_length == 0); status = push_tag(relobj, buf, 1); if (status < 0) return status; PR_ASSERT(relobj->curtag_length == 1); buf++; size--; } return 0; }
void TextParser::put_style() { if (!m_bChanged) return; m_bChanged = false; QString style; if (!color.isEmpty()) style = color; if (!face.isEmpty()) { if (!style.isEmpty()) style += ";"; style += face; } if (!size.isEmpty()) { if (!style.isEmpty()) style += ";"; style += size; } QString tag("span style=\""); tag += style; tag += "\""; pop_tag(tag); push_tag(tag); }
/*
 * flush_tag: write out the tag buffered in relobj->curtag, substituting
 * part URLs for attribute-value words found in relobj->hash.
 *
 * Returns 0 immediately when no tag is buffered (curtag is NULL or
 * curtag_length is 0).  Otherwise a terminating NUL is appended with
 * push_tag() and the buffer is walked attribute by attribute:
 *   - everything up to and including '=', following spaces and an optional
 *     opening quote is written unchanged with real_write();
 *   - the value ends at the matching quote, or for an unquoted value at the
 *     first whitespace or '>'; if no end is found the loop stops;
 *   - the value is split into whitespace-separated words.  A word longer
 *     than four characters that starts with "cid:" (any case) has that
 *     prefix lower-cased in place and is looked up in relobj->hash by the
 *     word itself (only when MakeAbsoluteURL() returned non-null).  Any
 *     other word is looked up by the MakeAbsoluteURL() result, or by the
 *     word itself when that result is null.
 *   - on a hit that accept_related_part() approves, the entry's m_url is
 *     written instead of the word and the entry's m_obj (if any) gets
 *     dontShowAsAttachment = true; otherwise the original word is written.
 * Words are temporarily NUL-terminated inside the buffer while being looked
 * up and the overwritten character is restored afterwards.
 *
 * Whatever remains of the buffer after the loop is written verbatim, then
 * relobj->curtag_length is reset to 0 and 0 is returned.  A negative status
 * from push_tag() or real_write() is returned at once; in that case
 * curtag_length is not reset.
 */
static int flush_tag(MimeMultipartRelated* relobj) { int length = relobj->curtag_length; char* buf; int status; if (relobj->curtag == NULL || length == 0) return 0; status = push_tag(relobj, "", 1); /* Push on a trailing NULL. */ if (status < 0) return status; buf = relobj->curtag; PR_ASSERT(*buf == '<' && buf[length - 1] == '>'); while (*buf) { char c; char* absolute; char* part_url; char* ptr = buf; char *ptr2; char quoteDelimiter = '\0'; while (*ptr && *ptr != '=') ptr++; if (*ptr == '=') { /* Ignore = and leading space. */ /* Safe, because there's a '>' at the end! */ do {ptr++;} while (IS_SPACE(*ptr)); if (*ptr == '"' || *ptr == '\'') { quoteDelimiter = *ptr; /* Take up the quote and leading space here as well. */ /* Safe because there's a '>' at the end */ do {ptr++;} while (IS_SPACE(*ptr)); } } status = real_write(relobj, buf, ptr - buf); if (status < 0) return status; buf = ptr; if (!*buf) break; if (quoteDelimiter) { ptr = PL_strnchr(buf, quoteDelimiter, length - (buf - relobj->curtag)); } else { for (ptr = buf; *ptr ; ptr++) { if (*ptr == '>' || IS_SPACE(*ptr)) break; } PR_ASSERT(*ptr); } if (!ptr || !*ptr) break; while(buf < ptr) { /* ### mwelch For each word in the value string, see if the word is a cid: URL. If so, attempt to substitute the appropriate mailbox part URL in its place. */ ptr2=buf; /* walk from the left end rightward */ while((ptr2<ptr) && (!IS_SPACE(*ptr2))) ptr2++; /* Compare the beginning of the word with "cid:". Yuck. */ if (((ptr2 - buf) > 4) && ((buf[0]=='c' || buf[0]=='C') && (buf[1]=='i' || buf[1]=='I') && (buf[2]=='d' || buf[2]=='D') && buf[3]==':')) { // Make sure it's lowercase, otherwise it won't be found in the hash table buf[0] = 'c'; buf[1] = 'i'; buf[2] = 'd'; /* Null terminate the word so we can... */ c = *ptr2; *ptr2 = '\0'; /* Construct a URL out of the word. */ absolute = MakeAbsoluteURL(relobj->base_url, buf); /* See if we have a mailbox part URL corresponding to this cid. 
*/ part_url = nullptr; MimeHashValue * value = nullptr; if (absolute) { value = (MimeHashValue *)PL_HashTableLookup(relobj->hash, buf); part_url = value ? value->m_url : nullptr; PR_FREEIF(absolute); } /*If we found a mailbox part URL, write that out instead.*/ if (part_url && accept_related_part(relobj, value->m_obj)) { status = real_write(relobj, part_url, strlen(part_url)); if (status < 0) return status; buf = ptr2; /* skip over the cid: URL we substituted */ /* don't show that object as attachment */ if (value->m_obj) value->m_obj->dontShowAsAttachment = true; } /* Restore the character that we nulled. */ *ptr2 = c; } /* rhp - if we get here, we should still check against the hash table! */ else { char holder = *ptr2; char *realout; *ptr2 = '\0'; /* Construct a URL out of the word. */ absolute = MakeAbsoluteURL(relobj->base_url, buf); /* See if we have a mailbox part URL corresponding to this cid. */ MimeHashValue * value; if (absolute) value = (MimeHashValue *)PL_HashTableLookup(relobj->hash, absolute); else value = (MimeHashValue *)PL_HashTableLookup(relobj->hash, buf); realout = value ? value->m_url : nullptr; *ptr2 = holder; PR_FREEIF(absolute); if (realout && accept_related_part(relobj, value->m_obj)) { status = real_write(relobj, realout, strlen(realout)); if (status < 0) return status; buf = ptr2; /* skip over the cid: URL we substituted */ /* don't show that object as attachment */ if (value->m_obj) value->m_obj->dontShowAsAttachment = true; } } /* rhp - if we get here, we should still check against the hash table! */ /* Advance to the beginning of the next word, or to the end of the value string. */ while((ptr2<ptr) && (IS_SPACE(*ptr2))) ptr2++; /* Write whatever original text remains after cid: URL substitution. */ status = real_write(relobj, buf, ptr2-buf); if (status < 0) return status; buf = ptr2; } } if (buf && *buf) { status = real_write(relobj, buf, strlen(buf)); if (status < 0) return status; } relobj->curtag_length = 0; return 0; }
/**
 * Convert an HTML document to plain text while parsing it.
 *
 * Tokens are pulled one at a time with htmlLex::get_token_mem(tok) and the
 * text rendering is appended to `text`.  On entry tok->offset is reset to 0,
 * tok->line_num to 1 and `maxlen` is reduced by 20.  The loop ends when
 * tok->offset reaches tok->src_len, when the output length reaches the
 * reduced `maxlen`, on TOK_EOF, or when the lexer fails to advance
 * tok->offset for the second time (the offset is bumped by one after each
 * such stall).
 *
 * Token handling, as visible in the body:
 *  - BTAG_BEGIN: the tag name is looked up with htmlTagEntity::tag_idx();
 *    non-tags and unknown tags are skipped up to TAG_END.  BR emits '\n'.
 *    A tag whose `proc` is 0 is skipped (skip_ignore_part() for paired tags,
 *    otherwise skip_to_token()).  TITLE is opened with "[[".  proc 2/3/4
 *    open a '<', '\'' or '[' marker if no marker is currently open, proc 5
 *    starts a "* " item on a new line, proc 6 emits a blank line.
 *  - ETAG_BEGIN: closes the matching marker ('>', '\'', ']'), emits "]]\n"
 *    for TITLE, " :" after DT (or after a line-leading B when the title put
 *    the parser in glossary mode), and '\n' for tags with `newline` set.
 *  - STAG_END, TAG_EXC, CMNT_BEGIN: skipped.
 *  - ENTITY_STR / ENTITY_NUM: replaced by the `conv` string of the entity
 *    table entry; a numeric entity without an entry is emitted as a single
 *    char when its value is below 0x80.
 *  - everything else: copied as text, with whitespace runs collapsed to a
 *    single space outside PRE.
 *
 * @param tok     lexer state; src_mem/src_len describe the input.
 * @param text    output buffer; always NUL-terminated before returning.
 * @param maxlen  size limit for the output (see the notes below).
 * @return        number of characters written to `text`.
 *
 * NOTE(review): the length computations cast pointers to uint4 / int
 * ((uint4)text, (int)text); on targets where pointers are wider than those
 * types this truncates (and some compilers reject it) - confirm target ABI.
 * NOTE(review): the output bound is only tested at the top of the loop;
 * strcpy() of a token or entity string inside one iteration is not checked
 * against the remaining space beyond the 20-byte slack - verify maximum
 * token/entity lengths against that margin.
 */
int htmlParse::parse(stToken *tok, char *text, int maxlen) { int tok_val=0, prev_tok_val=0, prev_ch=0; int is_in_tag=0; uint4 tmp=0; int not_moved=0; char *start=text; stTagProc *curr_tag=0; //stTagProc **prev_tag=0; stEntityProc *ent_proc; int tag_depth=0; int is_in_BODY_tag=0; int is_in_A_tag=0; int is_in_PRE_tag=0; // 2003.11.8 int hlink_chars = 0; //int newline_delayed=0; // TRUE when an Optional Pair && Newline tag has started //int dont_newline=0; // TRUE means "dont write newline" int pair_mark=0; int words_in_line=0; int b_tag_first_in_line=0; // set when a B tag appears at the start of a line int glossary_marked=0; int is_glossary=0; tok->offset = 0; tok->line_num=1; *text = 0; maxlen -= 20; while(tok->offset < tok->src_len && (int)((uint4)text-(uint4)start) < maxlen) { tmp = tok->offset; htmlLex::get_token_mem(tok); #ifdef BUG if (tok->tok_len>=20) { printf("long tok: line=%d, tok_val=%d, CH=%c\n", tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]); fflush(stdout); printf("tok=%s\n", tok->tok_str); fflush(stdout); } #endif #ifdef DEB /* if (tok->tok_val==0 || tok->line_num==1) { printf("line=%d, tok_val=%d, CH=%c\n", tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]); printf("offset=%d, text-start=%d\n", tok->offset, text-start); } */ #endif if (text > start) { prev_ch = *(text-1); if (prev_ch=='\n') { words_in_line = 0; b_tag_first_in_line = 0; glossary_marked = 0; } } if (tok->tok_val == TOK_EOF) break; if (tok->tok_val == URL_CMNT) { //printf("%s", tok->tok_str); continue; } if (tok->offset==tmp) { #ifdef BUG //printf("parse(): offset not moved: line=%d, CH=%d(%c)\n", // tok->line_num, tok->src_mem[tok->offset-1], tok->src_mem[tok->offset-1]); #endif tok->offset++; if (++not_moved >= 2) break; else continue; } tok_val = tok->tok_val; if (tok_val == BTAG_BEGIN) { skip_to_two_token(tok, STRING, TAG_END); if (tok->tok_val != STRING) { // must be a HTML tag skip_to_token(tok, TAG_END); continue; } //prn_tabs(); //fprintf(log,"%s-->:%d :%d\n", 
tok->tok_str, tok->line_num, tok->offset); curr_tag = htmlTagEntity::tag_idx(tok->tok_str); if (curr_tag == NULL) { skip_to_token(tok, TAG_END); continue; } tag_depth++; #ifdef MAIN_TEXT_ONLY if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag) push_tag(curr_tag, tag_depth, (int)((uint4)text-(uint4)start)); if (curr_tag==htmlTagEntity::BODY_tag) { is_in_BODY_tag = 1; } #endif if (curr_tag==htmlTagEntity::A_tag) { is_in_A_tag = 1; } else if (curr_tag==htmlTagEntity::PRE_tag) { is_in_PRE_tag = 1; } else if(curr_tag==htmlTagEntity::B_tag||curr_tag==htmlTagEntity::DT_tag) { if (words_in_line==0) b_tag_first_in_line = 1; } else if (curr_tag==htmlTagEntity::TITLE_tag) { get_hyperlink_title(tok->src_mem + tok->offset , htmlTitle, HTMLTITLE_LEN); #ifdef DEB PRN("tok->scr_mem=%X, offset=%d, title=%X\n", tok->src_mem, tok->offset, htmlTitle); PRN("title: %s (%d)\n", htmlTitle, strlen(htmlTitle) ); #endif is_glossary = is_glossay_mode(htmlTitle); } else if (curr_tag==htmlTagEntity::BR_tag) { *text++ = '\n'; } if (curr_tag->proc == 0) { // 0 if (curr_tag->pair != Not_PAIR) skip_ignore_part(tok, curr_tag); else skip_to_token(tok, TAG_END); if (curr_tag->newline) { if (prev_ch != '\n') *text++ = '\n'; } continue; } if (curr_tag==htmlTagEntity::TITLE_tag) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '['; *text++ = '['; } else if (curr_tag->proc == 1) { } else if (!pair_mark && curr_tag->proc == 2) { pair_mark = 2; *text++ = '<'; } else if (!pair_mark && curr_tag->proc == 3) { pair_mark = 3; *text++ = '\''; } else if (!pair_mark && curr_tag->proc == 4) { pair_mark = 4; *text++ = '['; } else if (curr_tag->proc == 5) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '*'; *text++ = ' '; words_in_line = 0; } else if (curr_tag->proc == 6) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '\n'; } skip_to_token(tok, TAG_END); if (!pair_mark && prev_ch != ' ') *text++ = ' '; //if (!ISSPACE(*text)) *text++ = ' '; }// BTAG_BEGIN '<' else if (tok_val == 
ETAG_BEGIN) { skip_to_two_token(tok, STRING, TAG_END); if (tok->tok_val != STRING) { // must be a HTML tag skip_to_token(tok, TAG_END); continue; } curr_tag = htmlTagEntity::tag_idx(tok->tok_str); if (curr_tag == NULL) { skip_to_token(tok, TAG_END); continue; } #ifdef MAIN_TEXT_ONLY tag_depth--; if (curr_tag==htmlTagEntity::BODY_tag) { is_in_BODY_tag = 0; } if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag) end_tag(curr_tag, (int)((uint4)text-(uint4)start)); #endif if (curr_tag==htmlTagEntity::A_tag) { is_in_A_tag = 0; #ifdef MARK_A_LINK *text++ = '}'; #endif } else if (curr_tag==htmlTagEntity::PRE_tag) { is_in_PRE_tag = 0; } else if(curr_tag==htmlTagEntity::DT_tag || (curr_tag==htmlTagEntity::B_tag && (b_tag_first_in_line && is_glossary))) { if (glossary_marked==0) { *text++ = ' '; *text++ = ':'; //*text++ = ' '; b_tag_first_in_line = 0; glossary_marked = 1; } } else if (curr_tag==htmlTagEntity::TITLE_tag) { *text++ = ']'; *text++ = ']'; *text++ = '\n'; } else if (curr_tag->proc == 1) { } else if (pair_mark==2 && curr_tag->proc == 2) { pair_mark = 0; *text++ = '>'; } else if (pair_mark==3 && curr_tag->proc == 3) { pair_mark = 0; *text++ = '\''; } else if (pair_mark==4 && curr_tag->proc == 4) { pair_mark = 0; *text++ = ']'; } else if (curr_tag->proc == 5) { } else if (curr_tag->proc == 6) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '\n'; } if (curr_tag->newline) { *text++ = '\n'; } skip_to_token(tok, TAG_END); }// ETAG_BEGIN '</' else if (tok_val == STAG_END) { skip_to_token(tok, TAG_END); is_in_tag = 0; tag_depth--; } else if (tok_val == TAG_EXC) { tmp = skip_to_token(tok, TAG_END); //fprintf(log,"<- > skipped %d\n", tmp); } else if (tok_val == CMNT_BEGIN) { //fprintf(log,"Cmnt -->: %d :%d\n", tok->line_num, tok->offset); htmlLex::skip_to_cmnt_end(tok); //fprintf(log,"Cmnt <--: %d :%d\n", tok->line_num, tok->offset); } else if (tok_val == ENTITY_STR) { if (prev_ch !=' ') *text++ = ' '; //2002.12.2 ent_proc = 
htmlTagEntity::entity_idx(tok->tok_str); if (ent_proc && ent_proc->conv[0]) { #ifdef BUG //prn_ent_proc(ent_proc); #endif strcpy(text, ent_proc->conv); text += strlen(ent_proc->conv); } else { //fprintf(log,"ignored entity = %s\n", tok->tok_str); } } else if (tok_val == ENTITY_NUM) { if (prev_ch !=' ') *text++ = ' '; //2002.12.2 ent_proc = htmlTagEntity::entity_id_idx(tok->tok_realval); if (ent_proc && ent_proc->conv[0]) { #ifdef BUG //prn_ent_proc(ent_proc); #endif strcpy(text, ent_proc->conv); text += strlen(ent_proc->conv); } else { if (tok->tok_realval < 0x80) { // 2005.7.19 *text = (char)tok->tok_realval ; text++; } } } else { char *t = text; if (is_in_PRE_tag || tok->tok_len>1 || tok_val == STRING || tok_val == NUMBER) { //if (prev_tok_val == TAG_END && *text != ' ') *text++ = ' '; // 2003. 3.20 //if (prev_tok_val == TAG_END && isalnum(prev_ch) ) // *text++ = ' '; words_in_line++; strcpy(text, tok->tok_str); text += tok->tok_len; } // 2002.10.16 Hangul is converted to a space else if (tok->tok_val == FR_STR) { if (do_prn_hangul) { if (prev_ch != ' ') *text++ = ' '; strcpy(text, tok->tok_str); text += tok->tok_len; words_in_line++; } else { if (prev_ch != ' ') *text++ = ' '; } } else if (tok->tok_len==1) { if (curr_tag==htmlTagEntity::PRE_tag) { #ifdef ODD_CHAR if ( (tok_val & 0xF0) != 0x90) *text++ = tok_val; else if (tok_val==0x92) { if (prev_tok_val != 0x92) *text++ = '\''; } #else *text++ = tok_val; #endif } //else if (tok_val == '\n' || tok_val == '\r') { else if (ISSPACE(tok_val)) { if (prev_ch != ' ') *text++ = ' '; } else { if (prev_ch==':' && tok_val==':') { } else *text++ = tok_val; } } if (is_in_A_tag) hlink_chars += (int)(text - t); } prev_tok_val = tok->tok_val; }// while(1) *text = 0; if ((int)(text-start) >= maxlen-5) { PRN("parse(): too far !! 
maxlen=%d, %d\n", maxlen, text-start); } if ((int)(text-start) < maxlen-5) memset(text, 0, 4); #ifdef DEB PRN("text=%X, start=%X, text=%d, start=%d\n", text, start, (int)text % 10000, (int)start % 10000); PRN("start[0]=%d text[0]=%d\n", start[0], text[0]); #endif return ((int)text-(int)start); }
/*
 * Begin an XML element named `tag`.
 *
 * The name is first passed to push_tag(), then "<tag" is printed via
 * xprintf().  No closing '>' is written here, so the caller is expected to
 * finish the start tag itself.
 */
void xml_tag_start(const char *tag)
{
    push_tag(tag);

    /* Only the opening bracket and the name; the tag is left unterminated. */
    xprintf("<%s", tag);
}