Exemple #1
0
/*
 * Set or clear the bit(s) `code` in m_state and, when the state really
 * changed, open or close the matching markup tag.
 *
 *   code  bit mask to change; only 1, 2 and 4 map to a tag ("b", "i", "u")
 *   bSet  true to set the bits and push the tag, false to clear and pop it
 *
 * Nothing happens when the requested state is already in effect. For any
 * other code the state bits are still updated, but no tag is pushed/popped.
 */
void TextParser::setState(unsigned code, bool bSet)
{
    const unsigned active = m_state & code;

    /* Already in the requested state: nothing to do. */
    if (bSet ? (active == code) : (active == 0))
        return;

    if (bSet)
        m_state |= code;
    else
        m_state &= ~code;

    QString tag;
    if (code == 1)
        tag = "b";
    else if (code == 2)
        tag = "i";
    else if (code == 4)
        tag = "u";
    else
        return;     /* state bit without an associated tag */

    if (bSet)
        push_tag(tag);
    else
        pop_tag(tag);
}
Exemple #2
0
/*
 * Emit the opening tag of an element, including its attributes.
 *
 *   writer            writer handle; treated as an hc_simple_xml_writer_t
 *   element_name      name written after '<' and pushed on the tag stack
 *   attribute_names   array of n_attributes attribute names
 *   attribute_values  array of n_attributes attribute values, written
 *                     verbatim between double quotes
 *   n_attributes      number of entries in the two arrays
 *
 * Returns HCERR_OK when every step went through require_ok() without
 * bailing out. NOTE(review): require_ok is presumably a macro that returns
 * the failing hcerr_t to the caller -- confirm against its definition.
 */
hcerr_t start_element (xml_writer *writer,
		       char *element_name, 
		       char **attribute_names, 
		       char **attribute_values,
		       int n_attributes){
  hc_simple_xml_writer_t *simple_writer = (hc_simple_xml_writer_t*) writer;
  hc_tag_stack_t *new_top = 0;
  int attr;

  require_ok(pretty_print(simple_writer, TRUE));

  /* Record the element on the tag stack before anything is written. */
  require_ok(push_tag (&new_top, simple_writer->tag_stack, element_name));
  simple_writer->tag_stack = new_top;

  require_ok(hc_write(simple_writer, "<"));
  require_ok(hc_write(simple_writer, element_name));

  /* Each attribute is written as: space, name, ="value" */
  for (attr = 0; attr < n_attributes; attr++){
    require_ok(hc_write(simple_writer, " "));
    require_ok(hc_write(simple_writer, attribute_names[attr]));
    require_ok(hc_write(simple_writer, "=\""));
    require_ok(hc_write(simple_writer, attribute_values[attr]));
    require_ok(hc_write(simple_writer, "\""));
  }

  require_ok(hc_write(simple_writer, ">"));
  return HCERR_OK;
}
Exemple #3
0
/*
 * Output callback: splits the incoming data into plain text and tags.
 *
 * Text outside of tags goes straight to real_write(). Bytes from a '<'
 * up to and including the next '>' are accumulated with push_tag() and
 * handed to flush_tag() once the closing '>' has been seen. A tag may span
 * several calls; relobj->curtag_length > 0 means a tag is still open.
 *
 *   buf             data to process (size bytes)
 *   size            number of bytes available in buf
 *   stream_closure  the MimeMultipartRelated object this stream belongs to
 *
 * Returns 0 on success, or the first negative status reported by
 * push_tag(), flush_tag() or real_write().
 */
static int
mime_multipart_related_output_fn(const char* buf, int32_t size, void *stream_closure)
{
  MimeMultipartRelated *relobj = (MimeMultipartRelated *) stream_closure;

  while (size > 0) {
    if (relobj->curtag_length > 0) {
      /* A tag is pending: look for the '>' that finishes it. */
      char* tag_close = PL_strnchr(buf, '>', size);
      if (!tag_close) {
        /* Still no end in sight; stash everything and wait for more. */
        return push_tag(relobj, buf, size);
      }

      int32_t tag_bytes = tag_close - buf + 1;
      int rv = push_tag(relobj, buf, tag_bytes);
      if (rv < 0) return rv;
      rv = flush_tag(relobj);
      if (rv < 0) return rv;

      buf += tag_bytes;
      size -= tag_bytes;
    }

    /* Outside of a tag: everything up to the next '<' is plain output. */
    char* tag_open = PL_strnchr(buf, '<', size);
    if (tag_open && tag_open - buf >= size) tag_open = 0;
    if (!tag_open) {
      return real_write(relobj, buf, size);
    }

    int32_t text_bytes = tag_open - buf;
    int rv = real_write(relobj, buf, text_bytes);
    if (rv < 0) return rv;
    buf += text_bytes;
    size -= text_bytes;

    /* Start a new tag with the '<' itself. */
    PR_ASSERT(relobj->curtag_length == 0);
    rv = push_tag(relobj, buf, 1);
    if (rv < 0) return rv;
    PR_ASSERT(relobj->curtag_length == 1);
    buf++;
    size--;
  }
  return 0;
}
Exemple #4
0
void TextParser::put_style()
{
    if (!m_bChanged)
        return;
    m_bChanged = false;
    QString style;
    if (!color.isEmpty())
        style = color;
    if (!face.isEmpty()) {
        if (!style.isEmpty())
            style += ";";
        style += face;
    }
    if (!size.isEmpty()) {
        if (!style.isEmpty())
            style += ";";
        style += size;
    }
    QString tag("span style=\"");
    tag += style;
    tag += "\"";
    pop_tag(tag);
    push_tag(tag);
}
Exemple #5
0
/*
 * Write out the tag accumulated in relobj->curtag, replacing attribute
 * value words that match an entry in relobj->hash with that entry's URL.
 *
 * The tag text is scanned attribute by attribute: everything up to and
 * including '=' (plus following whitespace and an opening quote) is written
 * unchanged via real_write(). The attribute value is then split into
 * whitespace-separated words; each word is looked up in relobj->hash and,
 * if a value is found and accept_related_part() agrees, value->m_url is
 * written in place of the word and the matching object is flagged
 * dontShowAsAttachment. Words without a match are written unchanged.
 *
 * The buffer is modified in place while scanning (temporary NUL
 * terminators, "cid" forced to lowercase).
 *
 * Returns 0 on success (also when there is no pending tag), otherwise the
 * negative status returned by push_tag() or real_write().
 * relobj->curtag_length is reset to 0 only on the success path at the end.
 */
static int
flush_tag(MimeMultipartRelated* relobj)
{
  int length = relobj->curtag_length;
  char* buf;
  int status;

  /* Nothing buffered: nothing to flush. */
  if (relobj->curtag == NULL || length == 0) return 0;

  status = push_tag(relobj, "", 1); /* Push on a trailing NULL. */
  if (status < 0) return status;
  buf = relobj->curtag;
  PR_ASSERT(*buf == '<' && buf[length - 1] == '>');
  while (*buf) {
    char c;
    char* absolute;
    char* part_url;
    char* ptr = buf;
    char *ptr2;
    char quoteDelimiter = '\0';
    /* Find the next '=' (start of an attribute value), if any. */
    while (*ptr && *ptr != '=') ptr++;
    if (*ptr == '=') {
      /* Ignore = and leading space. */
      /* Safe, because there's a '>' at the end! */
      do {ptr++;} while (IS_SPACE(*ptr));
      if (*ptr == '"' || *ptr == '\'') {
        quoteDelimiter = *ptr;
        /* Take up the quote and leading space here as well. */
        /* Safe because there's a '>' at the end */
        do {ptr++;} while (IS_SPACE(*ptr));
      }
    }
    /* Emit everything up to the start of the value unchanged. */
    status = real_write(relobj, buf, ptr - buf);
    if (status < 0) return status;
    buf = ptr;
    if (!*buf) break;
    /* Locate the end of the value: the matching quote, or for an
       unquoted value the next whitespace or '>'. */
    if (quoteDelimiter)
    {
      ptr = PL_strnchr(buf, quoteDelimiter, length - (buf - relobj->curtag));
    } else {
      for (ptr = buf; *ptr ; ptr++) {
        if (*ptr == '>' || IS_SPACE(*ptr)) break;
      }
      PR_ASSERT(*ptr);
    }
    /* No terminator found: leave the rest to the final write below. */
    if (!ptr || !*ptr) break;

    while(buf < ptr)
    {
      /* ### mwelch For each word in the value string, see if
                      the word is a cid: URL. If so, attempt to
              substitute the appropriate mailbox part URL in
              its place. */
      ptr2=buf; /* walk from the left end rightward */
      while((ptr2<ptr) && (!IS_SPACE(*ptr2)))
        ptr2++;
      /* Compare the beginning of the word with "cid:". Yuck. */
      if (((ptr2 - buf) > 4) &&
        ((buf[0]=='c' || buf[0]=='C') &&
         (buf[1]=='i' || buf[1]=='I') &&
         (buf[2]=='d' || buf[2]=='D') &&
          buf[3]==':'))
      {
        // Make sure it's lowercase, otherwise it won't be found in the hash table
        buf[0] = 'c'; buf[1] = 'i'; buf[2] = 'd';

        /* Null terminate the word so we can... */
        c = *ptr2;
        *ptr2 = '\0';

        /* Construct a URL out of the word. */
        absolute = MakeAbsoluteURL(relobj->base_url, buf);

        /* See if we have a mailbox part URL
           corresponding to this cid. */
        /* NOTE(review): the lookup key here is the raw word (buf);
           `absolute` only gates the lookup and is then freed. */
        part_url = nullptr;
        MimeHashValue * value = nullptr;
        if (absolute)
        {
          value = (MimeHashValue *)PL_HashTableLookup(relobj->hash, buf);
          part_url = value ? value->m_url : nullptr;
          PR_FREEIF(absolute);
        }

        /*If we found a mailbox part URL, write that out instead.*/
        if (part_url && accept_related_part(relobj, value->m_obj))
        {
          status = real_write(relobj, part_url, strlen(part_url));
          /* NOTE(review): this early return leaves *ptr2 as NUL. */
          if (status < 0) return status;
          buf = ptr2; /* skip over the cid: URL we substituted */

          /* don't show that object as attachment */
          if (value->m_obj)
            value->m_obj->dontShowAsAttachment = true;
        }

        /* Restore the character that we nulled. */
        *ptr2 = c;
      }
      /* rhp - if we get here, we should still check against the hash table! */
      else
      {
        /* Word does not start with "cid:"; still try the hash table,
           keyed by the absolute URL when one could be built. */
        char holder = *ptr2;
        char *realout;

        *ptr2 = '\0';

        /* Construct a URL out of the word. */
        absolute = MakeAbsoluteURL(relobj->base_url, buf);

        /* See if we have a mailbox part URL
           corresponding to this cid. */
        MimeHashValue * value;
        if (absolute)
          value = (MimeHashValue *)PL_HashTableLookup(relobj->hash, absolute);
        else
          value = (MimeHashValue *)PL_HashTableLookup(relobj->hash, buf);
        realout = value ? value->m_url : nullptr;

        /* Undo the temporary terminator before writing anything. */
        *ptr2 = holder;
        PR_FREEIF(absolute);

        if (realout && accept_related_part(relobj, value->m_obj))
        {
          status = real_write(relobj, realout, strlen(realout));
          if (status < 0) return status;
          buf = ptr2; /* skip over the cid: URL we substituted */

          /* don't show that object as attachment */
          if (value->m_obj)
            value->m_obj->dontShowAsAttachment = true;
        }
      }
      /* rhp - if we get here, we should still check against the hash table! */

      /* Advance to the beginning of the next word, or to
         the end of the value string. */
      while((ptr2<ptr) && (IS_SPACE(*ptr2)))
        ptr2++;

      /* Write whatever original text remains after
         cid: URL substitution. */
      status = real_write(relobj, buf, ptr2-buf);
      if (status < 0) return status;
      buf = ptr2;
    }
  }
  /* Flush whatever is left of the tag (e.g. the closing '>'). */
  if (buf && *buf) {
    status = real_write(relobj, buf, strlen(buf));
    if (status < 0) return status;
  }
  /* Mark the tag buffer as empty again. */
  relobj->curtag_length = 0;
  return 0;
}
Exemple #6
0
/**
Convert an HTML document to plain text while parsing it.

Tokens are pulled one at a time with htmlLex::get_token_mem() and the
resulting text is appended to `text`:
  - start tags / end tags are looked up with htmlTagEntity::tag_idx() and
    translated according to the tag's `proc`, `pair` and `newline` fields
    (markers such as '<', '\'', '[', "* ", blank lines, "[[title]]");
  - unknown tags, "<!"-style tags and comments are skipped;
  - named and numeric entities are replaced by their `conv` string, or, for
    numeric entities below 0x80 without a conversion, by the character itself;
  - everything else is copied, with whitespace collapsed outside of PRE.

@param tok     tokenizer state; offset and line_num are reset here, src_len
               bounds the loop (presumably src_mem/src_len describe the HTML
               source buffer -- confirm with htmlLex)
@param text    output buffer; always NUL-terminated on return
@param maxlen  size of `text`; the main loop stops once maxlen-20 bytes have
               been produced (the 20 bytes presumably serve as slack for the
               writes done within one iteration -- NOTE(review): token copies
               via strcpy are not bounded by this slack)
@return number of bytes written to `text`, not counting the terminating NUL

Output lengths are computed as pointer differences (text-start); casting the
pointers themselves to 32-bit integers is not valid where pointers are wider.
*/
int htmlParse::parse(stToken *tok, char *text, int maxlen)
{
	int	tok_val=0, prev_tok_val=0, prev_ch=0;
	int	is_in_tag=0;
	uint4	tmp=0;
	int	not_moved=0;
	char *start=text;

	stTagProc *curr_tag=0;
	//stTagProc **prev_tag=0;
	stEntityProc *ent_proc;
	int	tag_depth=0;
	int	is_in_BODY_tag=0;
	int	is_in_A_tag=0;
	int	is_in_PRE_tag=0; // 2003.11.8
	int	hlink_chars = 0;
	//int	newline_delayed=0; // TRUE when an Optional Pair && Newline tag has started
	//int	dont_newline=0; // TRUE means "dont write newline"
	int	pair_mark=0;
	int	words_in_line=0;
	int	b_tag_first_in_line=0; // set when a B tag appears at the beginning of a line
	int	glossary_marked=0;
	int	is_glossary=0;

	tok->offset = 0;
	tok->line_num=1;
	*text = 0;

	// keep some slack at the end of the output buffer
	maxlen -= 20;
	
	while(tok->offset <  tok->src_len && (int)(text-start) < maxlen) {
	
		// remember where this token started, to detect a stuck lexer below
		tmp = tok->offset;

		htmlLex::get_token_mem(tok);

		#ifdef BUG
		if (tok->tok_len>=20) {
			printf("long tok: line=%d, tok_val=%d, CH=%c\n",
			tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]);
			fflush(stdout);
			printf("tok=%s\n", tok->tok_str);
			fflush(stdout);
		}
		#endif
		
		#ifdef DEB
		/*
		if (tok->tok_val==0 || tok->line_num==1) {
			printf("line=%d, tok_val=%d, CH=%c\n",
			tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]);
			printf("offset=%d, text-start=%d\n", tok->offset, text-start);
		}
		*/
		#endif
		
		// a newline in the output resets the per-line state
		if (text > start) {
			prev_ch = *(text-1);
			if (prev_ch=='\n') {
				words_in_line = 0;
				b_tag_first_in_line = 0;
				glossary_marked = 0;
			}
		}
		
		if (tok->tok_val == TOK_EOF) break;

		if (tok->tok_val == URL_CMNT) {
			//printf("%s", tok->tok_str);
			continue;
		}
		
		// lexer made no progress: step over one byte, give up the second time
		if (tok->offset==tmp) {
			#ifdef BUG
			//printf("parse(): offset not moved: line=%d, CH=%d(%c)\n",
			//	tok->line_num, tok->src_mem[tok->offset-1], tok->src_mem[tok->offset-1]);
			#endif
			tok->offset++;
			if (++not_moved >= 2) break;
			else continue;
		}
		tok_val = tok->tok_val;

	
		if (tok_val == BTAG_BEGIN) {	
			skip_to_two_token(tok, STRING, TAG_END);
			if (tok->tok_val != STRING) { // must be a HTML tag
				skip_to_token(tok, TAG_END);
				continue;			
			}
			
			//prn_tabs();
			//fprintf(log,"%s-->:%d :%d\n", tok->tok_str, tok->line_num, tok->offset);
			
			curr_tag = htmlTagEntity::tag_idx(tok->tok_str);
			
			// unknown tag name: drop the whole tag
			if (curr_tag == NULL) {
				skip_to_token(tok, TAG_END);
				continue;
			}

			tag_depth++;

			#ifdef MAIN_TEXT_ONLY
			if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag)
			push_tag(curr_tag, tag_depth, (int)(text-start));
			

			if (curr_tag==htmlTagEntity::BODY_tag) {
				is_in_BODY_tag = 1;
			}
			#endif
			
			if (curr_tag==htmlTagEntity::A_tag) {
				is_in_A_tag = 1;
			}
			else if (curr_tag==htmlTagEntity::PRE_tag) {
				is_in_PRE_tag = 1;
			}
			else if(curr_tag==htmlTagEntity::B_tag||curr_tag==htmlTagEntity::DT_tag) {
				if (words_in_line==0)
					b_tag_first_in_line = 1;
			}
			else if (curr_tag==htmlTagEntity::TITLE_tag) {							
				get_hyperlink_title(tok->src_mem + tok->offset  , htmlTitle, HTMLTITLE_LEN);
					
				#ifdef DEB
				PRN("tok->scr_mem=%X, offset=%d, title=%X\n", 
					tok->src_mem, tok->offset, htmlTitle);
				PRN("title: %s (%d)\n", htmlTitle, strlen(htmlTitle) );
				#endif
				
				is_glossary =  is_glossay_mode(htmlTitle);
		
			}						
			else if (curr_tag==htmlTagEntity::BR_tag) {
				*text++ = '\n';
			}
	
			// proc 0: the tag (and, for paired tags, its content) is ignored
			if (curr_tag->proc == 0) { // 0
				if (curr_tag->pair != Not_PAIR)
					skip_ignore_part(tok, curr_tag);
				else
					skip_to_token(tok, TAG_END);
				if (curr_tag->newline) {
					if (prev_ch != '\n') *text++ = '\n';
				}				
				continue;
			}

			// opening markers; pair_mark remembers which one is open so
			// that only the matching end tag closes it
			if (curr_tag==htmlTagEntity::TITLE_tag) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '[';
				*text++ = '[';
			}
			else if (curr_tag->proc == 1) {
			}
			else if (!pair_mark && curr_tag->proc == 2) {
				pair_mark = 2;
				*text++ = '<';
			}
			else if (!pair_mark && curr_tag->proc == 3) {
				pair_mark = 3;
				*text++ = '\'';
			}
			else if (!pair_mark && curr_tag->proc == 4) {
				pair_mark = 4;
				*text++ = '[';
			}
			else if (curr_tag->proc == 5) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '*';
				*text++ = ' ';
				words_in_line = 0;
			}
			else if (curr_tag->proc == 6) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '\n';
			}
			
				
			skip_to_token(tok, TAG_END);

			if (!pair_mark && prev_ch != ' ') *text++ = ' ';
			//if (!ISSPACE(*text)) *text++ = ' ';

		}// BTAG_BEGIN '<'
				
		else if (tok_val == ETAG_BEGIN) {
					
			skip_to_two_token(tok, STRING, TAG_END);
			if (tok->tok_val != STRING) { // must be a HTML tag
				skip_to_token(tok, TAG_END);
				continue;
			}
			
		
			curr_tag = htmlTagEntity::tag_idx(tok->tok_str);

			if (curr_tag == NULL) {
				skip_to_token(tok, TAG_END);
				continue;
			}

			#ifdef MAIN_TEXT_ONLY
			tag_depth--;

			if (curr_tag==htmlTagEntity::BODY_tag) {
				is_in_BODY_tag = 0;
			}
			
			if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag)
			end_tag(curr_tag, (int)(text-start));
			#endif
			
			if (curr_tag==htmlTagEntity::A_tag) {
				is_in_A_tag = 0;
				#ifdef MARK_A_LINK
				*text++ = '}';
				#endif
			}
			else if (curr_tag==htmlTagEntity::PRE_tag) {
				is_in_PRE_tag = 0;
			}
			else if(curr_tag==htmlTagEntity::DT_tag || 
				(curr_tag==htmlTagEntity::B_tag && (b_tag_first_in_line && is_glossary)))
			{
				// mark the term of a glossary entry with " :" (once per line)
				if (glossary_marked==0) {
					*text++ = ' ';
					*text++ = ':';
					//*text++ = ' ';
					b_tag_first_in_line = 0;
					glossary_marked = 1;
				}
					
			}
			else if (curr_tag==htmlTagEntity::TITLE_tag) {
				*text++ = ']';
				*text++ = ']';
				*text++ = '\n';
			}
			else if (curr_tag->proc == 1) {
			}
			else if (pair_mark==2 && curr_tag->proc == 2) {
				pair_mark = 0;
				*text++ = '>';
			}
			else if (pair_mark==3 && curr_tag->proc == 3) {
				pair_mark = 0;
				*text++ = '\'';
			}
			else if (pair_mark==4 && curr_tag->proc == 4) {
				pair_mark = 0;
				*text++ = ']';
			}
			else if (curr_tag->proc == 5) {				
			}
			else if (curr_tag->proc == 6) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '\n';
			}

			if (curr_tag->newline) {
				*text++ = '\n';
			}			
		
	
			skip_to_token(tok, TAG_END);

		}// ETAG_BEGIN '</'
		
		else if (tok_val == STAG_END) {
			skip_to_token(tok, TAG_END);
			is_in_tag = 0;
			tag_depth--;
		}
		
		else if (tok_val == TAG_EXC) {			
			tmp = skip_to_token(tok, TAG_END);
			//fprintf(log,"<- > skipped %d\n", tmp);
		}
		
		else if (tok_val == CMNT_BEGIN) {
			//fprintf(log,"Cmnt -->: %d :%d\n", tok->line_num, tok->offset);
			htmlLex::skip_to_cmnt_end(tok);
			//fprintf(log,"Cmnt <--: %d :%d\n", tok->line_num, tok->offset);
		}		
		else if (tok_val == ENTITY_STR) {
			if (prev_ch !=' ') *text++ = ' '; //2002.12.2
			
			ent_proc = htmlTagEntity::entity_idx(tok->tok_str);

			if (ent_proc && ent_proc->conv[0]) {
				#ifdef BUG
				//prn_ent_proc(ent_proc);
				#endif
				strcpy(text, ent_proc->conv);
				text += strlen(ent_proc->conv);
			}
			else {
				//fprintf(log,"ignored entity = %s\n", tok->tok_str);
			}
		
		}
		else if (tok_val == ENTITY_NUM) {
			if (prev_ch !=' ') *text++ = ' '; //2002.12.2
			
			ent_proc = htmlTagEntity::entity_id_idx(tok->tok_realval);

			if (ent_proc && ent_proc->conv[0]) {
				#ifdef BUG
				//prn_ent_proc(ent_proc);
				#endif
				strcpy(text, ent_proc->conv);
				text += strlen(ent_proc->conv);
			}
			else {			
				if (tok->tok_realval < 0x80) {
				// 2005.7.19
					*text = (char)tok->tok_realval ;
					text++;
				}

			}

		}
		else {
			// plain content; t marks where this token's output begins
			char *t = text;
			if (is_in_PRE_tag || tok->tok_len>1 || tok_val == STRING || tok_val == NUMBER) {
				//if (prev_tok_val == TAG_END && *text != ' ') *text++ = ' ';
				// 2003. 3.20
				//if (prev_tok_val == TAG_END && isalnum(prev_ch) ) 
				//	*text++ = ' ';
				words_in_line++;
				strcpy(text, tok->tok_str);
				text += tok->tok_len;
			}
			// 2002.10.16 Hangul is converted to a space
			else if (tok->tok_val == FR_STR) {
				if (do_prn_hangul) {
					if (prev_ch != ' ') *text++ = ' ';
					strcpy(text, tok->tok_str);
					text += tok->tok_len;
					words_in_line++;
				}
				else {
					if (prev_ch != ' ') *text++ = ' ';
				}
			}
			else if (tok->tok_len==1) {		
				
				if (curr_tag==htmlTagEntity::PRE_tag) {					
					#ifdef ODD_CHAR
					if ( (tok_val & 0xF0) != 0x90)
						*text++ = tok_val;
					else if (tok_val==0x92) {
						if (prev_tok_val != 0x92)
							*text++ = '\'';
					}
					#else
					*text++ = tok_val;
					#endif
				}
				//else if (tok_val == '\n' || tok_val == '\r') {
				else if (ISSPACE(tok_val)) {
					// collapse runs of whitespace into a single space
					if (prev_ch != ' ') *text++ = ' ';
				}
								
				else {
					if (prev_ch==':' && tok_val==':') { }
					else *text++ = tok_val;
				}
				
			}
			if (is_in_A_tag)
				hlink_chars += (int)(text - t);
		}

		prev_tok_val = tok->tok_val;
		
	}// while(1)

	*text = 0;

	if ((int)(text-start) >= maxlen-5) {
		PRN("parse(): too far !! maxlen=%d, %d\n", maxlen, (int)(text-start));
	}
	if ((int)(text-start) < maxlen-5)
		memset(text, 0, 4);
	// NOTE(review): the debug block below still casts pointers to int and
	// prints them with %X; it is only compiled when DEB is defined.
	#ifdef DEB
		PRN("text=%X, start=%X, text=%d, start=%d\n", 
			text, start, (int)text % 10000, (int)start % 10000);
		PRN("start[0]=%d text[0]=%d\n", start[0], text[0]);
	#endif

	return (int)(text-start);
}
Exemple #7
0
/*
 * Begin an XML element: record `tag` via push_tag() and print "<tag".
 * The closing '>' is not written here, so the start tag is left open.
 */
void xml_tag_start(const char *tag)
{
    push_tag(tag);
    xprintf("<%s", tag);
}