Ejemplo n.º 1
0
/*
 * The almighty html parser method
 *
 * Walks the buffer of text_view from html_start to the end looking for
 * "<...>" sequences. Every sequence accepted by tag_is_valid() is deleted
 * from the text and interpreted:
 *   - a start tag is pushed on a stack (or applied at once when its name
 *     begins with smiley, br, img or hr, which never get a closing tag);
 *   - a closing tag pops the stack down to the first entry whose name it
 *     prefixes, applying every popped tag on the way. Entries popped without
 *     matching are extended to the end of the buffer.
 * Tags still open when the scan ends are applied up to the end of the
 * buffer. Finally unescape_html() is run from html_start and the file-level
 * messageid counter is incremented.
 *
 * text_view:  view whose buffer is parsed and modified in place
 * html_start: mark (passed by value) at which parsing starts
 * ignore:     forwarded unchanged to apply_tag()
 */
void parse_html(GtkTextView *text_view, GtkTextMark html_start, int ignore)
{
	GtkTextBuffer *html_buffer = gtk_text_view_get_buffer(text_view);

	tag *last_tag;
	GList *tag_list = NULL;	/* used as a stack: the last element is the top */

	int tagid = 0;
	GtkTextIter end_iter;
	GtkTextMark *end_mark;
	GtkTextIter tag_start_iter, tag_end_iter;

	gtk_text_buffer_get_end_iter(html_buffer, &end_iter);

	/*
	 * NOTE(review): the anonymous marks created in this function are never
	 * deleted from the buffer. They are left alone because struct copies
	 * of them are handed to apply_tag(), whose use of them is not visible
	 * here -- confirm before adding gtk_text_buffer_delete_mark() calls.
	 */
	end_mark =
		gtk_text_buffer_create_mark(html_buffer, NULL, &end_iter, TRUE);

	gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_start_iter,
		&html_start);
	gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_end_iter,
		&html_start);

	/* Check if < and > exist in that order */
	while (search_char(&tag_start_iter, '<') &&
		search_char(&tag_end_iter, '>')) {
		gchar *slice;
		gchar *tag_string = NULL;
		gchar *open_angle;
		GtkTextMark *tag_start_mark = NULL, *next_start_mark = NULL;

		if (gtk_text_iter_compare(&tag_start_iter, &tag_end_iter) > 0) {
			/* A stray '>' precedes the '<': restart just after it */
			gtk_text_iter_forward_char(&tag_end_iter);
			tag_start_iter = tag_end_iter;
			continue;
		}

		/* Include the '>' itself in the range */
		gtk_text_iter_forward_char(&tag_end_iter);

		tag_start_mark = gtk_text_buffer_create_mark(html_buffer, NULL,
			&tag_start_iter, TRUE);

		next_start_mark = gtk_text_buffer_create_mark(html_buffer, NULL,
			&tag_end_iter, TRUE);

		/*
		 * The slice is a newly allocated string. Keep the pointer that
		 * was returned so it can be released with g_free() at the end
		 * of the iteration; tag_string only ever points inside it.
		 */
		slice = gtk_text_buffer_get_slice(html_buffer, &tag_start_iter,
			&tag_end_iter, TRUE);

		/* Get rid of the < and > and clean up the tag string */
		open_angle = slice ? strchr(slice, '<') : NULL;
		if (open_angle != NULL) {
			gchar *close_angle;

			tag_string = open_angle + 1;
			close_angle = strchr(tag_string, '>');
			if (close_angle != NULL)
				*close_angle = '\0';

			g_strstrip(tag_string);
		}

		/* The ++ steps over the '/' so only the tag name is examined */
		if (tag_string != NULL && *tag_string == '/'
			&& tag_is_valid(++tag_string)) {
			int found_match = 0;
			last_tag = NULL;

			/* Now get rid of the tag from the text */
			gtk_text_buffer_delete(html_buffer, &tag_start_iter,
				&tag_end_iter);

			/* 
			 * This is an end tag. So now we must apply the tag to
			 * the enclosed text
			 */
			do {
				last_tag =
					g_list_nth_data(g_list_last(tag_list),
					0);
				if (last_tag == NULL)
					break;

				/*
				 * Only the first strlen(tag_string) characters
				 * are compared, because the stored name is the
				 * whole text of the opening tag.
				 */
				if (!g_ascii_strncasecmp(tag_string,
						last_tag->name,
						strlen(tag_string))) {
					last_tag->end = *tag_start_mark;
					found_match = 1;
				} else {
					/* Unclosed inner tag: run it to the end */
					last_tag->end = *end_mark;
				}

				apply_tag(text_view, *last_tag, ignore);

				tag_list = g_list_remove(tag_list, last_tag);

				/*
				 * apply_tag() got the struct by value, so the
				 * heap copy is no longer referenced.
				 * NOTE(review): last_tag->name is deliberately
				 * not freed; whether apply_tag() keeps that
				 * pointer is not visible here -- confirm.
				 */
				free(last_tag);
			}
			while (!found_match);
		} else if (tag_string != NULL && tag_is_valid(tag_string)) {
			tag *cur;

			/* Now get rid of the tag from the text */
			gtk_text_buffer_delete(html_buffer, &tag_start_iter,
				&tag_end_iter);

			/* This is a start tag. So put this into the list */
			cur = malloc(sizeof *cur);
			if (cur != NULL)
				cur->name = strdup(tag_string);

			if (cur == NULL || cur->name == NULL) {
				/*
				 * Out of memory: the markup has already been
				 * removed, so the text simply stays unformatted.
				 */
				free(cur);
			} else {
				memset(cur->id, 0, sizeof cur->id);

				/* Bounded: never write past the id field */
				snprintf(cur->id, sizeof cur->id, "%d%d",
					messageid, tagid++);
				cur->start = *tag_start_mark;

				/* 
				 * Insert into the tag list only if it's a
				 * closing type tag
				 */
				if (!(ay_strcasestr(tag_string, "smiley") == tag_string
						|| ay_strcasestr(tag_string,
							"br") == tag_string
						|| ay_strcasestr(tag_string,
							"img") == tag_string
						|| ay_strcasestr(tag_string,
							"hr") == tag_string)) {
					tag_list = g_list_append(tag_list, cur);
				} else {
					apply_tag(text_view, *cur, ignore);
					/* NOTE(review): cur->name, see above */
					free(cur);
				}
			}
		}

		/* tag_string pointed into slice and must not be used below */
		g_free(slice);

		/* Re-initialize the string to get new positions */
		gtk_text_buffer_get_end_iter(html_buffer, &end_iter);

		end_mark =
			gtk_text_buffer_create_mark(html_buffer, NULL,
			&end_iter, TRUE);

		gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_start_iter,
			next_start_mark);
		gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_end_iter,
			next_start_mark);
	}

	/* Whatever is still open extends to the end of the buffer */
	while ((last_tag = g_list_nth_data(g_list_last(tag_list), 0))) {
		last_tag->end = *end_mark;
		apply_tag(text_view, *last_tag, ignore);
		tag_list = g_list_remove(tag_list, last_tag);
		/* NOTE(review): last_tag->name is not freed, see above */
		free(last_tag);
	}

	g_list_free(tag_list);

	unescape_html(html_buffer, html_start);
	messageid++;
}
Ejemplo n.º 2
0
/*
 * Propagate tags between linked records until a full pass changes nothing.
 *
 * Starting at the position of orig, every record reached through DB_NEXT_DUP
 * is visited. A record without a tag first gets one via apply_tag(rec, NULL).
 * For each visited record the two secondary databases are searched for
 * entries keyed by that record's primary key; entries whose stored double is
 * below PR_T are skipped. The partner record named by each remaining entry is
 * loaded from prim and handled in two passes:
 *   pass 0 - if tagcmp(record, partner) > 0 the record takes the partner's
 *            tag (the original author describes this as finding the minimum
 *            tag);
 *   pass 1 - the record is written back to prim and every partner whose tag
 *            differs is rewritten with the record's tag.
 * Any tag change triggers another full sweep.
 *
 * orig:   cursor positioned on the first record to process; it is duplicated,
 *         never moved or closed here
 * ldb:    unused
 * first:  secondary database searched with first_cur
 * second: secondary database searched with second_cur
 * match:  unused
 * prim:   primary database that records are read from and written to
 *
 * Returns 0 on success, or the Berkeley DB error code when a cursor cannot be
 * created or a cursor read fails for a reason other than DB_NOTFOUND.
 * Terminates the process with exit(1) if a record cannot be given a tag.
 */
int clump(DBC* orig, DB* ldb, DB* first, DB* second, DB* match, DB* prim){
    int i, write_cycle, changed;
    int ret, rc;
    int (*key_func)(DB*, const DBT*, const DBT*, DBT*);
    DBC *prim_cur_i = NULL, *first_cur = NULL, *second_cur = NULL;
    DBC *fs[2];
    DBT dummy_dat;
    DBT key_i, pkey_i, data_i;
    DBT pkey_j, data_j;
    DbRecord *rec_i, *rec_j;
    void *old;

    (void)ldb;
    (void)match;

    DBT_CLEAR(key_i);
    DBT_CLEAR(pkey_i);
    DBT_CLEAR(data_i);

    DBT_CLEAR(pkey_j);
    DBT_CLEAR(data_j);

    DBT_CLEAR(dummy_dat);

    /* Check each cursor on its own so a failure of the first is not lost */
    ret = first->cursor(first, NULL, &first_cur, 0);
    if(ret == 0)
        ret = second->cursor(second, NULL, &second_cur, 0);
    if(ret){
        printf("Cursor creation problem! %d\n", ret);
        if(first_cur != NULL)
            first_cur->close(first_cur);
        return(ret);
    }
    fs[0] = first_cur;
    fs[1] = second_cur;

    changed = 1;
    while(changed){
        //Repeat until none of the tags change.
        changed = 0;

        //Work on a private copy so the caller's cursor keeps its position.
        ret = orig->dup(orig, &prim_cur_i, DB_POSITION);
        if(ret != 0)
            break;

        ret = prim_cur_i->pget(prim_cur_i, &key_i, &pkey_i, &data_i, DB_CURRENT); //primary get.
        while(ret == 0){
            rec_i = (DbRecord*)data_i.data;

            //Check for a tag
            if(has_tag(rec_i) == NULL){
                apply_tag(rec_i, NULL);
                if(has_tag(rec_i) == NULL){
                    printf("SERIOUS PROBLEM in tag application. Aborting.\n ");
                    exit(1);
                }
            }

            for(write_cycle=0; write_cycle<2; ++write_cycle){
                //In the first pass, find the minimum tag that this record is associated with
                //In the second pass, write that tag to all records.
                if(write_cycle)
                    prim->put(prim, NULL, &pkey_i, &data_i, 0);

                for(i=0; i<2; ++i){
                    //For each pass here, look for the record being the first in the comparison
                    //then the second in the comparison.
                    //Any non-zero result (not only DB_NOTFOUND) ends the scan, so an
                    //error can never leave us looping over stale data.
                    for(rc = fs[i]->pget(fs[i], &pkey_i, &key_i, &dummy_dat, DB_SET);
                        rc == 0;
                        rc = fs[i]->pget(fs[i], &pkey_i, &key_i, &dummy_dat, DB_NEXT_DUP)){

                        if(*(double*)dummy_dat.data < PR_T)
                            continue;

                        //Extract the partner's primary key: when scanning the first
                        //index (i == 0) the partner is the second record of the pair,
                        //and vice versa.
                        key_func = i ? first_index : second_index;
                        key_func(first /*dummy*/, &key_i, &dummy_dat /*dummy*/, &pkey_j);
                        old = pkey_j.data; //buffer produced by key_func, released below

                        if(prim->get(prim, NULL, &pkey_j, &data_j, 0) != 0){
                            //Partner could not be loaded; data_j is not usable.
                            free(old);
                            continue;
                        }
                        rec_j = (DbRecord*)data_j.data;

                        if(!write_cycle){
                            if(tagcmp(rec_i, rec_j) > 0){
                                apply_tag(rec_i, has_tag(rec_j));
                                changed = 1;
                            }
                        }
                        else if(tagcmp(rec_i, rec_j) != 0){
                            apply_tag(rec_j, has_tag(rec_i));
                            prim->put(prim, NULL, &pkey_j, &data_j, 0);
                            prim->get(prim, NULL, &pkey_j, &data_j, 0);
                            changed = 1;
                        }
                        free(old);
                    }
                }//First, second idx
            }//Write cycle

            ret = prim_cur_i->pget(prim_cur_i, &key_i, &pkey_i, &data_i, DB_NEXT_DUP);
        }

        //The duplicate is recreated on every sweep, so release it every sweep.
        prim_cur_i->close(prim_cur_i);
        prim_cur_i = NULL;

        if(ret != DB_NOTFOUND)
            break; //genuine read error: stop and report it
        ret = 0;
    }//changed

    first_cur->close(first_cur);
    second_cur->close(second_cur);
    return(ret);
}