/*
 * The almighty html parser method
 *
 * Scans the text of text_view's buffer from the html_start mark to the end
 * of the buffer for "<...>" sequences.  Every sequence that tag_is_valid()
 * accepts is deleted from the buffer and turned into formatting through
 * apply_tag():
 *
 *   - start tags whose name begins with "smiley", "br", "img" or "hr" are
 *     applied immediately;
 *   - any other start tag is pushed onto a stack until its end tag is seen;
 *   - an end tag pops the stack until an entry whose name starts with the
 *     end tag's name is found.  Entries popped on the way that do not match
 *     are applied up to the current end of the buffer;
 *   - entries still on the stack when the scan finishes are applied up to
 *     the end of the buffer.
 *
 * Sequences that are not valid tags are left in the text.  Finally
 * unescape_html() is run from html_start and the file-level messageid
 * counter is incremented.
 *
 * text_view:  view whose buffer is parsed and modified in place
 * html_start: mark at which parsing begins (passed by value, as callers
 *             expect)
 * ignore:     forwarded unchanged to every apply_tag() call
 */
void parse_html(GtkTextView *text_view, GtkTextMark html_start, int ignore)
{
	GtkTextBuffer *html_buffer = gtk_text_view_get_buffer(text_view);
	tag *last_tag;
	GList *tag_list = NULL;
	int tagid = 0;

	GtkTextIter end_iter;
	GtkTextMark *end_mark;

	GtkTextIter tag_start_iter, tag_end_iter;

	gtk_text_buffer_get_end_iter(html_buffer, &end_iter);
	end_mark = gtk_text_buffer_create_mark(html_buffer, NULL, &end_iter,
		TRUE);

	gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_start_iter,
		&html_start);
	gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_end_iter,
		&html_start);

	/* Check if < and > exist in that order */
	while (search_char(&tag_start_iter, '<')
		&& search_char(&tag_end_iter, '>')) {
		gchar *tag_slice;
		gchar *tag_string;
		GtkTextMark *tag_start_mark = NULL, *next_start_mark = NULL;

		/* A '>' that precedes the '<': step over it and search again */
		if (gtk_text_iter_compare(&tag_start_iter, &tag_end_iter) > 0) {
			gtk_text_iter_forward_char(&tag_end_iter);
			tag_start_iter = tag_end_iter;
			continue;
		}

		/* Include the '>' itself in the [start, end) range */
		gtk_text_iter_forward_char(&tag_end_iter);

		tag_start_mark = gtk_text_buffer_create_mark(html_buffer, NULL,
			&tag_start_iter, TRUE);
		next_start_mark = gtk_text_buffer_create_mark(html_buffer, NULL,
			&tag_end_iter, TRUE);

		/*
		 * The slice is a newly allocated string.  Keep the original
		 * pointer in tag_slice so it can be released at the end of the
		 * iteration; tag_string only points into it.
		 */
		tag_slice = gtk_text_buffer_get_slice(html_buffer,
			&tag_start_iter, &tag_end_iter, TRUE);

		/* Get rid of the < and > and clean up the tag string */
		tag_string = tag_slice ? strchr(tag_slice, '<') : NULL;

		if (tag_string) {
			char *closing;

			tag_string++;
			closing = strchr(tag_string, '>');

			if (closing)
				*closing = '\0';

			g_strstrip(tag_string);
		}

		if (tag_string == NULL) {
			/* No '<' in the slice: nothing to interpret */
		} else if (*tag_string == '/' && tag_is_valid(++tag_string)) {
			int found_match = 0;
			last_tag = NULL;

			/* Now get rid of the tag from the text */
			gtk_text_buffer_delete(html_buffer, &tag_start_iter,
				&tag_end_iter);

			/*
			 * This is an end tag. So now we must apply the tag to
			 * the enclosed text
			 */
			do {
				last_tag = g_list_nth_data(g_list_last(tag_list),
					0);

				if (last_tag == NULL)
					break;

				if (!g_ascii_strncasecmp(tag_string,
						last_tag->name,
						strlen(tag_string))) {
					last_tag->end = *tag_start_mark;
					found_match = 1;
				} else {
					/* Unclosed tag: runs to the end */
					last_tag->end = *end_mark;
				}

				apply_tag(text_view, *last_tag, ignore);
				tag_list = g_list_remove(tag_list, last_tag);

				/*
				 * apply_tag() received the struct by value, so
				 * the heap copy is no longer needed.
				 * NOTE(review): last_tag->name (strdup'd) is
				 * not freed here because apply_tag() is not
				 * visible -- confirm it does not keep the
				 * pointer, then free it as well.
				 */
				free(last_tag);
			} while (!found_match);
		} else if (tag_is_valid(tag_string)) {
			tag *cur;

			/* Now get rid of the tag from the text */
			gtk_text_buffer_delete(html_buffer, &tag_start_iter,
				&tag_end_iter);

			/* This is a start tag. So put this into the list */
			cur = malloc(sizeof *cur);

			if (cur != NULL) {
				/* Bounded write: id is a fixed-size buffer */
				memset(cur->id, 0, sizeof cur->id);
				snprintf(cur->id, sizeof cur->id, "%d%d",
					messageid, tagid++);

				cur->name = strdup(tag_string);
				cur->start = *tag_start_mark;

				/*
				 * Insert into the tag list only if it's a
				 * closing type tag
				 */
				if (ay_strcasestr(tag_string, "smiley") == tag_string
					|| ay_strcasestr(tag_string, "br") == tag_string
					|| ay_strcasestr(tag_string, "img") == tag_string
					|| ay_strcasestr(tag_string, "hr") == tag_string) {
					apply_tag(text_view, *cur, ignore);
					free(cur);
				} else {
					tag_list = g_list_append(tag_list, cur);
				}
			}
		}

		g_free(tag_slice);

		/* Re-initialize the string to get new positions */
		gtk_text_buffer_get_end_iter(html_buffer, &end_iter);
		end_mark = gtk_text_buffer_create_mark(html_buffer, NULL,
			&end_iter, TRUE);
		gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_start_iter,
			next_start_mark);
		gtk_text_buffer_get_iter_at_mark(html_buffer, &tag_end_iter,
			next_start_mark);

		/*
		 * next_start_mark is only ever used through this pointer, so it
		 * can be dropped once the iterators are repositioned.  The
		 * other marks are kept: tag structs hold by-value copies of
		 * them.
		 */
		gtk_text_buffer_delete_mark(html_buffer, next_start_mark);
	}

	/* Whatever is still open runs to the end of the buffer */
	while ((last_tag = g_list_nth_data(g_list_last(tag_list), 0))) {
		last_tag->end = *end_mark;
		apply_tag(text_view, *last_tag, ignore);
		tag_list = g_list_remove(tag_list, last_tag);
		free(last_tag);
	}

	g_list_free(tag_list);

	unescape_html(html_buffer, html_start);

	messageid++;
}
/*
 * clump: propagate tags between linked records until nothing changes.
 *
 * Starting from the record `orig` is positioned on, and continuing through
 * its duplicates (DB_NEXT_DUP), each record i is processed as follows:
 *
 *   1. If has_tag() finds no tag on it, apply_tag(record, NULL) is called
 *      to give it one; the process exits if that still leaves it untagged.
 *   2. Pass 0 (write_cycle == 0): for every entry found for i's primary key
 *      through the `first` and `second` cursors whose stored double is at
 *      least PR_T, the linked record j is fetched from `prim`.  When
 *      tagcmp(i, j) > 0, j's tag is copied onto i.
 *   3. Pass 1 (write_cycle == 1): i is written back to `prim`, then every
 *      linked j whose tag differs from i's receives i's tag and is written
 *      back.
 *
 * The whole sweep is repeated while any tag was changed.
 *
 * orig:   cursor positioned on the first record to process; it is
 *         duplicated, not moved
 * ldb:    unused
 * first:  database searched by primary key; also passed as the (dummy)
 *         handle argument to the key extraction callbacks
 * second: database searched by primary key
 * match:  unused
 * prim:   primary database the records are read from and written to
 *
 * Returns 0 on success, or the nonzero Berkeley DB error code of the cursor
 * call (cursor creation, dup, or the initial pget) that failed.
 */
int clump(DBC* orig, DB* ldb, DB* first, DB* second, DB* match, DB* prim){
	int i, write_cycle, changed;
	int rc = 0;
	int(*key_func)(DB*, const DBT*, const DBT*, DBT*);
	DBC *prim_cur_i = NULL;
	DBC *first_cur = NULL;
	DBC *second_cur = NULL;
	DBC *fs[2];

	DBT dummy_dat;
	DBT key_i, pkey_i, data_i;
	DBT pkey_j, data_j;

	void *old;
	char *tagp;

	(void)ldb;
	(void)match;

	DBT_CLEAR(key_i);
	DBT_CLEAR(pkey_i);
	DBT_CLEAR(data_i);
	DBT_CLEAR(pkey_j);
	DBT_CLEAR(data_j);
	DBT_CLEAR(dummy_dat);

	/* Check each cursor on its own so one failure cannot mask the other */
	rc = first->cursor(first, NULL, &first_cur, 0);
	if(rc){
		printf("Cursor creation problem! %d\n", rc);
		return(rc);
	}
	rc = second->cursor(second, NULL, &second_cur, 0);
	if(rc){
		printf("Cursor creation problem! %d\n", rc);
		first_cur->close(first_cur);
		return(rc);
	}

	fs[0] = first_cur;
	fs[1] = second_cur;

	changed = 1;
	while(changed){ //Repeat until none of the tags change.
		changed = 0;

		rc = orig->dup(orig, &prim_cur_i, DB_POSITION);
		if(rc)
			break;

		//primary get.
		rc = prim_cur_i->pget(prim_cur_i, &key_i, &pkey_i, &data_i, DB_CURRENT);
		if(rc){
			prim_cur_i->close(prim_cur_i);
			break;
		}

		do {
			//Check for a tag
			tagp = has_tag((DbRecord*)data_i.data);
			if(tagp == NULL){
				apply_tag((DbRecord*)data_i.data, NULL);
				tagp = has_tag((DbRecord*)data_i.data);
				if(tagp == NULL){
					printf("SERIOUS PROBLEM in tag application. Aborting.\n ");
					exit(1);
				}
			}

			for(write_cycle=0; write_cycle<2; ++write_cycle){
				//In the first pass, find the minimum tag that this record is associated with
				//In the second pass, write that tag to all records.
				if(write_cycle)
					prim->put(prim, NULL, &pkey_i, &data_i, 0);

				for(i=0; i<2; ++i){
					//For each pass here, look for the record being the first in the comparison
					//then the second in the comparison
					//Any failure (not only DB_NOTFOUND) means there is nothing usable to visit.
					if(fs[i]->pget(fs[i], &pkey_i, &key_i, &dummy_dat, DB_SET) != 0)
						continue;

					do{
						//Skip links whose stored value is below the threshold.
						if(*(double*)dummy_dat.data < PR_T)
							continue;

						//Build the other record's primary key.
						//NOTE(review): the callback appears to allocate pkey_j.data
						//(it is freed below) -- confirm against first_index/second_index.
						key_func = i ? first_index : second_index;
						key_func(first /*dummy*/, &key_i, &dummy_dat /*dummy*/, &pkey_j);
						old = pkey_j.data;

						//Without the record there is nothing to compare against.
						if(prim->get(prim, NULL, &pkey_j, &data_j, 0) != 0){
							free(old);
							continue;
						}

						if(!write_cycle){
							if(tagcmp((DbRecord*)data_i.data, (DbRecord*)data_j.data) > 0){
								apply_tag((DbRecord*)data_i.data, has_tag((DbRecord*)data_j.data));
								changed = 1;
							}
							free(old);
							continue;
						}

						if(tagcmp((DbRecord*)data_i.data, (DbRecord*)data_j.data) != 0){
							apply_tag(((DbRecord*)data_j.data), has_tag((DbRecord*)data_i.data));
							prim->put(prim, NULL, &pkey_j, &data_j, 0);
							prim->get(prim, NULL, &pkey_j, &data_j, 0);
							changed = 1;
						}
						free(old);
					} while(fs[i]->pget(fs[i], &pkey_i, &key_i, &dummy_dat, DB_NEXT_DUP) == 0);
				}//First, second idx
			}//Write cycle
		} while(prim_cur_i->pget(prim_cur_i, &key_i, &pkey_i, &data_i, DB_NEXT_DUP) == 0);

		//Each sweep duplicates the cursor, so each sweep must release it.
		prim_cur_i->close(prim_cur_i);
		prim_cur_i = NULL;
	}//changed

	first_cur->close(first_cur);
	second_cur->close(second_cur);
	return(rc);
}