/*
 * Initialize pt_multimapdest and fill it with the elements of
 * cpt_multimapsrc.
 *
 * Both pointers must be non-NULL, and the two multimaps must already agree
 * on the size and the type name of their key (first) and value (second)
 * elements; all of this is checked with assert() only.
 */
void multimap_init_copy(multimap_t* pt_multimapdest, const multimap_t* cpt_multimapsrc)
{
    assert(pt_multimapdest != NULL && cpt_multimapsrc != NULL);
    assert(
        pt_multimapdest->_t_pair._t_firsttypesize ==
            cpt_multimapsrc->_t_pair._t_firsttypesize &&
        pt_multimapdest->_t_pair._t_secondtypesize ==
            cpt_multimapsrc->_t_pair._t_secondtypesize);
    assert(
        strncmp(
            pt_multimapdest->_t_pair._sz_firsttypename,
            cpt_multimapsrc->_t_pair._sz_firsttypename,
            _ELEM_TYPE_NAME_SIZE) == 0 &&
        strncmp(
            pt_multimapdest->_t_pair._sz_secondtypename,
            cpt_multimapsrc->_t_pair._sz_secondtypename,
            _ELEM_TYPE_NAME_SIZE) == 0);

    /* set up the destination before anything is inserted into it */
    multimap_init(pt_multimapdest);

    /* an empty source leaves the freshly initialized destination as is */
    if (multimap_empty(cpt_multimapsrc))
    {
        return;
    }

    /* copy every element of the source range into the destination */
    multimap_insert_range(
        pt_multimapdest,
        multimap_begin(cpt_multimapsrc),
        multimap_end(cpt_multimapsrc));
}
char *parse_mail_header(container *M_header, char **attr_dst, char **title_dst) { char *p = NULL, *p2; char attr[MAX_ATTR_LEN]; int pos = 0; iterator it = multimap_begin(M_header); int num_receivers = 0; for (; it.valid; it=multimap_next(it)) { if (strcmp("subject", (char *) multimap_key(it).ptr) == 0) p = (char*)multimap_val(it).ptr; #ifdef EMAIL_ADRESS_AS_ATTRIBUTE else if (!strcmp("from", (char*) multimap_key(it).ptr)) { pos += snprintf(&(attr[pos]), MAX_ATTR_LEN - pos, "from=%s,", (char *) multimap_val(it).ptr); //asprintf(&crawldocumentAdd.attributes, "from=%s,", } else if (strcmp("to", (char *) multimap_key(it).ptr) == 0) { // TODO: Support more than one 'to' field if (num_receivers++) continue; pos += snprintf(&(attr[pos]), MAX_ATTR_LEN - pos, "to=%s,", (char *) multimap_val(it).ptr); } #endif } pos += snprintf(&(attr[pos]), MAX_ATTR_LEN - pos, "num_receivers=%d,", num_receivers); *attr_dst = strdup(attr); if (p == NULL) { *title_dst = ""; } else { while (isspace(*p)) { p++; } for (p2 = p; *p2 != '\n' && *p2 != '\r' && *p2 != '\0'; p2++) ; if (p2 - p > 0) { *title_dst = strndup(p, p2 - p); fix_subject(*title_dst, p2-p+1); } else { *title_dst = NULL; } if (*title_dst == NULL) *title_dst = ""; } }
/*
 * Replace the contents of pt_multimapdest with a copy of the elements of
 * cpt_multimapsrc.
 *
 * Both pointers must be non-NULL and both multimaps must hold the same pair
 * type (checked with assert() via _same_multimap_pair_type()).
 * Assigning a multimap to itself is a no-op.
 */
void multimap_assign(multimap_t* pt_multimapdest, const multimap_t* cpt_multimapsrc)
{
    assert(pt_multimapdest != NULL && cpt_multimapsrc != NULL);
    assert(
        _same_multimap_pair_type(
            &pt_multimapdest->_t_pair, &cpt_multimapsrc->_t_pair));

    /* Self-assignment: destroying dest would also destroy the source we are
     * about to copy from, so there is nothing to do. */
    if (pt_multimapdest == cpt_multimapsrc)
    {
        return;
    }

    /* destroy dest multimap */
    multimap_destroy(pt_multimapdest);
    /* initialize dest multimap with src multimap attribute */
    multimap_init(pt_multimapdest);

    /* insert all element from src to dest */
    if(!multimap_empty(cpt_multimapsrc))
    {
        multimap_insert_range(
            pt_multimapdest,
            multimap_begin(cpt_multimapsrc),
            multimap_end(cpt_multimapsrc));
    }
}
/*
 * Reorder the vector of attribute tree items in *X, recursively.
 *
 * X             in/out: vector of struct _attr_tree_ pointers. When a
 *               reordering takes place the old vector is destroyed and *X
 *               is replaced by a newly created vector holding the same item
 *               pointers in the new order. Nothing happens if *X is NULL.
 * sort          sort_hits orders by item->hits, sort_alpha orders by the
 *               item's name; any other value keeps the current order.
 * sort_reverse  non-zero flips the resulting order. Note that for sort_hits
 *               the non-reversed order walks the multimap from its end
 *               backwards, while for sort_alpha it walks from the beginning.
 *
 * Children of every item are always sorted first, using each item's own
 * sort / sort_reverse settings, even when this level is left untouched.
 */
void _attribute_sort_items_(container **X, enum attr_sort_enum sort, char sort_reverse)
{
    if (*X == NULL) return;

    int i;
    container *N = NULL;
    const int by_key = (sort == sort_hits || sort == sort_alpha);

    if (sort == sort_hits)
        N = multimap_container( int_container(), ptr_container() );
    else if (sort == sort_alpha)
        N = multimap_container( string_container(), ptr_container() );

    for (i=0; i<vector_size(*X); i++) {
        struct _attr_tree_ *item = vector_get(*X, i).ptr;

        if (item->children != NULL)
            _attribute_sort_items_(&(item->children), item->sort, item->sort_reverse);

        if (sort == sort_hits) {
            multimap_insert(N, item->hits, item);
        }
        else if (sort == sort_alpha) {
            /* Never insert a NULL key: fall back from the name to the value
             * and finally to the empty string.
             * NOTE(review): assumes item->value is a C string - confirm. */
            const char *key = "";

            if (item->name != NULL)
                key = item->name;
            else if (item->value != NULL)
                key = item->value;

            multimap_insert(N, key, item);
        }
    }

    /* Unsorted and not reversed: the current order is already the result. */
    if (!by_key && !sort_reverse) return;

    container *Y = vector_container( ptr_container() );
    iterator it;

    if (!by_key) {
        /* No sort key, only reverse the existing order. */
        for (i=vector_size(*X)-1; i>=0; i--)
            vector_pushback(Y, vector_get(*X, i).ptr);
    }
    else if ((sort == sort_hits && sort_reverse) || (sort == sort_alpha && !sort_reverse)) {
        /* Walk the multimap front to back. */
        for (it = multimap_begin(N); it.valid; it=multimap_next(it))
            vector_pushback(Y, map_val(it).ptr);
    }
    else {
        /* Walk the multimap back to front. */
        for (it = multimap_end(N); it.valid; it=multimap_previous(it))
            vector_pushback(Y, map_val(it).ptr);
    }

    if (N != NULL) destroy(N);
    destroy(*X);
    *X = Y;

    return;
}
/*
 * Fetch one e-mail and hand it to the crawler as a document, unless the
 * crawler reports that it already has it.
 *
 * ci            crawler callbacks (documentExist / documentAdd /
 *               documentError) and the collection they operate on.
 * acl_allow,
 * acl_deny      ACL sets, used when no usersid is given.
 * url, sid      used to build the document uri via make_crawl_uri().
 * contentlen    not used by this function.
 * lastmodified  stored in both the "exist" query and the added document.
 * usersid       if non-NULL it becomes the sole allowed ACL entry and the
 *               denied ACL is empty.
 * curl          passed through to ex_getEmail().
 *
 * Everything allocated here (uri, title, ACL strings, attributes, mail
 * buffer) is released before returning.
 */
void grab_email(struct crawlinfo *ci, set *acl_allow, set *acl_deny, char *url, char *sid, size_t contentlen, time_t lastmodified, char *usersid, CURL **curl)
{
    struct ex_buffer mail;
    struct crawldocumentExistFormat crawldocumentExist;
    struct crawldocumentAddFormat crawldocumentAdd;

    (void) contentlen;

    crawldocumentExist.documenturi = make_crawl_uri(url, sid);
    crawldocumentExist.lastmodified = lastmodified;
    if (crawldocumentExist.documenturi == NULL) {
        (ci->documentError)(ci->collection ,1, "Could not allocate memory for documenturi");
        return;
    }

    if ((ci->documentExist)(ci->collection, &crawldocumentExist)) {
        /* This document already exists */
        free(crawldocumentExist.documenturi);
        return;
    }

    if (ex_getEmail(url, &mail, curl) == NULL) {
        free(crawldocumentExist.documenturi);
        return;
    }

    /* Let's add it */
    crawldocumentAdd.documenturi = crawldocumentExist.documenturi;

    /* Pull the attributes and the title (subject) out of the mail header. */
    container *M_header = mail_analyze_header(mail.buf, mail.size-1);
    parse_mail_header(M_header, &crawldocumentAdd.attributes, &crawldocumentAdd.title);
    destroy(M_header);

    crawldocumentAdd.documenttype = "eml";
    crawldocumentAdd.doctype = "";
    crawldocumentAdd.document = mail.buf;
    crawldocumentAdd.dokument_size = mail.size-1; /* Last byte is string null terminator */
    crawldocumentAdd.lastmodified = lastmodified;

    /* If we were given a user sid, use it as the ACL. */
    if (usersid == NULL) {
        crawldocumentAdd.acl_allow = set_to_string(acl_allow, ",");
        crawldocumentAdd.acl_denied = set_to_string(acl_deny, ",");
    }
    else {
        crawldocumentAdd.acl_allow = strdup(usersid);
        crawldocumentAdd.acl_denied = strdup("");
    }

    bblog(INFO, "Adding: '%s'", crawldocumentAdd.title);
    /* usersid is legitimately NULL here; never pass NULL for a %s argument */
    bblog(INFO, "usersid \"%s\"acl_allow \"%s\" acl_denied \"%s\"",
          usersid != NULL ? usersid : "(null)",
          crawldocumentAdd.acl_allow, crawldocumentAdd.acl_denied);

    (ci->documentAdd)(ci->collection, &crawldocumentAdd);

    /* An empty title is the string literal "" and must not be freed. */
    if (crawldocumentAdd.title[0] != '\0')
        free(crawldocumentAdd.title);
    free(crawldocumentAdd.acl_allow);
    free(crawldocumentAdd.acl_denied);

#ifdef EMAIL_ADRESS_AS_ATTRIBUTE
    bblog(INFO, "attributes: %s", crawldocumentAdd.attributes);
#else
    bblog(WARN, "not compiled with attributes");
#endif
    /* parse_mail_header() allocates the attribute string in every build
     * configuration, so it has to be released in every configuration too. */
    free(crawldocumentAdd.attributes);

    free(mail.buf);
    free(crawldocumentExist.documenturi);
}