/*
 * bm_needleinhaystack_skipnchars
 *
 * Skip-table search for `needle` inside `haystack`, beginning at index
 * `start_pos` instead of at the first possible alignment.
 *
 *   needle, needle_len     pattern to look for and its length in bytes
 *   haystack, haystack_len buffer to search and its length in bytes
 *   table                  per-byte shift table, indexed by the haystack byte
 *                          under `pos`; an entry of 0 marks a candidate
 *                          alignment whose LAST byte is at `pos`
 *   casesensitive          forwarded unchanged to memwildcardcmp()
 *   start_pos              index at which scanning starts; it is converted to
 *                          size_t, so a negative value becomes a huge index
 *                          and the function returns NULL
 *
 * Returns `haystack` when needle_len is 0, a pointer to the first byte of the
 * first position where memwildcardcmp() reports 0, or NULL when no such
 * position is found.
 */
char *bm_needleinhaystack_skipnchars(char *needle, size_t needle_len,
                                     char *haystack, size_t haystack_len,
                                     size_t table[UCHAR_MAX + 1],
                                     int casesensitive, int start_pos)
{
    size_t shift = 0;
    size_t pos = start_pos;
    char *here;

    if (needle_len == 0) {
        return haystack;
    }

    while (pos < haystack_len) {
        /* Advance by the table's shift until a byte with shift 0 is hit or
         * the buffer is exhausted. */
        while (pos < haystack_len
               && (shift = table[(unsigned char)haystack[pos]]) > 0) {
            pos += shift;
        }

        if (0 == shift) {
            /*
             * The candidate window is haystack[pos - needle_len + 1 .. pos].
             * If pos + 1 < needle_len that window would begin before the
             * start of the haystack (the size_t subtraction wraps), so such
             * positions are stepped over without comparing.
             */
            if (pos + 1 >= needle_len) {
                here = (char *)&haystack[pos - needle_len + 1];
                if (0 == memwildcardcmp(needle, here, needle_len, casesensitive)) {
                    return here;
                }
            }
            pos++;
        }
    }

    return NULL;
}
/*
 * bm_search_skipn
 *
 * Modified Boyer-Moore style search (the comparison is delegated to
 * memwildcardcmp(), which receives the `casesensitive` flag) that lets the
 * caller choose the starting position in the buffer, so leading data can be
 * skipped.
 *
 *   needle, needle_len     pattern to look for and its length in bytes
 *   haystack, haystack_len buffer to search and its length in bytes
 *   table                  per-byte shift table; an entry of 0 marks a
 *                          candidate alignment
 *   casesensitive          forwarded unchanged to memwildcardcmp()
 *   searchtype             SEARCHTYPE_FORWARD and SEARCHTYPE_FORWARD_NEXT scan
 *                          front to back; SEARCHTYPE_REVERSE scans back to
 *                          front; any other value yields NULL
 *   start_pos              forward: index where scanning starts, treated as
 *                          the index of the candidate's last byte.
 *                          reverse: distance from the last byte of the
 *                          haystack at which scanning starts.
 *                          Converted to size_t, so a negative value becomes a
 *                          huge index and the function returns NULL.
 *
 * Returns `haystack` when needle_len is 0, a pointer to the first byte of the
 * first position where memwildcardcmp() reports 0, or NULL otherwise.
 */
unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len,
                               unsigned char *haystack, size_t haystack_len,
                               size_t table[UCHAR_MAX + 1], int casesensitive,
                               int searchtype, int start_pos)
{
    size_t shift = 0;
    size_t pos = start_pos;
    unsigned char *here;

    if (needle_len == 0) {
        return haystack;
    }

    if (searchtype == SEARCHTYPE_FORWARD || searchtype == SEARCHTYPE_FORWARD_NEXT) {
        while (pos < haystack_len) {
            while (pos < haystack_len
                   && (shift = table[(unsigned char)haystack[pos]]) > 0) {
                pos += shift;
            }

            if (0 == shift) {
                /*
                 * Candidate window is haystack[pos - needle_len + 1 .. pos].
                 * When pos + 1 < needle_len it would start before the buffer
                 * (the size_t subtraction wraps), so skip the comparison.
                 */
                if (pos + 1 >= needle_len) {
                    here = (unsigned char *)&haystack[pos - needle_len + 1];
                    if (0 == memwildcardcmp(needle, here, needle_len, casesensitive)) {
                        return here;
                    }
                }
                pos++;
            }
        }
        return NULL;
    } else if (searchtype == SEARCHTYPE_REVERSE) {
        /* Run the search backwards: pos counts bytes from the end. */
        while (pos < haystack_len) {
            while (pos < haystack_len
                   && (shift = table[(unsigned char)haystack[haystack_len - pos - 1]]) > 0) {
                pos += shift;
            }

            if (0 == shift) {
                /*
                 * Candidate window starts at haystack_len - pos - 1 and spans
                 * needle_len bytes. When pos + 1 < needle_len it would extend
                 * past the end of the buffer, so skip the comparison.
                 */
                if (pos + 1 >= needle_len) {
                    here = (unsigned char *)&haystack[haystack_len - pos - 1];
                    if (0 == memwildcardcmp(needle, here, needle_len, casesensitive)) {
                        return here;
                    }
                }
                pos++;
            }
        }
        return NULL;
    }

    return NULL;
}
/********************************************************************************
 *Function: search_chunk
 *Description: Analyze the given chunk by running each of the first
 *      s->num_builtin entries of search_spec on it.
 *
 *      For every spec the chunk is scanned for the spec's header:
 *        - quick mode (mode_quick): only block-aligned offsets
 *          (multiples of s->block_size) are compared with memwildcardcmp();
 *        - otherwise bm_search() is run forward over the rest of the chunk.
 *      On a hit:
 *        - with mode_ind_blk set, ind_block() is tried for block sizes
 *          4096, 1024 and 512; when it reports a hit and more than 13 blocks
 *          remain, the 13th block after the header is squeezed out of the
 *          buffer for the duration of the extraction and the gap is zeroed
 *          and re-opened afterwards;
 *        - extract_file() is called, and its return value becomes the
 *          position at which the scan resumes (NULL stops this spec);
 *        - with mode_write_all set and needle->written still FALSE, up to
 *          needle->max_len bytes starting at the header are written out via
 *          write_to_disk();
 *        - otherwise, if extract_file() returned NULL and fewer than
 *          needle->max_len bytes remain in the chunk, needle->max_len bytes
 *          are re-read with read_from_disk() and extraction is retried; if
 *          enough bytes remain, the scan resumes just past the header.
 *
 *      s         program state (modes, block size, number of specs)
 *      buf       chunk data; temporarily modified in place when an indirect
 *                block is removed
 *      i         input file info; i->handle position is saved and restored
 *                around a successful re-read
 *      chunk_size number of valid bytes in buf
 *      f_offset  offset of buf[0] within the input, added to in-chunk offsets
 *
 *Return: TRUE (always)
 **********************************************************************************/
int search_chunk(f_state *s, unsigned char *buf, f_info *i, u_int64_t chunk_size, u_int64_t f_offset)
{
    u_int64_t c_offset = 0;
    unsigned char *foundat = buf;
    unsigned char *header_pos = NULL;
    unsigned char *newbuf = NULL;
    unsigned char *ind_ptr = NULL;
    u_int64_t current_buflen = chunk_size;
    int tryBS[3] = { 4096, 1024, 512 };
    unsigned char *extractbuf = NULL;
    u_int64_t file_size = 0;
    s_spec *needle = NULL;
    int j = 0;
    int bs = 0;
    int rem = 0;
    int x = 0;
    int found_ind = FALSE;
    off_t saveme;

    for (j = 0; j < s->num_builtin; j++) {
        needle = &search_spec[j];
        foundat = buf; /* reset the buffer for the next search spec */
#ifdef DEBUG
        printf(" SEARCHING FOR %s's\n", needle->suffix);
#endif
        bs = 0;
        current_buflen = chunk_size;

        while (foundat) {
            needle->written = FALSE;
            found_ind = FALSE;
            memset(needle->comment, 0, COMMENT_LENGTH - 1);

            if (chunk_size <= (foundat - buf)) {
#ifdef DEBUG
                printf("avoided seg fault in search_chunk()\n");
#endif
                foundat = NULL;
                break;
            }
            current_buflen = chunk_size - (foundat - buf);

            if (signal_caught == SIGTERM || signal_caught == SIGINT) {
                user_interrupt(s, i);
                printf("Cleaning up.\n");
                signal_caught = 0;
            }

            if (get_mode(s, mode_quick)) {
                /* RUN QUICK SEARCH */
                /* Check if we are not on a block head, adjust if so */
                rem = (foundat - buf) % s->block_size;
                if (rem != 0) {
                    foundat += (s->block_size - rem);
                }

                /*
                 * The alignment above may have pushed foundat to or past the
                 * end of the chunk; a header that does not fit entirely in
                 * the chunk cannot be compared without reading out of bounds,
                 * and later block heads fit even less.
                 */
                if ((u_int64_t)(foundat - buf) + needle->header_len > chunk_size) {
                    foundat = NULL;
                    break;
                }

                if (memwildcardcmp(needle->header, foundat, needle->header_len, needle->case_sen) != 0) {
                    /* No match, jump to the next block */
                    if (current_buflen > s->block_size) {
                        foundat += s->block_size;
                        continue;
                    } else {
                        /* We are out of buffer, go to the next search spec */
                        foundat = NULL;
                        break;
                    }
                }
                header_pos = foundat;
            } else {
                /* RUN STANDARD SEARCH */
                foundat = bm_search(needle->header,
                                    needle->header_len,
                                    foundat,
                                    current_buflen, /* how much to search through */
                                    needle->header_bm_table,
                                    needle->case_sen,
                                    SEARCHTYPE_FORWARD);
                header_pos = foundat;
            }

            if (foundat != NULL) {
                /* We got something, run the appropriate heuristic to find the EOF */
                current_buflen = chunk_size - (foundat - buf);

                if (get_mode(s, mode_ind_blk)) {
#ifdef DEBUG
                    printf("ind blk detection on\n");
#endif
                    for (x = 0; x < 3; x++) {
                        bs = tryBS[x];
                        if (ind_block(foundat, current_buflen, bs)) {
                            if (get_mode(s, mode_verbose)) {
                                /* Bounded by the same length the memset()
                                 * calls on needle->comment use. */
                                snprintf(needle->comment, COMMENT_LENGTH - 1, " (IND BLK bs:=%d)", bs);
                            }
#ifdef DEBUG
                            printf("performing mem move\n");
#endif
                            if (current_buflen > 13 * bs) {
                                /* Enough buffer: close the gap over the block
                                 * that starts 12 blocks after the header. */
                                memmove(foundat + 12 * bs, foundat + 13 * bs, current_buflen - 13 * bs);
                                found_ind = TRUE;
#ifdef DEBUG
                                printf("performing mem move complete\n");
#endif
                                ind_ptr = foundat + 12 * bs;
                                current_buflen -= bs;
                                chunk_size -= bs;
                                break;
                            }
                        }
                    }
                }

                c_offset = (foundat - buf);

                /* Now analyze the file and see if we can determine its size */
                foundat = extract_file(s, c_offset, foundat, current_buflen, needle, f_offset);
#ifdef DEBUG
                if (foundat == NULL) {
                    printf("Foundat == NULL!!!\n");
                }
#endif

                if (get_mode(s, mode_write_all)) {
                    if (needle->written == FALSE) {
                        /* write every header we find */
                        if (current_buflen >= needle->max_len) {
                            file_size = needle->max_len;
                        } else {
                            file_size = current_buflen;
                        }

                        snprintf(needle->comment, COMMENT_LENGTH - 1, " (Header dump)");
                        extractbuf = malloc(file_size);
                        if (extractbuf != NULL) {
                            /* Skip the dump rather than copy into NULL when
                             * the allocation fails. */
                            memcpy(extractbuf, header_pos, file_size);
                            write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
                            free(extractbuf);
                        }
                    }
                } else if (!foundat) {
                    /* We couldn't determine where the file ends; check
                     * whether we should try again. */
                    if (current_buflen < needle->max_len) {
                        /* We need to bridge the gap */
#ifdef DEBUG
                        printf(" Bridge the gap\n");
#endif
                        saveme = ftello(i->handle);

                        /* grow the buffer and try to extract again */
                        newbuf = read_from_disk(c_offset + f_offset, i, needle->max_len);
                        if (newbuf != NULL) {
                            (void)extract_file(s, c_offset, newbuf, needle->max_len, needle, f_offset);

                            /* Put the fp back */
                            fseeko(i->handle, saveme, SEEK_SET);
                            free(newbuf);
                        }
                        /*
                         * foundat is NULL on this path, so the loop ends
                         * after the indirect-block restore below. Falling
                         * through (instead of breaking on a failed read)
                         * guarantees chunk_size is restored for the
                         * remaining search specs.
                         */
                    } else {
                        /* reset foundat to the location of the last header
                         * and jump past it */
                        foundat = header_pos;
                        foundat += needle->header_len + 1;
                    }
                }
            }

            if (found_ind) {
                /* Put the ind blk back in, re-arrange the buffer so that the
                 * future blks names come out correct */
#ifdef DEBUG
                printf("Replacing the ind block\n");
#endif
                /*
                 * current_buflen was already reduced by bs above. When it is
                 * not larger than 13 * bs there is nothing to shift back, and
                 * the unsigned subtraction would wrap to a huge count.
                 */
                if (current_buflen > 13 * bs) {
                    memmove(ind_ptr + 1 * bs, ind_ptr, current_buflen - 13 * bs);
                }
                memset(ind_ptr, 0, bs - 1);
                chunk_size += bs;
                memset(needle->comment, 0, COMMENT_LENGTH - 1);
            }
        } /* end while */
    }

    return TRUE;
}