Beispiel #1
0
/*
 * Classifies every entry of `candidates` against `word`.
 *
 * The comparison is case-insensitive (both sides go through to_lower_case).
 * A candidate that equals the word itself is marked NOT_ANAGRAM. Every
 * candidate still UNCHECKED after that is marked IS_ANAGRAM when its sorted
 * letters equal the sorted letters of the word, NOT_ANAGRAM otherwise.
 * Candidates that already carry another state are otherwise left alone.
 *
 * Inputs longer than MAX_STR_LEN - 1 characters are truncated to that length.
 * A NULL `word` or `candidates` is ignored.
 */
void anagrams_for(const char *word, struct candidates *candidates)
{
   if (word == NULL || candidates == NULL) {
      return;
   }

   /* Copy at most MAX_STR_LEN - 1 bytes: strncpy does not terminate the
      destination when the source fills it, so the last zero-initialised
      byte must be left untouched to keep the buffer a valid C string. */
   char lower_word[MAX_STR_LEN] = { 0 };
   strncpy(lower_word, word, MAX_STR_LEN - 1);
   to_lower_case(lower_word);

   char sorted_word[MAX_STR_LEN] = { 0 };
   strncpy(sorted_word, lower_word, MAX_STR_LEN - 1);
   qsort(sorted_word, strlen(sorted_word), 1, compare);

   for (int i = 0; i < (int)candidates->count; i++) {
      char lower_candidate[MAX_STR_LEN] = { 0 };
      strncpy(lower_candidate, candidates->candidate[i].candidate,
              MAX_STR_LEN - 1);
      to_lower_case(lower_candidate);

      /* A word is never an anagram of itself. */
      if (strcmp(lower_candidate, lower_word) == 0) {
         candidates->candidate[i].is_anagram = NOT_ANAGRAM;
      }

      if (candidates->candidate[i].is_anagram == UNCHECKED) {
         /* Sorting in place is fine: the unsorted form is no longer needed. */
         qsort(lower_candidate, strlen(lower_candidate), 1, compare);
         if (strcmp(sorted_word, lower_candidate) == 0) {
            candidates->candidate[i].is_anagram = IS_ANAGRAM;
         } else {
            candidates->candidate[i].is_anagram = NOT_ANAGRAM;
         }
      }
   }
}
Beispiel #2
0
/*
* Reads each line (entry) of the file into blacklist_entries and returns 0 on
* success, -1 on failure (file cannot be opened or memory runs out).
*
* At most MAX_ENTRIES lines are read. Each stored entry has its trailing
* newline removed and is converted to lower case. num_entries is set to the
* number of entries stored, also when the function fails part-way through.
* The file is closed before returning.
*/
int read_blacklist_file(char* filename) {
	
	// open file 
	file = fopen(filename, "r");
	if (NULL == file) {
		printf("Error opening file.\n");
		return -1;
	}
	
	int status = 0;
	int line_index = 0;
	char * line = (char *) malloc((MAX_ENTRIES + 3) * sizeof(char));
	if (NULL == line) {
		status = -1;
	}
	
	// read each line (entry) into blacklist_entries; the capacity is tested
	// first so that no line is read once the table is already full
	while (0 == status && line_index < MAX_ENTRIES
			&& NULL != fgets(line, MAX_ENTRIES + 3, file)) {
		char * entry = (char *) malloc((MAX_ENTRIES + 3) * sizeof(char));
		if (NULL == entry) {
			status = -1;
			break;
		}
		strcpy(entry, line);
		
		// remove '\n' from entry (guarding against an empty string, for
		// which "length - 1" would index before the buffer)
		size_t entry_len = strlen(entry);
		if (entry_len > 0 && '\n' == entry[entry_len - 1]) {
			entry[entry_len - 1] = '\0';
		}
		
		// convert to lower case since we don't care about case 
		to_lower_case(entry);
		
		blacklist_entries[line_index] = entry;
		line_index++;
	}
	free(line);
	num_entries = line_index;
	
	// the handle is not needed after loading; leaving it open leaks it
	fclose(file);
	file = NULL;
	
	return status;
}
Beispiel #3
0
/*
 * Counts how often each word occurs on standard input and prints one
 * "<word> <count>" line per distinct word, in the order produced by
 * to_sorted_array().
 *
 * A word is a run of ASCII letters and apostrophes; words are lower-cased
 * before counting. Words longer than 511 characters are split, so
 * READ_BUF_LEN must be at least 512.
 *
 * Returns 0 on success and 1 when a required allocation fails.
 */
int main(int argc, char const *argv[]) {
    (void)argc;
    (void)argv;

    char* read_buf = malloc(READ_BUF_LEN);
    if (read_buf == NULL) {
        return 1;
    }

    hashmap_t *hm = create_hash_map(HASHMAP_SIZE);
    if (hm == NULL) {
        free(read_buf);
        return 1;
    }

    /* Inside a scanset '|' is an ordinary character, not an "or" operator,
       so it must not appear here or "a|b" would be counted as one word. */
    int result = scanf("%511[a-zA-Z']", read_buf);
    int *count;
    while (result != EOF) {
        if (result == 1) {
            to_lower_case(read_buf);

            count = lookup_val(read_buf, hm);
            if (count != NULL) {
                *count = *count + 1;
            } else {
                add_val(read_buf, 1, hm);
            }
        } else if (result == 0) {
            /* No word at the current position: skip the separator run. Its
               result is not needed; end of input is detected by the next
               word scan below. */
            scanf("%511[^a-zA-Z']", read_buf);
        }

        result = scanf("%511[a-zA-Z']", read_buf);
    }

    int status = 0;
    node_t **words = to_sorted_array(hm);
    if (words != NULL) {
        for (int i = 0; i < hm->size; i++) {
            printf("%s %d\n", words[i]->string, words[i]->count);
        }
    } else {
        status = 1;
    }

    /* Released only after printing, in case the map refers to this buffer. */
    free(read_buf);

    return status;
}
Beispiel #4
0
/* Adds a record to the record_list, first ensuring that the data is valid and
   contains the correct amount of arguments (at least 3 not including add),
   then converting the first and last names to lower case, then formatting
   and adding the record to the record_list.

   The line is expected to look like "<command> <first> <last> <score>".
   Nothing is added (and nothing is printed) when
     - fewer than three arguments can be parsed,
     - a name does not fit in NAMESIZE - 1 characters,
     - a number can be parsed directly after the integer score (this is how
       a score such as "85.5" is rejected),
     - validate_add() rejects the values, or
     - the list cannot be grown.
   On success "OK" is printed. */
void add(const char line[], record_list *list) {
	/* One byte larger than a valid name so that an over-long name can be
	   detected instead of overflowing the buffer. */
	char   firstName[NAMESIZE + 1];
	char   lastName[NAMESIZE + 1];
	char   format[64];
	int    score;
	float  testScore;
	record rec;
	
	/* Build "%*s %<N>s %<N>s %d" at run time: a bare %s has no length limit,
	   and the field width cannot be taken from NAMESIZE in a literal. */
	snprintf(format, sizeof format, "%%*s %%%ds %%%ds %%d",
	         (int)(NAMESIZE), (int)(NAMESIZE));
	
	if (sscanf(line, format, firstName, lastName, &score) != 3) {
		return;
	}
	
	/* A name that filled the whole field was too long for a record. */
	if (strlen(firstName) >= (size_t)(NAMESIZE) ||
	    strlen(lastName) >= (size_t)(NAMESIZE)) {
		return;
	}
	
	/* Ensures the number is an integer and not a float */
	if (sscanf(line, "%*s %*s %*s %*d %f", &testScore) == 1) {
		return;
	}
	
	/* Validates the rest of the data */
	if (!validate_add(firstName, lastName, score)) {
		return;
	}
	to_lower_case(firstName);
	to_lower_case(lastName);
	
	strcpy(rec.name.last, lastName);
	strcpy(rec.name.first, firstName);
	rec.score = score;
	
	/* Grow the array by one BLOCK of records when it is full. */
	if (list->nused == list->nalloc) {
		record *tmp = realloc(list->data, (list->nalloc + BLOCK) * sizeof(record));
		
		if (tmp == NULL) {
			return;
		}
		
		#ifdef DEBUG
			printf("#\n");
		#endif
		
		list->data = tmp;
		list->nalloc += BLOCK;
	}
	list->data[list->nused++] = rec;
	
	printf("%s\n", "OK");
}
Beispiel #5
0
bool Callable(std::string command)
{
	Protocol::Message args = command;
	if (args.size() < 1) return false;

	string cmd = to_lower_case(args[0]);

	return !!Command::list.count(cmd);
}
Beispiel #6
0
/* Checks to_lower_case() against the expected table `ans` for every value
   in 0..127, then signals success through HIT_GOOD_TRAP. */
int main() {
	int i = 0;

	while (i < 128) {
		nemu_assert(to_lower_case(i) == ans[i]);
		i ++;
	}

	/* The loop must have covered the whole range. */
	nemu_assert(i == 128);

	HIT_GOOD_TRAP;

	return 0;
}
Beispiel #7
0
/// Folds one line of text into the statistics kept in `mem`.
///
/// Three things are updated:
///  - mem.word_list: every word of the line, lower-cased, via update_word().
///    ASCII apostrophes are replaced by ’ first, so both spellings of a word
///    are counted together.
///  - mem.punc_freq[i]: incremented by the number of matches of the i-th
///    punctuation pattern (. , ; : ! ? dash, ellipsis).
///  - mem.sentence_len / mem.sentence_carry: the line is split into the part
///    up to the first sentence terminator, the complete sentences after it,
///    and an unterminated tail. The first part closes the sentence carried
///    over from earlier lines; the tail is added to the carry.
///
/// `line` itself is not modified.
void add_data_from_line(Memory& mem, string& line)
{
	// Compiled once per process: building a std::regex is expensive and none
	// of these patterns depends on the input.
	static const regex regex_apostrophe("'");
	static const regex regex_word("([a-zA-záéíóúñÁÉÍÓÚÑ'’]+)");
	static const vector<regex> regex_punc = {
		regex("\\."),
		regex(","),
		regex(";"),
		regex(":"),
		regex("!"),
		regex("\\?"),
		regex("(?:[-–—]\\s*){1,2}"),
		regex("…|(?:\\.\\s*){3}")
	};
	static const regex regex_split_line("^([^.!?…]*[.!?…])?((?:[^.!?…]*[.!?…])*?)([^.!?…]*)$");
	static const regex regex_split_sentence("([^\\.!?…]*)[\\.!?…]");

	// regex_replace returns the edited string and leaves its argument
	// untouched, so the result has to be kept and used from here on.
	const string text = std::regex_replace(line, regex_apostrophe, "’");

	// Words.
	sregex_iterator find_word(text.begin(), text.end(), regex_word);
	for (sregex_iterator find_end; find_word != find_end; ++find_word) {
		string word_found = find_word->str();
		word_found = to_lower_case(word_found);
		update_word(mem.word_list, word_found);
	}

	// Punctuation frequencies. The count is clamped to the number of
	// patterns so a larger PUNC_NUM cannot index past the table.
	const std::size_t punc_limit =
		regex_punc.size() < static_cast<std::size_t>(PUNC_NUM)
			? regex_punc.size()
			: static_cast<std::size_t>(PUNC_NUM);
	for (std::size_t i = 0; i < punc_limit; ++i) {
		sregex_iterator find_punc(text.begin(), text.end(), regex_punc[i]);
		mem.punc_freq[i] += std::distance(find_punc, sregex_iterator());
	}

	// Sentence lengths. One search yields all three parts of the line.
	std::smatch parts;
	if (!std::regex_search(text, parts, regex_split_line)) {
		return;
	}

	// Part 1: text up to and including the first terminator. It finishes the
	// sentence started on previous lines.
	string sentence_prev = parts[1].str();
	int sentence_prev_len = get_word_count(sentence_prev);
	if (sentence_prev_len > 0) {
		sentence_prev_len += mem.sentence_carry;
		mem.sentence_len.push_back(sentence_prev_len);
		mem.sentence_carry = 0;
	}

	// Part 2: zero or more complete sentences.
	string sentence_cont = parts[2].str();
	sregex_iterator find_sentence(sentence_cont.begin(), sentence_cont.end(), regex_split_sentence);
	for (sregex_iterator find_end; find_sentence != find_end; ++find_sentence) {
		string sentence_found = find_sentence->str();
		int sentence_len = get_word_count(sentence_found);
		mem.sentence_len.push_back(sentence_len);
		mem.sentence_carry = 0;
	}

	// Part 3: unterminated tail, carried over to the next line.
	string sentence_xtra = parts[3].str();
	int sentence_xtra_len = get_word_count(sentence_xtra);
	if (sentence_xtra_len > 0) {
		mem.sentence_carry += sentence_xtra_len;
	}
}
Beispiel #8
0
/*
* Returns true if host is blacklisted, otherwise false.
*
* A host is blacklisted when any of the first num_entries strings in
* blacklist_entries occurs as a substring of the lower-cased host name.
* The entries are expected to be lower case already.
* A NULL host is never blacklisted.
*/
bool is_blacklisted(char * host) {
	if (NULL == host) {
		return false;
	}
	
	// +1 for the terminating '\0', which strlen does not count
	char * host_copy = (char *) malloc(sizeof(char) * (strlen(host) + 1));
	
	// if no memory is available, fall back to matching the host as given
	const char * haystack = host;
	if (NULL != host_copy) {
		strcpy(host_copy, host);
		to_lower_case(host_copy);
		haystack = host_copy;
	}
	
	bool found = false;
	int i;
	for (i = 0; i < num_entries; i++) {
		// search the lower-cased copy, not the caller's string
		if (NULL != strstr(haystack, blacklist_entries[i])) {
			found = true;
			break;
		}
	}
	
	free(host_copy);
	return found;
}
/**
 * Compares two MIDP strings.
 *
 * The distinction from midpStringCmp is the possibility of
 * recognizing partly equal strings.
 *
 * Only the first min(len1, len2) symbols are compared. If they all match
 * but the lengths differ, the result is min(len1, len2) + 1, i.e. the
 * position just past the end of the shorter string. In particular, an
 * empty string compared with a non-empty one yields 1, not 0.
 *
 * @param str1 the first string to be compared
 * @param str2 the second string to be compared
 * @param case_sensitive indicates case sensitivity
 * @return 0 if the strings are equal; if the strings are
 * different, the 1-based number of the first different symbol
 */
static int compare_two_strings(const pcsl_string *str1, const pcsl_string *str2, 
                                            jsr211_boolean case_sensitive) {
  const jchar *buf1, *buf2, *cur1, *cur2;
  int i, n, res;
  int len1, len2;

  cur1 = buf1 = pcsl_string_get_utf16_data(str1);
  cur2 = buf2 = pcsl_string_get_utf16_data(str2);
  len1 = pcsl_string_utf16_length(str1);
  len2 = pcsl_string_utf16_length(str2);

  if (len1 == len2) {
    n = len1;
    res = 0;
  } else {
    /* Never read past the shorter string, also when it is empty; the
       default result points just behind the common prefix. */
    n = len1 < len2 ? len1 : len2;
    res = n + 1;
  }

  if (!case_sensitive) {
    for (i = 1; i <= n; i++) {
      if (to_lower_case(*cur1++) != to_lower_case(*cur2++)) {
        res = i;
        break;
      }
    }
  } else {
    for (i = 1; i <= n; i++) {
      if (*cur1++ != *cur2++) {
        res = i;
        break;
      }
    }
  }

  pcsl_string_release_utf16_data(buf1, str1);
  pcsl_string_release_utf16_data(buf2, str2);
  return res;
}
Beispiel #10
0
	// Serializes the request line and the header block of a request and hands
	// the bytes to on_encoded_data_avail(), whose result is returned.
	//
	// Two headers are adjusted before serialization:
	//  - "Content-Type" defaults to a URL-encoded form when absent;
	//  - "Transfer-Encoding" is stripped of anything after ';', trimmed and
	//    lower-cased; an empty or identity value is replaced by chunked.
	long ClientWriter::put_chunked_header(RequestHeaders request_headers){
		PROFILE_ME;

		StreamBuffer buffer;

		// Request line: verb, URI with optional query string, HTTP version.
		buffer.put(get_string_from_verb(request_headers.verb));
		buffer.put(' ');
		buffer.put(request_headers.uri);
		if(!request_headers.get_params.empty()){
			buffer.put('?');
			buffer.put(url_encoded_from_optional_map(request_headers.get_params));
		}
		// The version is stored as major * 10000 + minor.
		const unsigned ver_major = request_headers.version / 10000;
		const unsigned ver_minor = request_headers.version % 10000;
		char version_text[64];
		const unsigned version_len = (unsigned)std::sprintf(version_text, " HTTP/%u.%u\r\n", ver_major, ver_minor);
		buffer.put(version_text, version_len);

		AUTO_REF(headers, request_headers.headers);
		if(!headers.has("Content-Type")){
			headers.set(sslit("Content-Type"), "application/x-www-form-urlencoded; charset=utf-8");
		}

		// Normalize the requested transfer encoding.
		AUTO(encoding, headers.get("Transfer-Encoding"));
		AUTO(semicolon, encoding.find(';'));
		if(semicolon != std::string::npos){
			encoding.erase(semicolon);
		}
		encoding = to_lower_case(trim(STD_MOVE(encoding)));

		const bool keep_requested = !encoding.empty() && !(encoding == STR_IDENTITY);
		if(keep_requested){
			headers.set(sslit("Transfer-Encoding"), STD_MOVE(encoding));
		} else {
			headers.set(sslit("Transfer-Encoding"), STR_CHUNKED);
		}

		// Header block: one "Name: value" line each, closed by an empty line.
		for(AUTO(it, headers.begin()); it != headers.end(); ++it){
			buffer.put(it->first.get());
			buffer.put(": ");
			buffer.put(it->second);
			buffer.put("\r\n");
		}
		buffer.put("\r\n");

		return on_encoded_data_avail(STD_MOVE(buffer));
	}
Beispiel #11
0
/// Runs the command named by the first element of `command`.
///
/// The name is lower-cased and removed from the message; the remaining
/// elements are the arguments. An unknown name or too few arguments is
/// reported through Echo() and nothing is run. An empty message is ignored.
void Call(string command)
{
	Protocol::Message tokens = command;
	if (tokens.size() < 1) return;

	const string name = to_lower_case(tokens[0]);
	tokens.erase(tokens.begin());

	if (Command::list.count(name) == 0)
	{
		Echo(string("Unknown command: ") + name);
		return;
	}

	// .first is the handler, .second the minimum number of arguments.
	pair<Command::Func,size_t> entry = Command::list[name];
	if (tokens.size() < entry.second)
	{
		Echo(string("To few arguments: ") + name);
		return;
	}

	entry.first(tokens);
}
std::string ArabicIGMapping::get_unknown_mapping(const std::string& word, unsigned /*position*/) const
{
  //  std::cout << "Berk Mapping!! new word: " << word << std::endl;
  std::string word_class = "UNK";
  std::string word_lower_case=to_lower_case(word);
  //    std::cout << word << std::endl;
  //    std::cout << "before replace" << std::endl;
  std::string word_newchar = replace(word);
  //    std::cout << "after replace" << std::endl;
  //    std::cout << word_newchar << std::endl;

  bool hasDigit = word_newchar.find_first_of("0123456789") != std::string::npos;
  //    bool hasDash =  !hasDigit  && word_newchar.find_first_of("-") != std::string::npos;
  //    bool hasLowerCase = false;
  //    int upperCaseCount=0;

  //std::cout << "this string contains a digit, see " << word << std::endl;
  //std::cout << "this string contains a dash, see " << word << std::endl;

  //     for(std::string::const_iterator c = word.begin(); c != word.end(); c++){

  //       if (is_upper_case_letter(*c)){
  //        //Berkeley parser puts hasLower = true here - an error?
  //        ++upperCaseCount;
  //       }else if  (is_lower_case_letter(*c)){
  //        hasLowerCase = true;
  //       }
  //     }

  //  if (upperCaseCount > 0){
  //  std::cout << "original word is " << word << " lower case version is " << word_lower_case << std::endl;
  //}
  //if first character is upper case and it is the first word in sentences and the remaining characters are lower case

  //static SymbolTable* sym_tab_word = SymbolTable::instance_word();

  //deal with capitalisation
  //     if ( is_upper_case_letter(*(word.begin()))){
  //       if (position==0 && upperCaseCount==1){
  //        word_class.append("-INITC");
  //        if (sym_tab_word->token_exists(word_lower_case)){
  //                word_class.append("-KNOWNLC");
  //        }
  //       }else {
  //        word_class.append("-CAPS");
  //       }
  //       //missing an else if here - basically, I'm not dealing with non-letters

  //     }else if (hasLowerCase){
  //       word_class.append("-LC");
  //     }

  if (hasDigit){
    word_class.append("-NUM");
  }
  //     if (hasDash){
  //       word_class.append("-DASH");
  //     }

  unsigned word_length = word_newchar.size();

  if (word_length >= 5 && !hasDigit) {
    // don't do for very short words;
    // Implement common discriminating suffixes

    std::string last_character = word_newchar.substr(word_length-1,1);
    std::string last_two = word_newchar.substr(word_length-2,2);
    std::string last_three = word_newchar.substr(word_length-3,3);

    boost::regex re;
    re.assign("^Al~a\\*i.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_Al~a*i");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign(".*a\\*iyna$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-suf_a*iyna");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign(".*niy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_niy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*liy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_liy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*biy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_biy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*riy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_riy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*miy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_miy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*kiy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_kiy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign("^Al~a.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_Al~a");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign(".*iyna$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_iyna");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*siy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_siy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*iy~$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_iy~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign("^mu.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_mu");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign("^>a.*$");

    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_>a");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign("^ta.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_ta");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign("^\\{i.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_{i");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign("^ma.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_ma");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign("^Eo.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_Eo");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign("^Al.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_Al");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign("^so.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_so");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign(".*An$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_An");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*yA$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_yA~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign(".*na$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_na~");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign("^<i.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_<i");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign(".*Ar$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_Ar");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    re.assign("^no.*$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
        word_class.append("-pref_no");
	//      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
        return word_class;
      }
    re.assign(".*uw$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_uw");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }

    re.assign(".*A<$");
    if (boost::regex_match(get_base(word_newchar), re))
      {
	word_class.append("-suf_A<");
        //      std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
	return word_class;
      }
    //    std::cout << "word: " << word_newchar << " word class: " << word_class << std::endl;
  }
  return word_class;


}
Beispiel #13
0
/// Interprets a string as a boolean.
///
/// Returns true when the value is "yes" or "true", or when atoi() of it is
/// at least 1; false otherwise. to_lower_case() is applied to a private copy
/// before the comparison.
/// NOTE(review): presumably to_lower_case() lower-cases its argument in
/// place, which would make the comparison case-insensitive - confirm.
bool to_bool(const std::string &s)
{
	std::string lowered = s;
	to_lower_case(lowered);

	if (lowered == "yes") {
		return true;
	}
	if (lowered == "true") {
		return true;
	}
	return atoi(lowered.c_str()) >= 1;
}
Beispiel #14
0
// The Assembler
bytes * System::assembl(string instruction) {
    int     pos = 0;
    string  opcode, dest, src;
    int     flags = 0, ndest, nsrc;
    string  src_test, dest_test;
    bytes * bIns;

    bIns = (bytes *) malloc(sizeof(bytes)); // the output of the assembler
    memset(bIns, 0, sizeof(bytes));
    int   ins_pos  = 0;             // the position in the ins bytes
    int   op_flags = 0;            // the flags that will be used in the search for the opcode
    bytes modrm;
    modrm.length = 0;
    bool   bits16      = false;
    int    imul_imm    = 0;        // special case
    bool   no_imul_imm = true;     // special case
    string ins;
	if (instruction.empty()) {
        goto assemble_error;
    }
    // we will now cut the instruction to separate the opcode & rm & reg or imm or asnything in src & dest
    ins = trim(to_lower_case(instruction)); // deleting the spaces and convert it into lower case (avoid any problems in comparing strings)
    // should the instruction look like opcode dest,src (with deleting the spaces) like mov eax,edx (eax-->dest ,edx-->src
Getting_Opcode:
    opcode = ins.substr(pos, ins.find_first_of(" ")); // getting the opcode
    // testing the prefixes
    if (compare_array(opcode, prefixes, PREFIXES_LENGTH)) {
        int i = compare_array(opcode, prefixes, PREFIXES_LENGTH);
        ins = trim(ins.substr(ins.find_first_of(" ") + 1, ins.size()));
        if ((i == 1) || (i == 2)) {
            bIns->s[ins_pos] = 0xF3;
            ins_pos++;
        } else if (i == 3) {
            bIns->s[ins_pos] = 0xF2;
            ins_pos++;
        }
        goto Getting_Opcode;
    }
    if ((ins.find_first_of(" ") != 0) && (ins.find_first_of(" ") <= ins.size())) {
        pos  = ins.find_first_of(" ") + 1;
        dest = trim(ins.substr(pos, ins.find_first_of(",") - pos)); // getting the dest (rm,reg or imm)
    } else {
        dest = "";
    }
    if ((ins.find_first_of(",") != 0) && (ins.find_first_of(",") <= ins.size())) { // if there's "," so there's a src
        pos = ins.find_first_of(",") + 1;
        src = trim(ins.substr(pos, ins.size() - pos));
    } else {
        src = "";
    } // else src will be null
    // -------------------------------------------------------------------------------------------
    // Destination :
    if (dest.empty()) {
        flags = NO_SRCDEST;
    } else {
        dest_test = trim(dest.substr(0, dest.find_first_of(" "))); // first string to be compared
        if (compare_array(dest_test, reg32, REGISTERS_LENGTH)) { // is it a reg 32 buts
            flags |= DEST_REG | DEST_BITS32;
            ndest  = compare_array(dest_test, reg32, REGISTERS_LENGTH) - 1;     // the register
        } else if (compare_array(dest_test, reg16, REGISTERS_LENGTH)) { // 16 bits
            flags |= DEST_REG | DEST_BITS16;
            ndest  = compare_array(dest_test, reg16, REGISTERS_LENGTH) - 1;     // the register
        } else if (compare_array(dest_test, reg8, REGISTERS_LENGTH)) { // 8 bits
            flags |= DEST_REG | DEST_BITS8;
            ndest  = compare_array(dest_test, reg8, REGISTERS_LENGTH) - 1;     // the register

            // ------------------------------------
        } else if (compare_array(dest_test, rm_sizes, RM_SIZES_LENGTH)) { // is it rm
            flags |= 0x00000200;
            int n = compare_array(dest_test, rm_sizes, RM_SIZES_LENGTH);
            int dest_pos;
            if (n == 3) {
                flags |= DEST_BITS32; // setting the size of rm depend on byte word or DWORD
            } else if (n == 2) {
                flags |= DEST_BITS16;
            } else if (n == 1) {
                flags |= DEST_BITS8;
            }
            dest_test = trim(dest.substr(rm_sizes[n - 1].size(), dest.size() - 1));
            string s[] = {
                "ptr"
            }; // check on ptr
            if (compare_array(dest_test, s, 1, 3) == 0) {
                goto assemble_error;
            }
            dest_test = trim(dest_test.substr(3, dest_test.size() - 1));
            if (compare_array(dest_test, seg, SEGEMENTS_LENGTH, 2) != 0) { // if there's a specific segement
                // writing the segement prefixes
                int n = compare_array(dest_test, seg, SEGEMENTS_LENGTH, 2);
                if (n == 1) {
                    bIns->s[ins_pos] = 0x2E;
                    ins_pos++;
                } else if (n == 2) {
                    bIns->s[ins_pos] = 0x3E;
                    ins_pos++;
                } else if (n == 3) {
                    bIns->s[ins_pos] = 0x36;
                    ins_pos++;
                } else if (n == 4) {
                    bIns->s[ins_pos] = 0x26;
                    ins_pos++;
                } else if (n == 5) {
                    bIns->s[ins_pos] = 0x64;
                    ins_pos++;
                } else if (n == 6) {
                    bIns->s[ins_pos] = 0x65;
                    ins_pos++;
                }
                if (dest_test.find_first_of(":") != 2) { // it should be fs:[xxx]
                    goto assemble_error;
                }
                dest_test = trim(dest_test.substr(3, dest_test.size() - 1));
            } // check on the []
            if ((!dest_test.substr(0, 1).compare("[") == 0) || (!dest_test.substr(dest_test.size() - 1, dest_test.size() - 1).compare("]") == 0)) {
                goto assemble_error;
            }
            dest_test = trim(dest_test.substr(1, dest_test.size() - 2));
            // now get the modrm
            modrm = get_modrm(dest_test, flags);
            if (modrm.length == 0) {
                goto assemble_error;
            }
            // -----------------------------------------
        } else if (compare_array(dest_test, numbers, 10, 1)) { // is it imm
            flags |= DEST_IMM;
            ndest  = imm_to_dec(dest_test);
        }
    }
    // -------------------------------------------------------------------------------------------
    // Source:
    //
    if (src.empty()) {
        flags |= SRC_NOSRC;
    } else {
        // special case
        // imul exx,exx,imm
        if ((src.find_first_of(",") != 0) && (src.find_first_of(",") <= src.size())) {
            imul_imm    = imm_to_dec(src.substr(src.find_first_of(",") + 1, src.size()));
            src         = src.substr(0, src.find_first_of(","));
            no_imul_imm = false;
        }
        src_test = trim(src.substr(0, src.find_first_of(" "))); // first string to be compared
        if (compare_array(src_test, reg32, REGISTERS_LENGTH)) { // is it a reg 32 buts
            flags |= SRC_REG | SRC_BITS32;
            nsrc   = compare_array(src_test, reg32, REGISTERS_LENGTH) - 1;    // the register
        } else if (compare_array(src_test, reg16, REGISTERS_LENGTH)) { // 16 bits
            flags |= SRC_REG;
            if ((opcode.compare("movzx") == 0) || (opcode.compare("movsx") == 0)) {
                flags |= MOVXZ_SRC16;
            } else {
                flags |= SRC_BITS16;
            }
            nsrc = compare_array(src_test, reg16, REGISTERS_LENGTH) - 1; // the register
        } else if (compare_array(src_test, reg8, REGISTERS_LENGTH)) { // 8 bits
            flags |= SRC_REG;
            if ((opcode.compare("movzx") == 0) || (opcode.compare("movsx") == 0)) {
                flags |= MOVXZ_SRC8;
            } else {
                flags |= SRC_BITS8;
            }
            nsrc = compare_array(src_test, reg8, REGISTERS_LENGTH) - 1; // the register

            // ------------------------------------
        } else if (compare_array(src_test, rm_sizes, RM_SIZES_LENGTH)) { // is it rm
            flags |= SRC_RM;
            int n = compare_array(src_test, rm_sizes, RM_SIZES_LENGTH);
            int src_pos;
            if (n == 3) {
                flags |= SRC_BITS32; // setting the size of rm depend on byte word or DWORD
            } else if (n == 2) {
                if ((opcode.compare("movzx") == 0) || (opcode.compare("movsz") == 0)) {
                    flags |= MOVXZ_SRC16;
                } else {
                    flags |= SRC_BITS16;
                }
            } else if (n == 1) {
                if ((opcode.compare("movzx") == 0) || (opcode.compare("movsz") == 0)) {
                    flags |= MOVXZ_SRC8;
                } else {
                    flags |= SRC_BITS8;
                }
            }
            src_test = trim(src.substr(rm_sizes[n - 1].size(), src.size() - 1));
            string s[] = {"ptr"}; // check on ptr
            if (compare_array(src_test, s, 1, 3) == 0) {
                goto assemble_error;
            }
            src_test = trim(src_test.substr(3, src_test.size() - 1));
            if (compare_array(src_test, seg, SEGEMENTS_LENGTH, 2) != 0) { // if there's a specific segement
                // writing the segement flags
                int n = compare_array(src_test, seg, SEGEMENTS_LENGTH, 2);
                if (n == 1) {
                    bIns->s[ins_pos] = 0x2E;
                    ins_pos++;
                } else if (n == 2) {
                    bIns->s[ins_pos] = 0x3E;
                    ins_pos++;
                } else if (n == 3) {
                    bIns->s[ins_pos] = 0x36;
                    ins_pos++;
                } else if (n == 4) {
                    bIns->s[ins_pos] = 0x26;
                    ins_pos++;
                } else if (n == 5) {
                    bIns->s[ins_pos] = 0x64;
                    ins_pos++;
                } else if (n == 6) {
                    bIns->s[ins_pos] = 0x65;
                    ins_pos++;
                }
                if (src_test.find_first_of(":") != 2) { // it should be fs:[xxx]
                    goto assemble_error;
                }
                src_test = trim(src_test.substr(3, src_test.size() - 1));
            } // check on the []
            if ((!src_test.substr(0, 1).compare("[") == 0) || (!src_test.substr(src_test.size() - 1, src_test.size() - 1).compare("]") == 0)) {
                goto assemble_error;
            }
            src_test = trim(src_test.substr(1, src_test.size() - 2));
            // now get the modrm
            modrm = get_modrm(src_test, flags);
            if (modrm.length == 0) {
                goto assemble_error;
            }
            // -----------------------------------------
        } else if (compare_array(src_test, numbers, 10, 1)) { // is it imm
            flags |= SRC_IMM;
            nsrc   = imm_to_dec(src_test);
        }
    }
    // -------------------------------------------------------------------------------
    // Convertion from assembler flags to opcodes flaga

    if (flags & NO_SRCDEST) {
        op_flags = OP_ANY;
    } else if (flags & DEST_RM) {
        if (flags & SRC_RM) {
            goto assemble_error;
        } else if (flags & SRC_NOSRC) {
            op_flags = OP_RM_ONLY;
        } else if (flags & SRC_REG) {
            op_flags = OP_RM_R;
            int reg_flag = 1 << nsrc;
            op_flags   |= reg_flag;
            modrm.s[0] += nsrc << 3;
        } else if (flags & SRC_IMM) {
            op_flags = OP_RM_IMM;
        } else {
            goto assemble_error;
        }
    } else if (flags & DEST_IMM) {
        if (flags & SRC_NOSRC) {
            op_flags = OP_IMM_ONLY;
        } else {
            goto assemble_error;
        }
    } else if (flags & DEST_REG) {
        if (flags & SRC_RM) {
            op_flags = OP_R_RM;
            int reg_flag = 1 << ndest;
            op_flags   |= reg_flag;
            modrm.s[0] += ndest << 3;
        } else if (flags & SRC_NOSRC) {
            op_flags = OP_REG_ONLY;
            int reg_flag = 1 << ndest;
            op_flags |= reg_flag;
        } else if (flags & SRC_REG) {
            op_flags = OP_R_RM;
            int reg_flag = 1 << ndest;
            op_flags    |= reg_flag;
            modrm.length = 1;
            modrm.s[0]   = 0xC0 + nsrc + (ndest << 3);
        } else if (flags & SRC_IMM) {
            op_flags = OP_R_IMM;
            int reg_flag = 1 << ndest;
            op_flags |= reg_flag;
        } else {
            goto assemble_error;
        }
    }
    if (flags & DEST_BITS32) {
        op_flags |= OP_BITS32;
    } else if (flags & DEST_BITS16) {
        op_flags        |= OP_BITS32;
        bits16           = true;
        bIns->s[ins_pos] = 0x66; // the prefix
        ins_pos++;
    } else if (flags & DEST_BITS8) {
        op_flags |= OP_BITS8;
    }
    if ((op_flags & OP_RM_IMM) || (op_flags & OP_R_IMM) || (op_flags & OP_IMM_ONLY)) {
        if (op_flags & OP_IMM_ONLY) {
            if (ndest < 256) {
                op_flags |= OP_IMM8;
            } else {
                op_flags |= OP_IMM32;
            }
        } else {
            if (nsrc < 256) {
                op_flags |= OP_IMM8;
            } else {
                op_flags |= OP_IMM32;
            }
        }
    }
    // if ((op_flags & OP_RM_R) && (op_flags & OP_BITS32)){
    // char buff[50];
    // sprintf(buff,"%X %X %X %X %X %X %X",modrm.s[0],modrm.s[1],modrm.s[2],modrm.s[3],modrm.s[4],modrm.s[5],modrm.s[6]);
    // cout << buff << "\n" ;
    // }else {
    // cout << op_flags << "\n" ;
    // };
    // ----------------------------------------------------------------------------------------------
    // Special Opcodes:

    // mov eax,moffset
    if ((opcode.compare("mov") == 0) && (modrm.s[0] == 5) && (modrm.length >= 1)) {
        if (((op_flags & OP_RM_R) && (nsrc == 0)) || ((op_flags & OP_R_RM) && (ndest == 0))) {
            op_flags |= OP_RM_DISP;
            // deleting the modrm byte and leave the disp32
            for (int l = 1; l < modrm.length; l++) {
                modrm.s[l - 1] = modrm.s[l];
            }
            modrm.length--;
            if (bits16 == true) {
                if (bIns->s[ins_pos - 1] == 0x66) {
                    bIns->s[ins_pos - 1] = 0x67;
                }
            }
        }
        // xchg exx,eax --> 9x
    } else if ((opcode.compare("xchg") == 0) && (op_flags & OP_RM_R) && (op_flags & OP_REG_EAX) && (op_flags & OP_BITS32)) {
        op_flags = OP_REG_ONLY | OP_BITS32;
        int rm = (modrm.s[0] & 0x07);
        op_flags    |= (1 << rm);
        modrm.length = 0;
        // xchg eax,exx -->9x
    } else if ((opcode.compare("xchg") == 0) && (op_flags & OP_RM_R) && ((modrm.s[0] & 0x07) == 0) && (op_flags & OP_BITS32)) {
        op_flags    &= 0xFF;    // get the op_reg_exx
        op_flags    |= OP_REG_ONLY | OP_BITS32;
        modrm.length = 0;
        // jcxz
    } else if (opcode.compare("jcxz") == 0)
	{
        opcode           = "jecxz";
        bIns->s[ins_pos] = 0x67;
        ins_pos++;
        // ret lw or ret far lw
    } else if (((opcode.compare("ret") == 0) || (opcode.compare("ret far") == 0)) && (op_flags & OP_IMM_ONLY))
	{
        bits16 = true;
        flags &= ~OP_IMM8;
        flags &= OP_IMM32;
	// Imul exx,exx
    } else if ((opcode.compare("imul") == 0) && !(flags & SRC_NOSRC)) {
        if (no_imul_imm == true) {
            op_flags |= OP_0F;
        }
    // movsw,stosw and so on
    } else if ((opcode.size() == 5) && (opcode.c_str()[4] == 'w')) {
        opcode = opcode.substr(0, 4);
        opcode.append("d");
        bIns->s[ins_pos] = 0x66;
        ins_pos++;
        // movzx & movsx
    } else if (flags & MOVXZ_SRC8) {
        op_flags |= OP_SRC8;
    } else if (flags & MOVXZ_SRC16) {
        op_flags |= OP_SRC16;
    }
    // ----------------------------------------------------------------------------------------------
    // Searching for the opcode:

opcode_check:
    for (int i = 0; i < dis_entries; i++) {
        if ((FlagTable[i].opcode == 0) && (FlagTable[i].flags == 0)) {
            continue; // ignore invalid Entries
        }
        if (FlagTable[i].mnemonics.compare(opcode.c_str()) == 0) {
            int n = (op_flags & FlagTable[i].flags);
            // if(FlagTable[i].opcode=0xAF) cout << (int*)FlagTable[i].flags<<"   "<<(int*)op_flags <<"   "<< (int*)n << "\n";
            if (n == op_flags) { // this mean op_flags inside the flagtable.flaga
                // we find it
                // cout << FlagTable[i].mnemonics << "\n";         //****************************************
                if (FlagTable[i].flags & OP_0F) {
                    bIns->s[ins_pos] = 0x0F;
                    ins_pos++;
                }
                bIns->s[ins_pos] = FlagTable[i].opcode;
                ins_pos++;
                if (FlagTable[i].flags & OP_GROUP) {
                    modrm.s[0] &= 0xC7; // deleting the reg
                    modrm.s[0] += (FlagTable[i].reg << 3);
                }
                if ((op_flags & OP_RM_IMM) || (op_flags & OP_R_IMM) || (op_flags & OP_IMM_ONLY)) {
                    if (op_flags & OP_IMM_ONLY) {
                        if ((op_flags & OP_IMM8) && !(FlagTable[i].flags & OP_IMM32)) { // it's mean it's only for IMM8
                            memcpy(&modrm.s[modrm.length], &ndest, 1);
                            modrm.length++;
                        } else if (bits16) {
                            memcpy(&modrm.s[modrm.length], &ndest, 2);
                            modrm.length += 2;
                        } else {
                            memcpy(&modrm.s[modrm.length], &ndest, 4);
                            modrm.length += 4;
                        }
                    } else {
                        if ((op_flags & OP_IMM8) && !(FlagTable[i].flags & OP_IMM32)) { // it's mean it's only for IMM8
                            memcpy(&modrm.s[modrm.length], &nsrc, 1);
                            modrm.length++;
                        } else if (bits16) {
                            memcpy(&modrm.s[modrm.length], &nsrc, 2);
                            modrm.length += 2;
                        } else {
                            memcpy(&modrm.s[modrm.length], &nsrc, 4);
                            modrm.length += 4;
                        }
                    }
                }
                // special case
                // imul exx,exx,imm
                if (FlagTable[i].opcode == 0x69) {
                    if (imul_imm < 256) {
                            memcpy(&modrm.s[modrm.length], &imul_imm, 1);
                        modrm.length++;
                        bIns->s[ins_pos - 1] = 0x6B;
                    } else {
                            memcpy(&modrm.s[modrm.length], &imul_imm, 4);
                        modrm.length += 4;
                    }
                }
                // shr exx,1 (or anything in group 2 with imm==1)
                if ((FlagTable[i].opcode == 0xC0) && (op_flags & OP_RM_IMM) && (nsrc == 1)) {
                    bIns->s[ins_pos - 1] = 0xD0;
                    modrm.length         = 1;
                }
                if ((FlagTable[i].opcode == 0xC1) && (op_flags & OP_RM_IMM) && (nsrc == 1)) {
                    bIns->s[ins_pos - 1] = 0xD1;
                    modrm.length         = 1;
                }
                for (int l = 0; l < modrm.length; l++) {
                    bIns->s[ins_pos] = modrm.s[l];
                    ins_pos++;
                }
                goto opcode_founded;
            }
        }
    }
    // we will reach here if it didn't find so we will test other flags :)
    if (op_flags & OP_R_IMM) {
        op_flags &= (~OP_R_IMM);
        // delete the register flag
        op_flags    &= ~(1 << ndest);
        op_flags    |= OP_RM_IMM;
        modrm.length = 1;
        modrm.s[0]   = 0xC0 + (ndest);
        goto opcode_check;
    } else if (op_flags & OP_REG_ONLY) {
        op_flags &= (~OP_REG_ONLY);
        // delete the register flag
        op_flags    &= ~(1 << ndest);
        op_flags    |= OP_RM_ONLY;
        modrm.length = 1;
        modrm.s[0]   = 0xC0 + (ndest);
        goto opcode_check;
    } else if ((flags & SRC_REG) && (flags & DEST_REG) && (op_flags & OP_R_RM)) {
        op_flags &= ~OP_R_RM;
        op_flags &= ~(1 << ndest);
        op_flags |= OP_RM_R;
        int reg_flag = 1 << nsrc;
        op_flags    |= reg_flag;
        modrm.length = 1;
        modrm.s[0]   = 0xC0 + ndest + (nsrc << 3);
        goto opcode_check;
    }
opcode_founded:
    bIns->length = ins_pos;

    /*
    cout<< "Length = "<<bIns->length << "\n";
    char buff[50];
    sprintf(buff,"%X %X %X %X %X %X %X %X %X %X",bIns->s[0],bIns->s[1],bIns->s[2],bIns->s[3],bIns->s[4],bIns->s[5],bIns->s[6],bIns->s[7],bIns->s[8],bIns->s[9]);
    cout << buff << "\n" ;
    cout << instruction<<"\n";//*/
    return bIns;

assemble_error:
    bIns->length = 0;
    return bIns;
}
Beispiel #15
0
	/// Serializes an HTTP request and passes the encoded bytes to on_encoded_data_avail().
	///
	/// The output consists of the request line (verb, URI, optional URL-encoded
	/// query string built from `get_params`, and the version), followed by the
	/// header lines, an empty line, and finally the entity.
	///
	/// Headers are adjusted before being written:
	///  - Empty entity: `Content-Type` and `Transfer-Encoding` are removed;
	///    `Content-Length` is set to STR_0 for POST/PUT and removed otherwise.
	///  - Non-empty entity: `Content-Type` gets a form-urlencoded default if absent.
	///    If `Transfer-Encoding` (ignoring parameters after ';', whitespace and case)
	///    is empty or equals STR_IDENTITY, `Content-Length` is set to the entity size.
	///    Otherwise the entity is wrapped as a single chunk followed by the
	///    terminating zero-size chunk, and `Content-Length` is removed.
	///
	/// @param request_headers Verb, URI, query parameters, version and headers (taken by value and modified locally).
	/// @param entity          Request body; may be empty.
	/// @return Whatever on_encoded_data_avail() returns for the encoded data.
	long ClientWriter::put_request(RequestHeaders request_headers, StreamBuffer entity){
		PROFILE_ME;

		StreamBuffer data;

		// Request line: "<VERB> <uri>[?<query>] HTTP/<major>.<minor>\r\n".
		data.put(get_string_from_verb(request_headers.verb));
		data.put(' ');
		data.put(request_headers.uri);
		if(!request_headers.get_params.empty()){
			data.put('?');
			data.put(url_encoded_from_optional_map(request_headers.get_params));
		}
		char temp[64];
		// The version is stored as major * 10000 + minor.
		const unsigned ver_major = request_headers.version / 10000, ver_minor = request_headers.version % 10000;
		unsigned len = (unsigned)std::sprintf(temp, " HTTP/%u.%u\r\n", ver_major, ver_minor);
		data.put(temp, len);

		AUTO_REF(headers, request_headers.headers);
		if(entity.empty()){
			// No body: headers describing a body make no sense.
			headers.erase("Content-Type");
			headers.erase("Transfer-Encoding");

			if((request_headers.verb == V_POST) || (request_headers.verb == V_PUT)){
				// POST/PUT still announce an explicit length (STR_0) for the empty body.
				headers.set(sslit("Content-Length"), STR_0);
			} else {
				headers.erase("Content-Length");
			}
		} else {
			if(!headers.has("Content-Type")){
				headers.set(sslit("Content-Type"), "application/x-www-form-urlencoded; charset=utf-8");
			}

			// Normalize the transfer coding: drop parameters after ';', trim, lower-case.
			AUTO(transfer_encoding, headers.get("Transfer-Encoding"));
			AUTO(pos, transfer_encoding.find(';'));
			if(pos != std::string::npos){
				transfer_encoding.erase(pos);
			}
			transfer_encoding = to_lower_case(trim(STD_MOVE(transfer_encoding)));

			if(transfer_encoding.empty() || (transfer_encoding == STR_IDENTITY)){
				headers.set(sslit("Content-Length"), boost::lexical_cast<std::string>(entity.size()));
			} else {
				// A message with a Transfer-Encoding must not also carry a
				// Content-Length (RFC 7230, section 3.3.2); a stale caller-supplied
				// value would make the message framing ambiguous.
				headers.erase("Content-Length");

				// There is only one chunk.
				StreamBuffer chunk;
				len = (unsigned)std::sprintf(temp, "%llx\r\n", (unsigned long long)entity.size());
				chunk.put(temp, len);
				chunk.splice(entity);
				// End of this chunk, then the zero-size last chunk with no trailers.
				chunk.put("\r\n0\r\n\r\n");
				entity.swap(chunk);
			}
		}
		for(AUTO(it, headers.begin()); it != headers.end(); ++it){
			data.put(it->first.get());
			data.put(": ");
			data.put(it->second);
			data.put("\r\n");
		}
		// Blank line terminating the header section.
		data.put("\r\n");

		data.splice(entity);

		return on_encoded_data_avail(STD_MOVE(data));
	}
Beispiel #16
0
 /// Forwards to to_lower_case(heap, input) and returns its result unchanged.
 static inline dt::istring call(vm_heap &heap, const dt::istring &input)
 {
  return to_lower_case(heap, input);
 }