// Classifies `token` and returns a new Token carrying the same name and line
// number, tagged with one of "LABEL", "INSTRUCTION", "DIRECTIVE" or "OPERAND".
// The checks run in that order; "OPERAND" is the fallback when none matches.
Token TokenCreator::identifyTokenType(Token token, assembly_language language) {
    // Store the language before running the predicates.
    // NOTE(review): isLabel/isInstruction/isDirective presumably read this->language — confirm.
    this->language = language;

    std::string type = "OPERAND";
    if (isLabel(token)) {
        type = "LABEL";
    } else if (isInstruction(token)) {
        type = "INSTRUCTION";
    } else if (isDirective(token)) {
        type = "DIRECTIVE";
    }

    return Token(token.name, token.line_number, type);
}
// Builds an instruction from an identifier token.
//
// Looks the identifier up via isInstruction() and uses the result as an index
// into i_t. If that table entry wants an argument, one is read with getArg().
// The token that follows must be either absent (NULL) or a Separator; anything
// else raises std::exception. The following token is always deleted here.
//
// NOTE(review): when the exception is thrown, the operand obtained from
// getArg() is not released in this function — confirm who owns it.
static instruction *automate(lexer &lexer, struct lexer::token *identifier) {
    const size_t index = isInstruction(identifier->str);

    ioperand *operand = NULL;
    if (i_t[index].arg == true) {
        operand = getArg(lexer);
    }

    struct lexer::token *trailing = lexer.next_token();
    // Decide before deleting: the token's type cannot be read afterwards.
    const bool unexpected = (trailing != NULL && trailing->type != lexer::Separator);
    delete trailing;

    if (unexpected) {
        throw std::exception();
    }
    return new instruction(operand, i_t[index].type);
}
/*
 * Classifies one input string.
 *
 * The predicates are tried in a fixed order — directive, label, comment,
 * instruction — and the first one that matches decides the result.
 * Returns ERROR when none of them matches.
 */
inputType identifyType (char input[])
{
    if (isDirective(input))
        return DIRECTIVE;

    if (isLabel(input))
        return LABEL;

    if (isComment(input))
        return COMMENT;

    if (isInstruction(input))
        return INSTRUCTION;

    return ERROR;
}
void parse () { if (option.input != option.ALREADY) freopen ("vmlinux.objd", "r", stdin); bool relevant = false; regmatch_t match; int i; if (option.work == option_t::FUNC) printf ("^("); while (fgets (buf, BUF_LEN, stdin)) { while (strlen(buf) > 0 && buf[strlen(buf)-1] < ' ') buf[strlen(buf)-1] = '\0'; if (isFunction()) { relevant = !regexec(®, state.func.name, 1, &match, 0); for (int i = 0; relevant && i < DCount; i ++) if (strstr(state.func.name, DangerousFunction[i]) != NULL) relevant = false; for (int i = 0; relevant && i < FCount; i ++) if (strcmp(state.func.name, FrequentFunction[i]) == 0) relevant = false; if (relevant) { if (option.work == option_t::INSTR) fprintf(stderr,"In function %s:\n", state.func.name); else printf("%s|", state.func.name); } } else if (relevant && isInstruction() && option.work == option_t::INSTR) { for (i = 0; i < MCount; i ++) if (strstr(state.instr.text, MemoryAccess[i]) == state.instr.text) { char *p, *q; char *sp, *bp; p = strchr(state.instr.text, '('); if (p == NULL) break; q = strchr(p, ')'); if (q == NULL) q = p + strlen(p); sp = strstr(p, "sp"); bp = strstr(p, "bp"); /* * We want to trap at memory access instructions that operates on non-stack address. * A simple heuristic is used here that if any memory access is relative to %rbp or * %rsp it is probably on the stack, and we refrain from them. */ if ((sp == NULL || sp > q) && (bp == NULL || bp > q)) { printf ("%016lx: %s\n", state.instr.address, state.instr.text); } break; } } } if (option.work == option_t::FUNC) printf ("iMpoSSiBlE)$\n"); }
/*
 * Two-pass assembler driver over the hard-coded input file "fig2.5.txt".
 * (The n/i/x/b/p/e flags and format 2/3/4 handling look like SIC/XE — TODO confirm.)
 *
 * Every source line is fetched with fgets() and split by read() into
 * label / opcode / operand.
 *
 * Pass 1:
 *   - If the first line's opcode is START, the label becomes program_name and
 *     the operand seeds start_loc and locctr (parsed as hex when
 *     isHexadecimal() returns 1, otherwise with sscanf "%d").
 *   - For every non-comment line up to END: a non-empty label is stored in
 *     SymbolTable at the current locctr (a duplicate prints an error and
 *     returns 0), and locctr advances by the statement length derived from
 *     isInstruction(opcode, "FORMAT") or the WORD/RESW/RESB/BYTE/BASE rules.
 *     An unknown opcode prints an error and returns 0.
 *   - program_length = final locctr - start_loc.
 *
 * Pass 2:
 *   - Re-reads the file, prints the "H" record, then for each loop iteration
 *     builds object_code and prints a "T" record (the printf sits outside the
 *     isComment() test, so it runs for comment lines too).
 *   - Addressing flags: '@' sets n=1,i=0; '#' sets n=0,i=1; otherwise n=i=1 and
 *     ",X" in the operand sets x. A symbol operand selects PC-relative (p=1);
 *     when the displacement is outside [-2048, 2047] it switches to
 *     base-relative using B. A leading '+' on the opcode sets e=1 and clears b/p.
 *   - Finishes with printMrecord() and the "E" record.
 *
 * Returns 0 on every path; a missing file only prints "file not found".
 *
 * NOTE(review), all visible in this block:
 *   - `head` is malloc'd (4 bytes) on every pass-2 iteration and never freed;
 *     sprintf(head, "%3hX", address) with a negative address writes four hex
 *     digits plus the terminator into it.
 *   - strlen(object_code) is passed to printf for a "%02X" conversion.
 *   - program_name holds 8 bytes but is filled by strcpy from the 30-byte label.
 *   - A, X, L, PC, SW, S, T, F are declared but not used in this function.
 *   - Both loops test feof() before examining the result of the preceding fgets().
 */
int main() { int start_loc = 0, locctr = 0 , end_loc, program_length = 0, instr_length = 0; char program_name[8] = {'\0'}, filename[] = "fig2.5.txt"; FILE *fptr; char accept[100] = {'\0'}; char label[30], opcode[30], operand[30]; int A = 0, X = 0, L = 0,PC = 0, SW = 0, B = 0, S = 0, T = 0, F = 0; int n = 1, i = 1, x = 0, b = 0, p = 0, e = 0, address = 0; /************************************ pass 1 ****************************************/ if((fptr = fopen(filename, "r")) != NULL) { //printf("file found\n"); fgets(accept, 30, fptr); read(label, opcode, operand, accept); if(strcmp(opcode,"START") == 0) { int k, six = 1, hvalue = 0; strcpy(program_name, label); if(isHexadecimal(operand) == 1) { for(k = strlen(operand) - 1, six ; k >= 0 ; k--, six *= 16) { int temp = (int)operand[k] - 48; if(temp > 9) { temp = 10 + (int)operand[k] - 65; } hvalue += temp * six; } start_loc = hvalue; locctr = hvalue; } else { sscanf(operand, "%d", &start_loc); sscanf(operand, "%d", &locctr); } } else { locctr = 0; } fgets(accept, 30, fptr); read(label, opcode, operand, accept); while(!feof(fptr) && strcmp(opcode,"END") != 0) { if(isComment(accept) == 0) // not a comment { if(strlen(label) != 0) { if(inSymbol(label) != -1) { printf("error, duplicate symbol !!"); return 0; } else { strcpy(SymbolTable[STindex].label, label); SymbolTable[STindex++].address = locctr; } } if(isInstruction(opcode, "FORMAT") != 0) { if(opcode[0] == '+') { instr_length = 4; } else { instr_length = isInstruction(opcode, "FORMAT"); } } else if(strcmp(opcode,"WORD") == 0) { instr_length = 3; } else if(strcmp(opcode,"RESW") == 0) { instr_length = 3 * ctoi(operand, "RESW"); } else if(strcmp(opcode,"RESB") == 0) { instr_length = ctoi(operand, "RESB"); } else if(strcmp(opcode,"BYTE") == 0) { instr_length = ctoi(operand, "BYTE"); } else if(strcmp(opcode,"BASE") == 0) { instr_length = 0; int bp = inSymbol(operand); B = bp; } else if(strcmp(opcode,"END") == 0) { instr_length = 0; } else { printf("error, invaild operation 
code !!"); return 0; } } locctr += instr_length; instr_length = 0; fgets(accept, 30, fptr); read(label, opcode, operand, accept); } end_loc = locctr; program_length = end_loc - start_loc; fclose(fptr); } else { printf("file not found\n"); } /************************************ pass 2 ****************************************//*H T E M*/ locctr = 0; instr_length = 0; if((fptr = fopen(filename, "r")) != NULL) { //printf("file found\n"); fgets(accept, 30, fptr); read(label, opcode, operand, accept); if(strcmp(opcode,"START") == 0) { int k, six = 1, hvalue = 0; strcpy(program_name, label); if(isHexadecimal(operand) == 1) { for(k = strlen(operand) - 1, six ; k >= 0 ; k--, six *= 16) { int temp = (int)operand[k] - 48; if(temp > 9) { temp = 10 + (int)operand[k] - 65; } hvalue += temp * six; } locctr = hvalue; } else { sscanf(operand, "%d", &locctr); } } else { locctr = 0; } fgets(accept, 30, fptr); read(label, opcode, operand, accept); printf("H %-6s %06X %06X\n",program_name, start_loc, program_length); while(!feof(fptr) && strcmp(opcode,"END") != 0) { n = i = 1; x = b = p = e = 0; char object_code[50] = {'\0'}; if(isComment(accept) == 0) // not a comment { if(strcmp(opcode,"WORD") == 0) { instr_length = 3; generate_objcode(object_code, operand, "WORD"); } else if(strcmp(opcode,"RESW") == 0) { instr_length = 3 * ctoi(operand, "RESW"); } else if(strcmp(opcode,"RESB") == 0) { instr_length = ctoi(operand, "RESB"); } else if(strcmp(opcode,"BYTE") == 0) { instr_length = ctoi(operand, "BYTE"); generate_objcode(object_code, operand, "BYTE"); } else if(strcmp(opcode,"BASE") == 0) { instr_length = 0; noobj = 1; } else if(strcmp(opcode,"END") == 0) { instr_length = 0; noobj = 1; } else if(isInstruction(opcode, "FORMAT") != 0) { if(opcode[0] == '+') { char temp[10] = {'\0'}; strcpy(temp,strtok(operand,"#@")); if(inSymbol(temp) != -1) { Mrecord[Mindex++] = locctr + 1; } instr_length = 4; e = 1; b = p = 0; } else { instr_length = isInstruction(opcode, "FORMAT"); } } else { 
printf("error, invaild operation code !!"); return 0; } } /****************************************************************************************/ switch (operand[0]) { case '@': n = 1; i = 0; break; case '#': n = 0; i = 1; break; default: n = i = 1; if(strstr(operand,",X")!=NULL) { x = 1; strtok(operand,","); } break; } if(inSymbol(operand) != -1) { b = 0; p = 1; if(opcode[0] == '+') { e = 1; b = p = 0; } address=inSymbol(operand); } else if(isdigit(operand[1])!=0) { sscanf(operand + 1,"%d",&address); } if(p == 1) { address = address - (locctr + instr_length); if(address > 2047 || address < -2048) { b = 1; p = 0; address = inSymbol(operand) - B; } } int format = isInstruction(opcode, "FORMAT"), op = isInstruction(opcode, "OPCODE"); if(strcmp(opcode,"RSUB") == 0) { b = p = 0; address = 0; } /******************************************************************************/ char *head = (char*)malloc(4 * sizeof(char)); if(format>=3) { op *= 16; n *= 32; i *= 16; x *= 8; b *= 4; p *= 2; op += n + i + x + b + p + e; sprintf(object_code,"%03X",op); } else if(format==2) sprintf(object_code,"%02X",op); if(opcode[0]=='+') sprintf(object_code+3,"%05X",address); else if(format==3) { if(address<0) { sprintf(head,"%3hX",address); strcpy(object_code+3,head+1); } else sprintf(object_code+3,"%03X",address); } else if(format==2) { sprintf(object_code+2,"%1X",search_reg(operand[0])); if(strlen(operand) > 2) sprintf(object_code+3,"%1X",search_reg(operand[2])); else sprintf(object_code+3,"%1X",0); } /*******************************************************************************/ printf("T %06X %02X %s\n",locctr, strlen(object_code), object_code); locctr += instr_length; instr_length = 0; fgets(accept, 30, fptr); read(label, opcode, operand, accept); } printMrecord(); printf("E %06X\n",start_loc); fclose(fptr); } else { printf("file not found\n"); } //system("pause"); return 0; }
/*
 * Gets the next token from the `lexfile` FILE stream.
 *
 * Returns the TokenType of the token that was recognised, TOK_UNK for a
 * malformed character/string literal, and TOK_EOF once `curr_char` is EOF.
 * An unknown character is reported through jas_err() and skipped.
 *
 * Side effects:
 *  - If the TokenType has an associated string, it is found in global `lexstr`.
 *  - If the TokenType has an associated integer value, look in global `lexint`.
 *  - `lo_col` is set to the column at which the current token starts.
 */
TokenType next_tok(void)
{
    if (!curr_char) eat(); // Eat first char.

    while (curr_char != EOF) {
        lo_col = curr_col; // Save first col of the token.

        // Newline.
        if (curr_char == '\n') {
            eat();
            return TOK_NL;
        }

        // Skip whitespace.
        if (isspace(curr_char)) {
            eat();
            continue;
        }

        // Skip comments until next line.
        if (curr_char == ';') {
            do {
                eat();
            } while (curr_char != '\n' && curr_char != EOF);
            continue;
        }

        // id    ::= [A-Za-z$_][A-Za-z_$0-9]*
        // reg   ::= r([0-9]|1[0-5])[abcd] | rs | re[0-6] | rk[0-7]
        // label ::= <nonopcode id>:
        if (is_idstart(curr_char)) {
            int i;
            // The loop always leaves through `break`, with `i` still indexing
            // the last character stored; hence the pre-increment below.
            for (i = 0; curr_char != EOF; i++) {
                lexstr[i] = curr_char;
                if (is_idcont(peek())) {
                    eat();
                } else {
                    break;
                }
            }
            lexstr[++i] = '\0';
            eat(); // Advance to next char after the identifier.

            // Register?
            if (lexstr[0] == 'r') {
                // Long or short
                if (is_long_reg(lexstr))   return TOK_GL_REG;
                if (is_short_reg(lexstr))  return TOK_GS_REG;
                // Extra
                if (is_extra_reg(lexstr))  return TOK_E_REG;
                // Kernel
                if (is_kernel_reg(lexstr)) return TOK_K_REG;
            }

            // Directive?
            if (is_dtv(lexstr)) {
                return TOK_DATA_SEG;
            }

            // Instruction?
            if (isInstruction(lexstr)) return TOK_INSTR;

            // Label?
            if (curr_char == ':') {
                eat(); // Eat the ':'
                return TOK_LABEL;
            }

            // Plain identifier
            return TOK_ID;
        }

        // chr_lit ::= '[^\\']'
        if (curr_char == '\'') {
            eat(); // Get inner char.
            if (curr_char == '\\') {
                lexstr[0] = escape(eat());
            } else {
                lexstr[0] = curr_char;
            }
            lexstr[1] = '\0';
            eat(); // Reach the closing quote.

            // Error: for situations like '\'
            if (curr_char != '\'') {
                jas_err("Character literal missing closing quote.",
                        curr_line, lo_col, curr_col);
                return TOK_UNK;
            }
            eat(); // Get rid of ' and advance.
            return TOK_CHR_LIT;
        }

        // str_lit ::= "(\\.|[^\\"])*"
        if (curr_char == '"') {
            eat(); // Get first char of string.

            // Let by escape chars, but not single \ or ".
            int i;
            for (i = 0; curr_char != '"'; i++) {
                // Check that we don't reach EOF before the closing ".
                if (curr_char == EOF) {
                    jas_err("EOF while parsing string literal.",
                            curr_line, curr_col, curr_col);
                    return TOK_UNK;
                }
                if (curr_char == '\\') {
                    lexstr[i] = escape(eat());
                } else {
                    lexstr[i] = curr_char;
                }
                eat();
            }
            lexstr[i] = '\0';
            eat(); // Get rid of the " and advance.
            return TOK_STR_LIT;
        }

        // num_lit ::= [+-][1-9][0-9]* | [+-]0[0-7]* | [+-]0x[0-9A-Fa-f]+
        //           | [+-]0b[01]+
        if ((issign(curr_char) && isdigit(peek())) || isdigit(curr_char)) {
            int base = 10;   // Numeric base for interpreting the literal.
            int chars_read;  // Keep track of how many columns we move forward.
            int sign = +1;

            // Grab sign if it exists.
            if (issign(curr_char)) {
                sign = (curr_char == '+' ? +1 : -1);
                eat(); // Get next number character.
            }

            // Choose base by prefix:
            if (curr_char == '0') {
                int next = peek();
                if (next == 'x' || next == 'X') {
                    base = HEX_BASE;
                } else if (next == 'b' || next == 'B') {
                    base = BIN_BASE;
                } else {
                    base = OCT_BASE;
                }
            }

            // Re-place sign back into lexstr for strtol.
            lexstr[0] = (sign == +1) ? '+' : '-';

            // Copy in whole num literal, keep track of columns.
            chars_read = fgets_base(lexstr + 1, lexfile, base);
            curr_col += chars_read;

            // Convert to integer value. strtol() already honours the sign
            // character stored in lexstr[0]; multiplying by `sign` as well
            // would negate twice and turn every negative literal positive.
            lexint = strtol(lexstr, NULL, base);

            // Check for `int` size (we can support max of 32 bits)
            if (lexint < INT_MIN || UINT_MAX < lexint) {
                jas_err("Integer larger than 32 bits.",
                        curr_line, lo_col, curr_col);
            }
            return TOK_NUM;
        }

        // Let by various punctuation:
        switch (curr_char) {
        case ',': eat(); return TOK_COMMA;
        case '.': eat(); return TOK_DOT;
        case '+': eat(); return TOK_PLUS;
        case '-': eat(); return TOK_MINUS;
        case '[': eat(); return TOK_LBRACKET;
        case ']': eat(); return TOK_RBRACKET;
        }

        jas_err("Unknown character encountered.", curr_line, lo_col, lo_col);
        eat(); // Advance to next char.
    }

    return TOK_EOF;
}
const char *Line::getOrigLine() const { assert(isInstruction()); return orig_line; }
void Line::addOption(uint32_t option) { assert(isInstruction()); assert(option < OptNrOptions); options |= 1U << option; }
// A line is valid when it is an instruction, a label, an align or a byte line.
// The predicates are evaluated in that order and evaluation stops at the first
// one that holds.
bool Line::isValid() const
{
    if (isInstruction()) {
        return true;
    }
    if (isLabel()) {
        return true;
    }
    if (isAlign()) {
        return true;
    }
    return isByte();
}
uint32_t Line::getInstruction() const { assert(isInstruction()); return instruction; }
bool Line::hasOption(uint32_t option) const { assert(isInstruction()); assert(option < OptNrOptions); return ((options & (1U << option)) != 0); }
void parse () { if (option.input != option.ALREADY) freopen ("vmlinux.objd", "r", stdin); bool relevant = false; regmatch_t match; int i; if (option.work == option_t::FUNC) printf ("^("); while (fgets (buf, BUF_LEN, stdin)) { while (strlen(buf) > 0 && buf[strlen(buf)-1] < ' ') buf[strlen(buf)-1] = '\0'; if (isFunction()) { relevant = !regexec(®, state.func.name, 1, &match, 0); for (int i = 0; relevant && i < DCount; i ++) if (strstr(state.func.name, DangerousFunction[i]) != NULL) relevant = false; for (int i = 0; relevant && i < FCount; i ++) if (strcmp(state.func.name, FrequentFunction[i]) == 0) relevant = false; if (relevant) { if (option.work == option_t::INSTR) fprintf(stderr,"In function %s:\n", state.func.name); else printf("%s|", state.func.name); } } else if (relevant && isInstruction() && option.work == option_t::INSTR) { for (i = 0; i < MCount; i ++) if (strstr(state.instr.text, MemoryAccess[i]) == state.instr.text) { char *p, *q; char *sp, *bp; p = strchr(state.instr.text, '('); if (p == NULL) break; q = strchr(p, ')'); if (q == NULL) q = p + strlen(p); sp = strstr(p, "sp"); bp = strstr(p, "bp"); if ((sp == NULL || sp < q) && (bp == NULL || bp < q)) { printf ("%016lx: %s\n", state.instr.address, state.instr.text); } break; } } } if (option.work == option_t::FUNC) printf ("iMpoSSiBlE)$\n"); }