/*
 * Parse the inside of a brace-delimited s-expression.
 *
 * Callers invoke this right after reading an opening brace from `p` and
 * pass that brace as `opening_brace`. Characters are consumed until a
 * closing brace or EOF is read. Atoms and nested braced expressions are
 * appended, in order, to a chain that starts at a stack-allocated dummy head.
 *
 * Returns the first real node of the chain (NULL when nothing was parsed).
 * On return p->state is reset to SKIP_SPACE.
 *
 * The process exits with status 1 on: a closing brace that does not pair
 * with `opening_brace`, EOF before any closing brace, an unknown parser
 * state, or allocation failure.
 */
swexp_list_node *parse_s_expr(parser *p, char opening_brace) {
    /* Held as int, not char: comparing a plain char against EOF depends on
     * the implementation-defined signedness of char and can confuse a 0xFF
     * byte with end of input. */
    int c;
    swexp_list_node fakehead, *tail = &fakehead;
    fakehead.next = NULL;
    fakehead.content = NULL;
    fakehead.type = UNDEFINED;
    char closing_brace = brace_pair(opening_brace);

    while ((c = pgetc(p)) != EOF && !is_closing_brace(c)) {
        // if we encounter a comment in any state, strip it out
        IGNORE_COMMENTS()
        switch (p->state) {
        case SKIP_SPACE:
            if (is_space(c) || is_newline(c)) {
                // do nothing if it is space or newline
            } else if (is_opening_brace(c)) {
                // parse the parenthesized s expression into a list
                // and append it to the chain
                swexp_list_node *list = malloc(sizeof *list);
                if (list == NULL) {
                    printf("out of memory in parse_s_expr\n");
                    exit(1);
                }
                list->type = LIST;
                list->next = NULL;
                list->location = NULL;
                list->content = parse_s_expr(p, c);
                tail->next = list;
                tail = list;
            } else {
                // step back and start collecting the atom
                prewind(p, c);
                begin_atom(p);
            }
            break;
        case COLLECTING_ATOM:
            if (is_space(c) || is_newline(c)) {
                // whitespace terminates the atom; push the character back
                // before closing it
                prewind(p, c);
                tail->next = close_atom(p);
                tail = tail->next;
            } else if (is_opening_brace(c)) {
                // an atom directly followed by a brace: the atom is chained
                // in front of the braced contents and the result is wrapped
                // by listof() before being appended
                swexp_list_node *node = close_atom(p);
                node->next = parse_s_expr(p, c);
                tail->next = listof(node);
                tail = chain_tail(tail);
            } else {
                build_atom(p, c);
            }
            break;
        default:
            printf("unexpected state %d in parse_s_expr\n", p->state);
            exit(1);
        }
    }

    if (is_closing_brace(c) && c != closing_brace) {
        printf("mismatched braces in s expression\n");
        exit(1);
    }
    // an atom still being collected ends at the closing brace
    if (p->state == COLLECTING_ATOM) {
        tail->next = close_atom(p);
    }
    if (c == EOF) {
        printf("unexpected EOF while parsing s expression\n");
        exit(1);
    }
    p->state = SKIP_SPACE;
    return fakehead.next;
}
/*
 * Scan the text of `filename` character by character and report what is
 * found to the class browser backend `classback`:
 *   - classes   via classbrowser_classlist_add()
 *   - functions via classbrowser_functionlist_add() (with the enclosing class
 *     when one is open, and with the parameter list text when the name is
 *     followed by parentheses)
 *   - variables via classbrowser_varlist_add()
 *
 * Only text between "<?" and "?>" is examined. Single-line comments,
 * multi-line comments, single/double quoted strings and heredoc/nowdoc
 * bodies are skipped. The file is loaded with read_text_file(); the function
 * returns without doing anything when `filename` is NULL or the file cannot
 * be read.
 *
 * All strings allocated here are released before returning.
 */
void classbrowser_parse_file(Classbrowser_Backend *classback, gchar *filename)
{
  gchar *file_contents;
  gchar *o; // original pointer to start of contents
  gchar *c; // current position within contents
#ifdef DEBUG
  //debug var
  gchar *sss, *dss, *scs, *mcs, *hss;
#endif
  gboolean within_php;
  gboolean within_single_line_comment;
  gboolean within_multi_line_comment;
  gboolean within_heredoc;
  gboolean within_single_string;
  gboolean within_double_string;
  guint brace_count;
  guint parenthesis_count;
  guint line_number;
  gchar *heredoc_tag_start;
  gchar *heredoc_closingtag;
  guint heredoctag_length = 0;
  gboolean looking_for_heredocident;
  gchar *within_class;
  guint class_length = 0;
  gboolean looking_for_class_name;
  gboolean within_class_name;
  gchar *start_class_name = NULL;
  gchar *within_function;
  guint function_length = 0;
  gboolean looking_for_function_name;
  gboolean within_function_name;
  gchar *start_function_name = NULL;
  gboolean within_function_param_list;
  gchar *start_param_list;
  gchar *param_list;
  guint param_list_length;
  gboolean function_awaiting_brace_or_parenthesis;
  gboolean posiblevar=FALSE;
  gchar *startvarname=NULL;
  gchar *posvarname=NULL;
  gchar *varname=NULL;
  gchar *beforevarname=NULL;

  within_php = FALSE;
  within_single_line_comment = FALSE;
  within_multi_line_comment = FALSE;
  within_single_string = FALSE;
  within_double_string = FALSE;
  within_heredoc = FALSE;
  looking_for_heredocident = FALSE;
  heredoc_closingtag = NULL;
  heredoc_tag_start = NULL;
  brace_count = 0;
  // was previously incremented/decremented without ever being initialised
  parenthesis_count = 0;
  line_number = 1;
  within_class = NULL;
  looking_for_class_name = FALSE;
  within_class_name = FALSE;
  within_function = NULL;
  looking_for_function_name = FALSE;
  within_function_name = FALSE;
  within_function_param_list = FALSE;
  start_param_list = NULL;
  param_list = NULL;
  function_awaiting_brace_or_parenthesis = FALSE;

  g_return_if_fail(filename);
  file_contents = read_text_file(filename);
  g_return_if_fail(file_contents);
  o = file_contents;
  c = o;

  while (*c) {
    if (!within_php) {
      if (check_previous(o, c, "<?")) {
        within_php=TRUE;
      }
    } else {
      if (within_single_line_comment && is_newline(*c)) {
#ifdef DEBUG
        str_sec_print("SLC", scs, c, line_number);
#endif
        within_single_line_comment = FALSE;
      } else if (within_multi_line_comment && check_previous(o, c, "*/")) {
#ifdef DEBUG
        str_sec_print("MLC", mcs, c, line_number);
#endif
        within_multi_line_comment = FALSE;
      }
      //escaped single quote within single quoted string does not end the string
      //single quote ends single quoted string
      if (within_single_string && *c=='\'' && !check_previous(o, c, "\\'")) {
#ifdef DEBUG
        str_sec_print("SQS", sss, c, line_number);
#endif
        within_single_string = FALSE;
      }
      //escaped double quote within double quoted string does not end the string
      //double quote ends double quoted string
      else if (within_double_string && *c=='"' && !check_previous(o, c, "\\\"")) {
#ifdef DEBUG
        str_sec_print("DQS", dss, c, line_number);
#endif
        within_double_string = FALSE;
      }
      ///heredocs have custom closing tags. check it from the opening tag
      else if (within_heredoc && !looking_for_heredocident && *c=='\n' &&
               (check_previous(o, c-1, heredoc_closingtag) ||
                (*(c-1) == ';' && check_previous(o, c-2, heredoc_closingtag)))) {
#ifdef DEBUG
        gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): End Heredoc", filename, line_number);
        str_sec_print("HDS", hss, c, line_number);
#endif
        g_free(heredoc_closingtag);
        // cleared so the cleanup at the end of the function cannot free it twice
        heredoc_closingtag = NULL;
        within_heredoc = FALSE;
      } else if (within_heredoc && looking_for_heredocident && *c == '\n') {
        //if nowdoc
        if (*heredoc_tag_start == '\'') {
          // strip the two single quotes around the identifier; a line that
          // holds only the opening quote yields an empty tag instead of an
          // underflowed length
          gsize quoted_length = c - heredoc_tag_start;
          heredoctag_length = (quoted_length >= 2) ? quoted_length - 2 : 0;
          heredoc_closingtag = g_strndup(heredoc_tag_start + 1, heredoctag_length);
#ifdef DEBUG
          gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE,"Expecting Nowdoc closing tag: %s\n", heredoc_closingtag);
#endif
        } else {
          heredoctag_length = c - heredoc_tag_start;
          heredoc_closingtag = g_strndup(heredoc_tag_start, heredoctag_length);
#ifdef DEBUG
          gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "Expecting Heredoc closing tag: %s", heredoc_closingtag);
#endif
        }
        looking_for_heredocident = FALSE;
      }
      //if not within comments or strings or heredocs
      else if (!within_multi_line_comment && !within_single_line_comment &&
               !within_double_string && !within_single_string && !within_heredoc) {
        if (check_previous(o, c, "?>")) {
          within_php = FALSE;
        }
        //when does the second condition happen?
        //you are already outside a string. you can't have a backslash
        //just before a new opening single quote
        else if (*c == '\'' && !check_previous(o, c, "\\'")) {
          within_single_string=TRUE;
#ifdef DEBUG
          sss = c;
          gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE,"Found Single Quoted String: %d", line_number);
#endif
        }
        //when does the second condition happen?
        else if (*c == '"' && !check_previous(o, c, "\\\"")) {
          within_double_string=TRUE;
#ifdef DEBUG
          dss = c;
          gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "Found Double Quoted String: %d", line_number);
#endif
        }
        //more efficient to call function only when needed hence the first check
        else if (*c == '<' && check_previous(o, c, "<<<")) {
#ifdef DEBUG
          hss = c-2;
          gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Found Heredoc", filename, line_number);
#endif
          within_heredoc=TRUE;
          heredoc_tag_start = c+1;
          looking_for_heredocident = TRUE;
        }
        //more efficient to call function only when needed hence the first check
        else if (*c == '/' && check_previous(o, c, "//")) {
#ifdef DEBUG
          scs = c-1;
          gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Found Single Line Comment", filename, line_number);
#endif
          within_single_line_comment = TRUE;
        }
        //more efficient to call function only when needed hence the first check
        else if (*c == '*' && check_previous(o, c, "/*")) {
#ifdef DEBUG
          mcs = c-1;
          gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Found Multi Line Comment", filename, line_number);
#endif
          within_multi_line_comment = TRUE;
        } else {
          if (check_previous(o, c, "class ") && non_letter_before(o, c, "class ")) {
#ifdef DEBUG
            gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Found Class", filename, line_number);
#endif
            looking_for_class_name = TRUE;
          } else if (is_identifier_char(*c) && looking_for_class_name && !within_class_name) {
            // NOTE(review): class names start at c-1 while function names
            // start at c; whether both are right depends on the exact
            // semantics of check_previous() - confirm.
            start_class_name = c-1;
            looking_for_class_name = FALSE;
            within_class_name = TRUE;
          } else if ( (is_whitespace(*c) || is_opening_brace(*c)) && within_class_name) {
            class_length = (c - start_class_name);
            g_free(within_class);
            within_class = g_strndup(start_class_name, class_length);
#ifdef DEBUG
            gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Class '%s'", filename, line_number, within_class);
#endif
            classbrowser_classlist_add(classback, within_class, filename, line_number,TAB_PHP);
            within_class_name = FALSE;
          } else if (check_previous(o, c, "function ") && non_letter_before(o, c, "function ")) {
#ifdef DEBUG
            gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE,"%s","Looking for function name");
#endif
            looking_for_function_name = TRUE;
          }

          if (is_identifier_char(*c) && looking_for_function_name && !within_function_name) {
#ifdef DEBUG
            gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE,"%s", "Storing function name");
#endif
            start_function_name = c;
            function_length = 0;
            looking_for_function_name = FALSE;
            within_function_name = TRUE;
          }

          if ( (is_whitespace(*c) || is_opening_brace(*c) || is_opening_parenthesis(*c)) &&
               within_function_name && function_length==0) {
#ifdef DEBUG
            gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE,"%s", "Found function");
#endif
            function_length = (c - start_function_name);
            g_free(within_function);
            within_function = g_strndup(start_function_name, function_length);
            function_awaiting_brace_or_parenthesis = TRUE;
            within_function_name = FALSE;
          }

          if ( function_awaiting_brace_or_parenthesis && is_opening_brace(*c)) {
            // name followed directly by a brace: report without a parameter list
            function_awaiting_brace_or_parenthesis = FALSE;
            if (within_class) {
              classbrowser_functionlist_add(classback,within_class, within_function, filename, TAB_PHP, line_number, NULL);
#ifdef DEBUG
              gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Class method %s::%s", filename, line_number, within_class, within_function);
#endif
            } else {
              classbrowser_functionlist_add(classback,NULL, within_function, filename, TAB_PHP, line_number, NULL);
#ifdef DEBUG
              gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Function %s", filename, line_number, within_function);
#endif
            }
          } else if (function_awaiting_brace_or_parenthesis && is_opening_parenthesis(*c)) {
            within_function_param_list = TRUE;
            start_param_list = c+1;
            function_awaiting_brace_or_parenthesis = FALSE;
          } else if (is_closing_parenthesis(*c) && within_function_param_list) {
            param_list_length = (c - start_param_list);
            g_free(param_list);
            param_list = g_strndup(start_param_list, param_list_length);
            if (within_class) {
              classbrowser_functionlist_add(classback, within_class, within_function, filename, TAB_PHP, line_number, param_list);
#ifdef DEBUG
              gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Class method %s::%s(%s)", filename, line_number, within_class, within_function, param_list);
#endif
            } else {
              classbrowser_functionlist_add(classback, NULL, within_function, filename, TAB_PHP,line_number, param_list);
#ifdef DEBUG
              gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "%s(%d): Function %s(%s)", filename, line_number, within_function, param_list);
#endif
            }
            within_function_param_list = FALSE;
          }

          if (posiblevar){
            if (is_identifier_char(*c)){
              posvarname=c;
            } else {
              posiblevar=FALSE;
              int len=posvarname - startvarname +1; /*include initial $*/
              if (len>1){ /*only if we have $ and something more */
                varname = g_strndup(startvarname, len);
                if (!beforevarname){
                  beforevarname=g_strdup(varname); /*store last variable name found*/
                } else {
                  if (strcmp(beforevarname,varname)==0){
#ifdef DEBUG
                    gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "Duplicate variable: %s",varname);
#endif
                  } else {
#ifdef DEBUG
                    gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "Classbrowser var added:%s",varname);
#endif
                    classbrowser_varlist_add(classback, varname, within_function, filename);
                    g_free(beforevarname);
                    beforevarname=g_strdup(varname);
                  }
                }
                g_free(varname);
              }
            }
          }

          if (*c=='$' && !within_function_param_list && !within_multi_line_comment && !within_single_line_comment){
            /* skip params vars */
            posiblevar=TRUE;
            startvarname=c;
            /* the name so far ends at the '$' itself, so a '$' that is not
               followed by an identifier char gives len == 1 instead of a
               subtraction against a NULL or stale pointer */
            posvarname=c;
          }

          if (is_opening_brace(*c)) {
            brace_count++;
#ifdef DEBUG
            gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE,"Brace count %d:%c", brace_count, *c);
#endif
          } else if (is_closing_brace(*c)) {
            brace_count--;
#ifdef DEBUG
            gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE,"Brace count %d:%c", brace_count, *c);
#endif
            // back at the outermost level: any open class has ended
            if (brace_count == 0) {
              if (within_class) {
#ifdef DEBUG
                gphpedit_debug_message(DEBUG_CLASSBROWSER_PARSE, "Freeing class %s", within_class);
#endif
                g_free(within_class);
                within_class = NULL;
              }
            }
          } else if (is_opening_parenthesis(*c)) {
            parenthesis_count++;
          } else if (is_closing_parenthesis(*c)) {
            parenthesis_count--;
          }
        }
      }
    }
    if (is_newline(*c)) {
      line_number++;
    }
    c++;
  }

  // g_free() accepts NULL, so every buffer can be released unconditionally.
  // within_class and heredoc_closingtag are still set here when the file
  // ends with unbalanced braces or inside a heredoc.
  g_free(param_list);
  g_free(within_function);
  g_free(within_class);
  g_free(heredoc_closingtag);
  g_free(beforevarname);
  g_free(file_contents);
}
/*
 * Parse one line of input from `p`, starting at a non-whitespace character.
 *
 * Characters are consumed until a newline, a closing brace or EOF is read.
 * Atoms and braced s-expressions are appended, in order, to a chain that
 * starts at a stack-allocated dummy head. p->state is set to SKIP_SPACE on
 * entry, and p->indentation is reset to 0 when the line ends at a newline.
 *
 * Returns:
 *   - the chain itself when it holds at most one node (NULL when empty);
 *   - otherwise a newly malloc'd LIST node whose `content` is the chain.
 *
 * The process exits with status 1 on an unmatched closing brace, an unknown
 * parser state, or allocation failure.
 */
swexp_list_node *parse_line(parser *p) {
    /* Held as int, not char: comparing a plain char against EOF depends on
     * the implementation-defined signedness of char and can confuse a 0xFF
     * byte with end of input. */
    int c;

    // build a list of expressions started by this
    // list head on the stack.
    swexp_list_node head, *tail;
    head.next = NULL;
    head.content = NULL;
    head.type = UNDEFINED;
    tail = &head;

    p->state = SKIP_SPACE;
    while ((c = pgetc(p)) != EOF && !is_newline(c) && !is_closing_brace(c)) {
        // if we encounter a comment in any state, strip it out
        IGNORE_COMMENTS()
        switch (p->state) {
        case COLLECTING_ATOM:
            if (is_space(c)) {
                // end atom
                tail->next = close_atom(p);
                tail = tail->next;
                prewind(p, c);
            } else if (is_opening_brace(c)) {
                // an atom directly followed by a brace: the atom is chained
                // in front of the braced contents and the result is wrapped
                // by listof() before being appended
                swexp_list_node *bracehead = close_atom(p);
                swexp_list_node *bracecontent = parse_s_expr(p, c);
                bracehead->next = bracecontent;
                tail->next = listof(bracehead);
                tail = chain_tail(tail);
            } else {
                // continue to build item
                build_atom(p, c);
            }
            break;
        case SKIP_SPACE:
            if (is_opening_brace(c)) {
                swexp_list_node *brace = parse_s_expr(p, c);
                tail->next = brace;
                tail = chain_tail(tail);
            } else if (!is_space(c)) {
                begin_atom(p);
                prewind(p, c);
            }
            break;
        default:
            printf("unexpected state %d in parse_line\n", p->state);
            exit(1);
        }
    }

    if (is_newline(c)) {
        p->indentation = 0;
    }
    if (is_closing_brace(c)) {
        printf("encountered unmatched closing brace\n");
        exit(1);
    }

    // close ongoing capture
    if (p->state == COLLECTING_ATOM) {
        tail->next = close_atom(p);
    }

    // if the number of collected atoms is more than one,
    // make it a list and return it
    if (chain_len(head.next) > 1) {
        swexp_list_node *listhead = malloc(sizeof *listhead);
        if (listhead == NULL) {
            printf("out of memory in parse_line\n");
            exit(1);
        }
        listhead->type = LIST;
        listhead->next = NULL;
        listhead->content = head.next;
        listhead->location = NULL;
        return listhead;
    } else {
        return head.next;
    }
}