/* Input: Arg1: +Substr Arg2: + String Arg3: +forward/reverse (checks only f/r) f means the first match from the start of String r means the first match from the end of String Output: Arg4: Beg Beg is the offset where Substr matches. Must be a variable or an integer Arg5: End End is the offset of the next character after the end of Substr Must be a variable or an integer. Both Beg and End can be negative, in which case they represent the offset from the 2nd character past the end of String. For instance, -1 means the next character past the end of String, so End = -1 means that Substr must be a suffix of String.. The meaning of End and of negative offsets is consistent with substring and string_substitute predicates. */ xsbBool str_match(CTXTdecl) { static char *subptr, *stringptr, *direction, *matchptr; static size_t substr_beg, substr_end; int reverse=TRUE; /* search in reverse */ int beg_bos_offset=TRUE; /* measure beg offset from the beg of string */ int end_bos_offset=TRUE; /* measure end offset from the beg of string */ Integer str_len, sub_len; /* length of string and substring */ Cell beg_offset_term, end_offset_term; term = ptoc_tag(CTXTc 1); term2 = ptoc_tag(CTXTc 2); term3 = ptoc_tag(CTXTc 3); beg_offset_term = ptoc_tag(CTXTc 4); end_offset_term = ptoc_tag(CTXTc 5); if (!isatom(term) || !isatom(term2) || !isatom(term3)) { xsb_abort("STR_MATCH: Arguments 1,2,3 must be bound to strings"); } subptr = string_val(term); stringptr = string_val(term2); direction = string_val(term3); if (*direction == 'f') reverse=FALSE; else if (*direction != 'r') xsb_abort("STR_MATCH: Argument 3 must be bound to forward/reverse"); str_len=strlen(stringptr); sub_len=strlen(subptr); if (isointeger(beg_offset_term)) { if (oint_val(beg_offset_term) < 0) { beg_bos_offset = FALSE; } } if (isointeger(end_offset_term)) { if (oint_val(end_offset_term) < 0) { end_bos_offset = FALSE; } } if (reverse) matchptr = xsb_strrstr(stringptr, subptr); else matchptr = strstr(stringptr, subptr); if (matchptr == NULL) return FALSE; substr_beg = (beg_bos_offset? matchptr - stringptr : -(str_len - (matchptr - stringptr)) ); substr_end = (end_bos_offset? (matchptr - stringptr) + sub_len : -(str_len + 1 - (matchptr - stringptr) - sub_len) ); return (p2p_unify(CTXTc beg_offset_term, makeint(substr_beg)) && p2p_unify(CTXTc end_offset_term, makeint(substr_end))); }
sexp_t *prim_atom(sexp_t *args) { if (list_len(args) != 1) { fprintf(stderr, "error: argument count\n"); return NULL; } if (isatom(car(args))) return t; return nil; }
static bool stream_type(int sno, Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */ stream_flags_t flags = GLOBAL_Stream[sno].status & (Binary_Stream_f); if (!IsVarTerm(t2) && !(isatom(t2))) { return FALSE; } if (flags & Binary_Stream_f) return Yap_unify(t2, TermBinary); return Yap_unify(t2, TermText); }
xsbBool str_cat(CTXTdecl) { char *str1, *str2, *tmpstr; size_t tmpstr_len; term = ptoc_tag(CTXTc 1); term2 = ptoc_tag(CTXTc 2); if (isatom(term) && isatom(term2)) { str1 = string_val(term); str2 = string_val(term2); tmpstr_len = strlen(str1) + strlen(str2) + 1; tmpstr = (char *)mem_alloc(tmpstr_len,LEAK_SPACE); strcpy(tmpstr, str1); strcat(tmpstr, str2); str1 = string_find(tmpstr, 1); mem_dealloc(tmpstr,tmpstr_len,LEAK_SPACE); return atom_unify(CTXTc makestring(str1), ptoc_tag(CTXTc 3)); } else return FALSE; }
static bool found_eof(int sno, Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */ stream_flags_t flags = GLOBAL_Stream[sno].status & (Past_Eof_Stream_f | Eof_Stream_f); if (!IsVarTerm(t2) && !(isatom(t2))) { return FALSE; } if (flags & Past_Eof_Stream_f) return Yap_unify(t2, MkAtomTerm(AtomPast)); if (flags & Eof_Stream_f) return Yap_unify(t2, MkAtomTerm(AtomAt)); return Yap_unify(t2, MkAtomTerm(AtomNot)); }
static bool has_encoding(int sno, Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */ if (!IsVarTerm(t2) && !(isatom(t2))) { return FALSE; } if (0 && IsAtomTerm(t2)) { encoding_t e = enc_id(RepAtom(AtomOfTerm(t2))->StrOfAE); GLOBAL_Stream[sno].encoding = e; return true; } else { const char *s = enc_name(LOCAL_encoding); return Yap_unify(t2, MkAtomTerm(Yap_LookupAtom(s))); } }
static Int representation_error(int sno, Term t2 USES_REGS) { stream_flags_t flags = GLOBAL_Stream[sno].status & (RepError_Xml_f | RepError_Prolog_f); /* '$representation_error'(+Stream,-ErrorMessage) */ if (!IsVarTerm(t2) && isatom(t2)) { return false; } if (flags & RepError_Xml_f) { return Yap_unify(t2, TermXml); } if (flags & RepError_Prolog_f) { return Yap_unify(t2, TermProlog); } return Yap_unify(t2, TermError); }
static bool stream_mode(int sno, Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */ stream_flags_t flags = GLOBAL_Stream[sno].status & (Input_Stream_f | Output_Stream_f | Append_Stream_f); if (!IsVarTerm(t2) && !(isatom(t2))) { return FALSE; } if (flags & Input_Stream_f) return Yap_unify(t2, TermRead); if (flags & Output_Stream_f) return Yap_unify(t2, TermWrite); if (flags & Append_Stream_f) return Yap_unify(t2, TermAppend); return false; }
static bool eof_action(int sno, Term t2 USES_REGS) { /* '$set_output'(+Stream,-ErrorMessage) */ stream_flags_t flags = GLOBAL_Stream[sno].status & (Eof_Error_Stream_f | Reset_Eof_Stream_f | Push_Eof_Stream_f); if (!IsVarTerm(t2) && !(isatom(t2))) { return FALSE; } if (flags & Eof_Error_Stream_f) { return Yap_unify(t2, TermError); } if (flags & Reset_Eof_Stream_f) { return Yap_unify(t2, TermReset); } return Yap_unify(t2, TermEOfCode); }
IR Compile(Form* form) { string out; string tmp; if(form == NULL) error(form,"Can't emit code for the null form."); else if(isatom(form)) { if(val(form) == "quit") exit(0); else if(val(form) == "IR") { master.Program->dump(); nerror("Dumped IR."); } else if(val(form) == "debug") { master.debug = !master.debug; nerror("Debug mode is ",(master.debug?"on":"off"),"."); } else out = emitCode(form); } else out = emitCode(form/*,Top*/); /*for(unsigned long i = 0; i < master.Persistent.size(); i++) tmp += master.Persistent[i] + "\n";*/ for(unsigned long i = 0; i < master.CodeStack.size(); i++) tmp += master.CodeStack[i] + "\n"; out = "define " + latest_type() + " @entry(){\n" + out + "\nret " + latest_type() + " " + get_current_res() + "\n}"; out = tmp + out; string type = latest_type(); master.CodeStack.clear(); clear_reader(); return {out,type}; }
static int istag(char ch) { return isatom(ch) && (ch != '+'); }
string dump_form(Form* input) { string out = (string)"Form: " + (isatom(input) ? "Atom" : "List") + "\n'"; out += input + (string)"'\n"; return out + "(Line " + to_string(input->line) + ", column " + to_string(input->column) + ")"; }
/* XSB string substitution entry point: replace substrings specified in Arg2 with strings in Arg3. In: Arg1: string Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...] Arg3: list of replacement strings Out: Arg4: new (output) string Always succeeds, unless error. */ xsbBool string_substitute(CTXTdecl) { /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1; prolog_term subst_str_term=(prolog_term)0, subst_str_list_term, subst_str_list_term1; char *input_string=NULL; /* string where matches are to be found */ char *subst_string=NULL; prolog_term beg_term, end_term; Integer beg_offset=0, end_offset=0, input_len; Integer last_pos = 0; /* last scanned pos in input string */ /* the output buffer is made large enough to include the input string and the substitution string. */ int conversion_required=FALSE; /* from C string to Prolog char list */ XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (isatom(input_term)) /* check it */ input_string = string_val(input_term); else if (islist(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "STRING_SUBSTITUTE", "input string"); conversion_required = TRUE; } else xsb_abort("[STRING_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: substring specification */ subst_spec_list_term = reg_term(CTXTc 2); if (!islist(subst_spec_list_term) && !isnil(subst_spec_list_term)) xsb_abort("[STRING_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]"); /* handle substitution string */ subst_str_list_term = reg_term(CTXTc 3); if (! islist(subst_str_list_term)) xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings"); output_term = reg_term(CTXTc 4); if (! isref(output_term)) xsb_abort("[STRING_SUBSTITUTE] Arg 4 (the output) must be an unbound variable"); subst_spec_list_term1 = subst_spec_list_term; subst_str_list_term1 = subst_str_list_term; if (isnil(subst_spec_list_term1)) { XSB_StrSet(&output_buffer, input_string); goto EXIT; } if (isnil(subst_str_list_term1)) xsb_abort("[STRING_SUBSTITUTE] Arg 3 must not be an empty list"); do { subst_reg_term = p2p_car(subst_spec_list_term1); subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1); if (!isnil(subst_str_list_term1)) { subst_str_term = p2p_car(subst_str_list_term1); subst_str_list_term1 = p2p_cdr(subst_str_list_term1); if (isatom(subst_str_term)) { subst_string = string_val(subst_str_term); } else if (islist(subst_str_term)) { subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf, "STRING_SUBSTITUTE", "substitution string"); } else xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings"); } beg_term = p2p_arg(subst_reg_term,1); end_term = p2p_arg(subst_reg_term,2); if (!(isointeger(beg_term)) || !(isointeger(end_term))) xsb_abort("[STRING_SUBSTITUTE] Non-integer in Arg 2"); else { beg_offset = oint_val(beg_term); end_offset = oint_val(end_term); } /* -1 means end of string */ if (end_offset < 0) end_offset = input_len; if ((end_offset < beg_offset) || (beg_offset < last_pos)) xsb_abort("[STRING_SUBSTITUTE] Substitution regions in Arg 2 not sorted"); /* do the actual replacement */ XSB_StrAppendBlk(&output_buffer,input_string+last_pos,(int)(beg_offset-last_pos)); XSB_StrAppend(&output_buffer, subst_string); last_pos = end_offset; } while (!isnil(subst_spec_list_term1)); XSB_StrAppend(&output_buffer, input_string+end_offset); EXIT: /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "STRING_SUBSTITUTE", "Arg 4"); else c2p_string(CTXTc output_buffer.string, output_term); return(TRUE); }
/* XSB string substitution entry point In: Arg1: string Arg2: beginning offset Arg3: ending offset. `_' or -1: end of string, -2: char before last, etc. Out: Arg4: new (output) string Always succeeds, unless error. */ xsbBool substring(CTXTdecl) { /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term beg_offset_term, end_offset_term; char *input_string=NULL; /* string where matches are to be found */ Integer beg_offset=0, end_offset=0, input_len=0, substring_len=0; int conversion_required=FALSE; XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (isatom(input_term)) /* check it */ input_string = string_val(input_term); else if (islist(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "SUBSTRING", "input string"); conversion_required = TRUE; } else xsb_abort("[SUBSTRING] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: beginning offset */ beg_offset_term = reg_term(CTXTc 2); if (! (isointeger(beg_offset_term))) xsb_abort("[SUBSTRING] Arg 2 (the beginning offset) must be an integer"); beg_offset = oint_val(beg_offset_term); if (beg_offset < 0) beg_offset = 0; else if (beg_offset > input_len) beg_offset = input_len; /* arg 3: ending offset */ end_offset_term = reg_term(CTXTc 3); if (isref(end_offset_term)) end_offset = input_len; else if (! (isointeger(end_offset_term))) xsb_abort("[SUBSTRING] Arg 3 (the end offset) must be integer or _"); else end_offset = oint_val(end_offset_term); if (end_offset < 0) end_offset = input_len + 1 + end_offset; else if (end_offset > input_len) end_offset = input_len; else if (end_offset < beg_offset) end_offset = beg_offset; output_term = reg_term(CTXTc 4); if (! isref(output_term)) xsb_abort("[SUBSTRING] Arg 4 (the output string) must be an unbound variable"); /* do the actual replacement */ substring_len = end_offset-beg_offset; XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, (int)substring_len); XSB_StrNullTerminate(&output_buffer); /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "SUBSTRING", "Arg 4"); else c2p_string(CTXTc output_buffer.string, output_term); return(TRUE); }
string emitCode(Form* form/*, emissionContext ctx = Bottom*/) { string out; if(form == NULL) error(form,"Can't emit code for the null form."); else if(isatom(form)) { switch(analyze(val(form))) { case BooleanTrue: { out = constant(get_unique_res("i1"),"i1","true"); break; } case BooleanFalse: { out = constant(get_unique_res("i1"),"i1","false"); break; } case Integer: { out = constant(get_unique_res("i64"),"i64",val(form)); break; } case Character: { string c = string(val(form),1,val(form).length()-2); string address = "@___string" + to_string<unsigned long>(++string_version); push(address + " = global [2 x i8] c\"" + c + "\0\0\""); out += get_unique_res("i8") + " = load i8* getelementptr inbounds ([2 x i8]* " + address + ", i32 0, i64 0)"; break; } case Real: { out = constant(get_unique_res("double"),"double",val(form)); break; } case String: { //Remember strings come with their double quotes //Also convert them to unicode string str = cutboth(val(form)); unsigned long length = str.length(); stringstream ss; string result; for(unsigned long i = 0; i < str.length(); i++) { string tmp; if(str[i] == '\\') { //Oh goodness, escape sequences i++; switch(str[i]) { case 'n': ss << "0A"; break; case '\\': ss << "5C"; break; case '"': ss << "22"; break; case '0': ss << "00"; break; case 'a': ss << "07"; break; case 'b': ss << "08"; break; case 'f': ss << "0C"; break; case 'r': ss << "0D"; break; case 't': ss << "09"; break; case 'v': ss << "0B"; break; case 'x': //TODO: Manage hex input break; case 'o': //TODO: Manage octal input break; case 'U': //TODO: Great, a unicode codepoint... break; default: error(form,"Unknown character escape sequence."); } length--; } else { ss << hex << (int)str[i]; } tmp = ss.str(); if(tmp.length() > 2) { tmp = string(tmp,tmp.length()-2); } result += '\\' + tmp; //cerr << "Result: " << result << endl; } string type = "[" + to_string<unsigned long>(length+1) + " x i8]"; push("@___string" + to_string<unsigned long>(++string_version) + " = global " + type + " c\"" + result + "\\00\""); out = get_unique_res("i8*") + " = getelementptr " + type + "* @___string" + to_string<unsigned long>(string_version) + ", i64 0, i64 0"; break; } case Symbol: { string sym = val(form); Variable* tmp = lookup(sym); if(tmp == NULL) error_unbound(form); else out = load(get_unique_res_address(tmp->type,tmp->address,true),tmp->type,((tmp->global) ? "@" : "%") + sym+to_string(tmp->scope_address)); break; } case Unidentifiable: { error(form,"Received an unidentifiable form as input."); break; } } } else { if(islist(car(form))) error(form,"Lists can't be used as function names in calls. Until I implement lambda."); string func = val(car(form)); map<string,hFuncPtr>::iterator seeker = Core.find(func); if(seeker != Core.end()) out = seeker->second(form); else out = callFunction(form); } return out+"\n"; }