// // Split_Lines: C // // Given a string series, split lines on CR-LF. Give back array of strings. // // Note: The definition of "line" in POSIX is a sequence of characters that // end with a newline. Hence, the last line of a file should have a newline // marker, or it's not a "line") // // https://stackoverflow.com/a/729795 // // This routine does not require it. // // !!! CR support is likely to be removed...and CR will be handled as a normal // character, with special code needed to process it. // REBARR *Split_Lines(const REBVAL *str) { REBDSP dsp_orig = DSP; REBCNT len = VAL_LEN_AT(str); REBCNT i = VAL_INDEX(str); if (i == len) return Make_Array(0); DECLARE_MOLD (mo); Push_Mold(mo); REBCHR(const*) cp = VAL_STRING_AT(str); REBUNI c; cp = NEXT_CHR(&c, cp); for (; i < len; ++i, cp = NEXT_CHR(&c, cp)) { if (c != LF && c != CR) { Append_Codepoint(mo->series, c); continue; } Init_Text(DS_PUSH(), Pop_Molded_String(mo)); SET_CELL_FLAG(DS_TOP, NEWLINE_BEFORE); Push_Mold(mo); if (c == CR) { REBCHR(const*) tp = NEXT_CHR(&c, cp); if (c == LF) { ++i; cp = tp; // treat CR LF as LF, lone CR as LF } } } // If there's any remainder we pushed in the buffer, consider the end of // string to be an implicit line-break if (STR_SIZE(mo->series) == mo->offset) Drop_Mold(mo); else { Init_Text(DS_PUSH(), Pop_Molded_String(mo)); SET_CELL_FLAG(DS_TOP, NEWLINE_BEFORE); } return Pop_Stack_Values_Core(dsp_orig, ARRAY_FLAG_NEWLINE_AT_TAIL); }
void Read_Input_Files(int argc, const char *argv[], int round) { int n; Init_Text(argc); Init_Token_Array(); /* Assume all texts to be new */ Number_Of_New_Texts = Number_Of_Texts; /* Read the files */ for (n = 0; n < Number_Of_Texts; n++) { const char *fname = argv[n]; struct text *txt = &Text[n]; if (round == 1 && !is_set_option('T')) { fprintf(Output_File, "File %s: ", fname); } txt->tx_fname = fname; txt->tx_pos = 0; txt->tx_start = txt->tx_limit = Text_Length(); if (is_new_old_separator(fname)) { if (round == 1 && !is_set_option('T')) { fprintf(Output_File, "separator\n"); } Number_Of_New_Texts = n; } else { if (!Open_Text(First, txt)) { if (round == 1 && !is_set_option('T')) { fprintf(Output_File, ">>>> cannot open <<<< "); } /* the file has still been opened with a null file for uniformity */ } while (Next_Text_Token_Obtained(First)) { if (!Token_EQ(lex_token, End_Of_Line)) { Store_Token(lex_token); } } Close_Text(First, txt); txt->tx_limit = Text_Length(); /* report */ if (round == 1 && !is_set_option('T')) { fprint_count(Output_File, txt->tx_limit - txt->tx_start, token_name ); fprintf(Output_File, ", "); fprint_count(Output_File, lex_nl_cnt-1, "line"); if (lex_non_ascii_cnt) { fprintf(Output_File, ", "); fprint_count(Output_File, lex_non_ascii_cnt, "non-ASCII character" ); } fprintf(Output_File, "\n"); } #ifdef DB_TEXT db_print_text(txt); #endif /* DB_TEXT */ } fflush(Output_File); } /* report total */ if (round == 1 && !is_set_option('T')) { fprintf(Output_File, "Total: "); fprint_count(Output_File, Text_Length() - 1, token_name); fprintf(Output_File, "\n\n"); fflush(Output_File); } }
/*
 * Reads and tokenizes each input file named in argv[], storing tokens in
 * the global token array and filling in the per-file Text[] records.
 * A single "new/old separator" entry splits the list into new texts
 * (before it) and old texts (after it); a second separator is fatal.
 * Per-file statistics are printed unless the 'T' (terse) option is set;
 * a grand total is always printed.
 */
void Read_Input_Files(int argc, const char *argv[]) {
	int n;

	Init_Text(argc);
	Init_Token_Array();

	/* Initially assume all texts to be new */
	Number_of_New_Texts = Number_of_Texts;

	/* Read the files */
	for (n = 0; n < Number_of_Texts; n++) {
		const char *fname = argv[n];
		struct text *txt = &Text[n];

		if (!is_set_option('T')) {
			fprintf(Output_File, "File %s: ", fname);
		}
		txt->tx_fname = fname;
		txt->tx_pos = 0;
		/* both bounds start at the current end of the token array;
		   tx_limit is advanced after tokenizing */
		txt->tx_start = Token_Array_Length();
		txt->tx_limit = Token_Array_Length();

		if (is_new_old_separator(fname)) {
			if (!is_set_option('T')) {
				fprintf(Output_File, "new/old separator\n");
			}
			/* only one separator is allowed; detect duplicates by
			   checking whether the count was already lowered */
			if (Number_of_New_Texts == Number_of_Texts) {
				Number_of_New_Texts = n;
			}
			else fatal("more than one new/old separator");
		}
		else {
			int file_opened = 0;
			if (Open_Text(First_Pass, txt)) {
				file_opened = 1;
			}
			else {
				/* print a warning */
				if (is_set_option('T')) {
					/* the file name has not yet been
					   printed; print it now */
					fprintf(Output_File,
						"File %s: ", fname);
				}
				fprintf(Output_File,
					">>>> cannot open <<<<\n");
				/* the file has still been opened with a
				   null file for uniformity */
			}
			/* store every token except end-of-line markers */
			while (Next_Text_Token_Obtained()) {
				if (!Token_EQ(lex_token, End_Of_Line)) {
					Store_Token(lex_token);
				}
			}
			Close_Text(First_Pass, txt);
			txt->tx_limit = Token_Array_Length();
			/* remember whether the file's last token was a
			   newline, for line counting and reporting below */
			txt->tx_EOL_terminated =
				Token_EQ(lex_token, End_Of_Line);

			/* report */
			if (file_opened && !is_set_option('T')) {
				fprint_count(Output_File,
					txt->tx_limit - txt->tx_start,
					Token_Name
				);
				fprintf(Output_File, ", ");
				/* an unterminated final line still counts
				   as a line, hence the +1 correction */
				fprint_count(Output_File,
					lex_nl_cnt - 1 +
					(!txt->tx_EOL_terminated ? 1 : 0),
					"line"
				);
				if (!txt->tx_EOL_terminated) {
					fprintf(Output_File,
						" (not NL-terminated)");
				}
				if (lex_non_ascii_cnt) {
					fprintf(Output_File, ", ");
					fprint_count(Output_File,
						lex_non_ascii_cnt,
						"non-ASCII character"
					);
				}
				fprintf(Output_File, "\n");
			}
#ifdef	DB_TEXT
			db_print_text(txt);
#endif	/* DB_TEXT */
		}
		fflush(Output_File);
	}

	/* report total */
	/* if a separator was present it is not itself an input file, so the
	   file count and old-text count are reduced by one */
	int sep_present = (Number_of_Texts != Number_of_New_Texts);
	fprintf(Output_File, "Total input: ");
	fprint_count(Output_File,
		(!sep_present ? Number_of_Texts : Number_of_Texts - 1),
		"file"
	);
	fprintf(Output_File, " (%d new, %d old), ",
		Number_of_New_Texts,
		(!sep_present ? 0 : Number_of_Texts - Number_of_New_Texts - 1)
	);
	fprint_count(Output_File, Token_Array_Length() - 1, Token_Name);
	fprintf(Output_File, "\n\n");
	fflush(Output_File);
}