Пример #1
0
//
//  Split_Lines: C
//
// Given a string series, split it into lines at each LF, lone CR, or CR LF
// pair.  Gives back an array of strings (the line terminators are dropped).
//
// Note: The definition of "line" in POSIX is a sequence of characters that
// ends with a newline.  Hence, the last line of a file should have a newline
// marker, or it's not a "line".
//
// https://stackoverflow.com/a/729795
//
// This routine does not require it.
//
// !!! CR support is likely to be removed...and CR will be handled as a normal
// character, with special code needed to process it.
//
REBARR *Split_Lines(const REBVAL *str)
{
    REBDSP dsp_orig = DSP;

    // The scan starts at the value's current position (VAL_STRING_AT), so
    // the loop must count codepoints relative to that position.  VAL_LEN_AT
    // is taken to be the number of codepoints from the index to the tail;
    // comparing it against the absolute VAL_INDEX (as was done previously)
    // truncated or skipped the scan for any value not positioned at head.
    //
    REBCNT len = VAL_LEN_AT(str);
    if (len == 0)
        return Make_Array(0);

    DECLARE_MOLD (mo);
    Push_Mold(mo);

    REBCHR(const*) cp = VAL_STRING_AT(str);

    // Loop invariant: `c` is the codepoint at relative position `i`, and
    // `cp` points just past it.
    //
    REBUNI c;
    cp = NEXT_CHR(&c, cp);

    REBCNT i;
    for (i = 0; i < len; ++i, cp = NEXT_CHR(&c, cp)) {
        if (c != LF && c != CR) {
            Append_Codepoint(mo->series, c);
            continue;
        }

        // Line terminator: emit what was accumulated (possibly an empty
        // string) as one line, then start collecting the next line.
        //
        Init_Text(DS_PUSH(), Pop_Molded_String(mo));
        SET_CELL_FLAG(DS_TOP, NEWLINE_BEFORE);

        Push_Mold(mo);

        if (c == CR) {
            //
            // Peek one codepoint ahead.  This overwrites `c`, but the loop
            // increment re-reads it from `cp`, so the invariant is restored
            // whether or not the lookahead is consumed.
            //
            REBCHR(const*) tp = NEXT_CHR(&c, cp);
            if (c == LF) {
                ++i;
                cp = tp; // treat CR LF as LF, lone CR as LF
            }
        }
    }

    // If there's any remainder we pushed in the buffer, consider the end of
    // string to be an implicit line-break

    if (STR_SIZE(mo->series) == mo->offset)
        Drop_Mold(mo); // nothing accumulated since the last terminator
    else {
        Init_Text(DS_PUSH(), Pop_Molded_String(mo));
        SET_CELL_FLAG(DS_TOP, NEWLINE_BEFORE);
    }

    // Gather everything pushed since entry into the result array.
    //
    return Pop_Stack_Values_Core(dsp_orig, ARRAY_FLAG_NEWLINE_AT_TAIL);
}
Пример #2
0
/*
	Read_Input_Files: run every entry of argv[] through the lexer and
	store its tokens, recording per text the range of stored tokens
	(tx_start up to tx_limit).

	An entry recognized by is_new_old_separator() is not read; instead
	its position becomes Number_Of_New_Texts.

	Progress and totals are written to Output_File only when round is 1
	and option 'T' is not set.

	The loop bound is Number_Of_Texts, presumably established by
	Init_Text(argc) -- verify against its definition.
*/
void
Read_Input_Files(int argc, const char *argv[], int round) {
	int idx;
	int verbose;

	Init_Text(argc);
	Init_Token_Array();

	/* one switch for all the reporting below */
	verbose = (round == 1 && !is_set_option('T'));

	/* until a separator turns up, every text counts as new */
	Number_Of_New_Texts = Number_Of_Texts;

	for (idx = 0; idx < Number_Of_Texts; idx++) {
		const char *path = argv[idx];
		struct text *cur = &Text[idx];

		if (verbose) {
			fprintf(Output_File, "File %s: ", path);
		}

		/* start with an empty token range at the current end */
		cur->tx_fname = path;
		cur->tx_pos = 0;
		cur->tx_limit = Text_Length();
		cur->tx_start = cur->tx_limit;

		/* the separator is a marker only, there is nothing to read */
		if (is_new_old_separator(path)) {
			if (verbose) {
				fprintf(Output_File, "separator\n");
			}
			Number_Of_New_Texts = idx;
			fflush(Output_File);
			continue;
		}

		/*	a file that cannot be opened has still been opened
			with a null file for uniformity, so the code below
			runs regardless
		*/
		if (!Open_Text(First, cur) && verbose) {
			fprintf(Output_File, ">>>> cannot open <<<< ");
		}

		/* keep every token except the end-of-line markers */
		while (Next_Text_Token_Obtained(First)) {
			if (Token_EQ(lex_token, End_Of_Line)) {
				continue;
			}
			Store_Token(lex_token);
		}
		Close_Text(First, cur);
		cur->tx_limit = Text_Length();

		/* per-file summary: tokens, lines, non-ASCII characters */
		if (verbose) {
			fprint_count(Output_File,
				     cur->tx_limit - cur->tx_start,
				     token_name);
			fprintf(Output_File, ", ");
			fprint_count(Output_File, lex_nl_cnt-1, "line");
			if (lex_non_ascii_cnt) {
				fprintf(Output_File, ", ");
				fprint_count(Output_File,
					     lex_non_ascii_cnt,
					     "non-ASCII character");
			}
			fprintf(Output_File, "\n");
		}

#ifdef	DB_TEXT
		db_print_text(cur);
#endif	/* DB_TEXT */

		fflush(Output_File);
	}

	/* grand total over all texts */
	if (verbose) {
		fprintf(Output_File, "Total: ");
		fprint_count(Output_File, Text_Length() - 1, token_name);
		fprintf(Output_File, "\n\n");
		fflush(Output_File);
	}
}
Пример #3
0
/*
	Read_Input_Files: run every entry of argv[] through the lexer and
	store its tokens, recording per text the range of stored tokens
	(tx_start up to tx_limit) and whether the last token obtained was
	an End_Of_Line.

	An entry recognized by is_new_old_separator() is not read; its
	position becomes Number_of_New_Texts, and a second such entry is
	reported through fatal().

	Per-file progress is suppressed by option 'T', except that a file
	which cannot be opened is always reported. The final totals are
	always printed.

	The loop bound is Number_of_Texts, presumably established by
	Init_Text(argc) -- verify against its definition.
*/
void
Read_Input_Files(int argc, const char *argv[]) {
	int idx;
	int terse;

	Init_Text(argc);
	Init_Token_Array();

	/* option 'T' silences the per-file progress lines */
	terse = is_set_option('T') ? 1 : 0;

	/* until a separator turns up, every text counts as new */
	Number_of_New_Texts = Number_of_Texts;

	for (idx = 0; idx < Number_of_Texts; idx++) {
		const char *path = argv[idx];
		struct text *cur = &Text[idx];

		if (!terse) {
			fprintf(Output_File, "File %s: ", path);
		}

		/* start with an empty token range at the current end */
		cur->tx_fname = path;
		cur->tx_pos = 0;
		cur->tx_start = Token_Array_Length();
		cur->tx_limit = Token_Array_Length();

		/* the separator is a marker only, there is nothing to read */
		if (is_new_old_separator(path)) {
			if (!terse) {
				fprintf(Output_File, "new/old separator\n");
			}
			if (Number_of_New_Texts != Number_of_Texts) {
				/* the count was already lowered by an earlier one */
				fatal("more than one new/old separator");
			} else {
				Number_of_New_Texts = idx;
			}
			fflush(Output_File);
			continue;
		}

		const int opened = Open_Text(First_Pass, cur) ? 1 : 0;
		if (!opened) {
			/* warn even in terse mode; there the file name has
			   not been printed yet, so print it first
			*/
			if (terse) {
				fprintf(Output_File, "File %s: ", path);
			}
			fprintf(Output_File, ">>>> cannot open <<<<\n");
			/*	the file has still been opened
				with a null file for uniformity
			*/
		}

		/* keep every token except the end-of-line markers */
		while (Next_Text_Token_Obtained()) {
			if (Token_EQ(lex_token, End_Of_Line)) {
				continue;
			}
			Store_Token(lex_token);
		}
		Close_Text(First_Pass, cur);
		cur->tx_limit = Token_Array_Length();
		cur->tx_EOL_terminated = Token_EQ(lex_token, End_Of_Line);

		/* per-file summary: tokens, lines, non-ASCII characters */
		if (opened && !terse) {
			fprint_count(Output_File,
				     cur->tx_limit - cur->tx_start,
				     Token_Name);
			fprintf(Output_File, ", ");
			/* an unterminated last line still counts as a line */
			fprint_count(Output_File,
				     lex_nl_cnt - 1 +
					(cur->tx_EOL_terminated ? 0 : 1),
				     "line");
			if (!cur->tx_EOL_terminated) {
				fprintf(Output_File, " (not NL-terminated)");
			}
			if (lex_non_ascii_cnt) {
				fprintf(Output_File, ", ");
				fprint_count(Output_File,
					     lex_non_ascii_cnt,
					     "non-ASCII character");
			}
			fprintf(Output_File, "\n");
		}

#ifdef	DB_TEXT
		db_print_text(cur);
#endif	/* DB_TEXT */

		fflush(Output_File);
	}

	/* grand total; the separator, if any, is not counted as a file */
	int sep_present = (Number_of_Texts != Number_of_New_Texts);

	fprintf(Output_File, "Total input: ");
	fprint_count(Output_File,
		     Number_of_Texts - (sep_present ? 1 : 0),
		     "file");
	fprintf(Output_File, " (%d new, %d old), ",
		Number_of_New_Texts,
		(sep_present ? Number_of_Texts - Number_of_New_Texts - 1 : 0));
	fprint_count(Output_File, Token_Array_Length() - 1, Token_Name);
	fprintf(Output_File, "\n\n");
	fflush(Output_File);
}