int32_t write_clusters(char* outname, char* outname_end, uintptr_t unfiltered_indiv_ct, uintptr_t* indiv_exclude, uintptr_t indiv_ct, char* person_ids, uintptr_t max_person_id_len, uint32_t omit_unassigned, uintptr_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, char* cluster_ids, uintptr_t max_cluster_id_len) { unsigned char* wkspace_mark = wkspace_base; FILE* outfile = NULL; uintptr_t indiv_uidx = 0; int32_t retval = 0; uint32_t* indiv_to_cluster; char* person_id_ptr; char* bufptr; uintptr_t indiv_idx; uint32_t cluster_idx; uint32_t slen; if (wkspace_alloc_ui_checked(&indiv_to_cluster, unfiltered_indiv_ct * sizeof(int32_t))) { goto write_cluster_ret_NOMEM; } fill_unfiltered_indiv_to_cluster(unfiltered_indiv_ct, cluster_ct, cluster_map, cluster_starts, indiv_to_cluster); memcpy(outname_end, ".clst", 6); if (fopen_checked(&outfile, outname, "w")) { goto write_cluster_ret_OPEN_FAIL; } for (indiv_idx = 0; indiv_idx < indiv_ct; indiv_idx++) { indiv_uidx = next_non_set_unsafe(indiv_exclude, indiv_uidx); cluster_idx = indiv_to_cluster[indiv_uidx]; if ((!omit_unassigned) || (cluster_idx != 0xffffffffU)) { person_id_ptr = &(person_ids[indiv_uidx * max_person_id_len]); slen = strlen_se(person_id_ptr); bufptr = memcpyax(tbuf, person_id_ptr, slen, ' '); bufptr = strcpyax(bufptr, &(person_id_ptr[slen + 1]), ' '); if (cluster_idx != 0xffffffffU) { bufptr = strcpyax(bufptr, &(cluster_ids[cluster_idx * max_cluster_id_len]), '\n'); } else { bufptr = memcpyl3a(bufptr, "NA\n"); } if (fwrite_checked(tbuf, bufptr - tbuf, outfile)) { goto write_cluster_ret_WRITE_FAIL; } } indiv_uidx++; } if (fclose_null(&outfile)) { goto write_cluster_ret_WRITE_FAIL; } sprintf(logbuf, "Pruned cluster assignments written to %s.\n", outname); logprintb(); while (0) { write_cluster_ret_NOMEM: retval = RET_NOMEM; break; write_cluster_ret_OPEN_FAIL: retval = RET_OPEN_FAIL; break; write_cluster_ret_WRITE_FAIL: retval = RET_WRITE_FAIL; break; } fclose_cond(outfile); wkspace_reset(wkspace_mark); return retval; }
int32_t main(int32_t argc, char** argv) { FILE* infile = NULL; char* outname = NULL; uintptr_t column_sep = 2; uint32_t flags = 0; int32_t retval = 0; uintptr_t* col_widths = NULL; unsigned char* spacebuf = NULL; unsigned char* rjustify_buf = NULL; uintptr_t col_ct = 0; uint32_t infile_param_idx = 0; char* param_ptr; #ifndef _WIN32 char* cptr; #endif uint32_t param_idx; uint32_t uii; int32_t ii; char cc; if (argc == 1) { goto main_ret_HELP; } for (param_idx = 1; param_idx < (uint32_t)argc; param_idx++) { if ((!strcmp(argv[param_idx], "--help")) || (!strcmp(argv[param_idx], "-help")) || (!strcmp(argv[param_idx], "-?")) || (!strcmp(argv[param_idx], "-h"))) { goto main_ret_HELP; } } if (argc > 10) { fputs("Error: Too many parameters.\n\n", stderr); goto main_ret_INVALID_CMDLINE_2; } for (param_idx = 1; param_idx < (uint32_t)argc; param_idx++) { if (argv[param_idx][0] != '-') { if (!infile_param_idx) { infile_param_idx = param_idx; } else if (!outname) { if (flags & FLAG_INPLACE) { goto main_ret_INVALID_CMDLINE_3; } outname = argv[param_idx]; } else { fputs("Error: Invalid parameter sequence.\n\n", stderr); goto main_ret_INVALID_CMDLINE_2; } continue; } param_ptr = &(argv[param_idx][1]); if (*param_ptr == '-') { // allow both single- and double-dash param_ptr++; } if (!strcmp(param_ptr, "inplace")) { if (outname) { goto main_ret_INVALID_CMDLINE_3; } flags |= FLAG_INPLACE; } else if ((!strcmp(param_ptr, "spacing")) || (!strcmp(param_ptr, "s"))) { if (++param_idx == (uint32_t)argc) { fputs("Error: Missing --spacing parameter.\n", stderr); goto main_ret_INVALID_CMDLINE; } ii = atoi(argv[param_idx]); if (ii < 1) { fprintf(stderr, "Error: Invalid --spacing parameter '%s'.\n", argv[param_idx]); goto main_ret_INVALID_CMDLINE; } column_sep = (uint32_t)ii; } else if (!strcmp(param_ptr, "ralign")) { flags |= FLAG_RJUSTIFY; } else if (!strcmp(param_ptr, "leading")) { flags |= FLAG_SPACES_BEFORE_FIRST; } else if (!strcmp(param_ptr, "extend-short")) { flags |= FLAG_PAD; } else if (!strcmp(param_ptr, "trailing")) { flags |= FLAG_SPACES_AFTER_LAST; } else if (!strcmp(param_ptr, "force-eoln")) { flags |= FLAG_FINAL_EOLN; } else if (!strcmp(param_ptr, "noblank")) { flags |= FLAG_STRIP_BLANK; } else { if ((argv[param_idx][1] != '-') && argv[param_idx][1]) { // permit abbreviated style while (1) { cc = *param_ptr++; if (!cc) { break; } switch (cc) { case 'i': if (outname) { goto main_ret_INVALID_CMDLINE_3; } flags |= FLAG_INPLACE; break; case 'r': flags |= FLAG_RJUSTIFY; break; case 'l': flags |= FLAG_SPACES_BEFORE_FIRST; break; case 'e': flags |= FLAG_PAD; break; case 't': flags |= FLAG_SPACES_AFTER_LAST; break; case 'f': flags |= FLAG_FINAL_EOLN; break; case 'n': flags |= FLAG_STRIP_BLANK; break; default: fprintf(stderr, "Error: Invalid flag '%s'.\n\n", argv[param_idx]); goto main_ret_INVALID_CMDLINE_2; } } } else { fprintf(stderr, "Error: Invalid flag '%s'.\n\n", argv[param_idx]); goto main_ret_INVALID_CMDLINE_2; } } } if (!infile_param_idx) { fputs("Error: No input filename.\n\n", stderr); goto main_ret_INVALID_CMDLINE_2; } if (flags & FLAG_INPLACE) { uii = strlen(argv[infile_param_idx]); outname = (char*)malloc(uii + 11); if (!outname) { goto main_ret_NOMEM; } memcpy(outname, argv[infile_param_idx], uii); memcpy(&(outname[uii]), "-temporary", 11); } else if (outname) { #ifdef _WIN32 uii = GetFullPathName(argv[infile_param_idx], FNAMESIZE, pathbuf, NULL); if ((!uii) || (uii > FNAMESIZE)) #else if (!realpath(argv[infile_param_idx], pathbuf)) #endif { fprintf(stderr, "Error: Failed to open %s.\n", argv[infile_param_idx]); goto main_ret_OPEN_FAIL; } #ifdef _WIN32 uii = GetFullPathName(outname, FNAMESIZE, &(pathbuf[FNAMESIZE + 64]), NULL); if (uii && (uii <= FNAMESIZE) && (!strcmp(pathbuf, &(pathbuf[FNAMESIZE + 64])))) #else cptr = realpath(outname, &(pathbuf[FNAMESIZE + 64])); if (cptr && (!strcmp(pathbuf, &(pathbuf[FNAMESIZE + 64])))) #endif { fputs("Error: Input and output files match. Use --inplace instead.\n", stderr); goto main_ret_INVALID_CMDLINE; } } if (fopen_checked(&infile, argv[infile_param_idx], "rb")) { goto main_ret_OPEN_FAIL; } retval = scan_column_widths(infile, column_sep, &col_widths, &col_ct, &spacebuf, (flags & FLAG_RJUSTIFY)? (&rjustify_buf) : NULL); if (retval) { goto main_ret_1; } retval = pretty_write(infile, outname, flags, column_sep, col_widths, col_ct, spacebuf, rjustify_buf); if (retval) { goto main_ret_1; } fclose_null(&infile); if (flags & FLAG_INPLACE) { unlink(argv[infile_param_idx]); if (rename(outname, argv[infile_param_idx])) { fprintf(stderr, "Error: File rename failed. Output is in %s instead of %s.\n", outname, argv[infile_param_idx]); goto main_ret_OPEN_FAIL; } } while (0) { main_ret_HELP: fputs( "prettify v1.04 (21 Feb 2014) Christopher Chang ([email protected])\n\n" "Takes a tab-and/or-space-delimited text table, and generates a space-delimited\n" "pretty-printed version. Multibyte character encodings are not currently\n" "supported.\n\n" , stdout); disp_usage(stdout); fputs( "\nTo perform the simplest reverse conversion (multiple spaces to one tab), you\n" "can use\n" " cat [input filename] | tr -s ' ' '\\t' > [output filename]\n" "For one-to-one conversion between spaces and tabs instead, omit the \"-s\". And\n" "to strip leading and trailing tabs and spaces, try\n" " cat [in] | sed 's/^[[:space:]]*//g' | sed 's/[[:space:]]*$//g' > [out]\n" , stdout); retval = RET_HELP; break; main_ret_NOMEM: retval = RET_NOMEM; break; main_ret_OPEN_FAIL: retval = RET_OPEN_FAIL; break; main_ret_INVALID_CMDLINE_3: fputs("Error: --inplace cannot be used with an output filename.\n", stderr); retval = RET_INVALID_CMDLINE; break; main_ret_INVALID_CMDLINE_2: disp_usage(stderr); main_ret_INVALID_CMDLINE: retval = RET_INVALID_CMDLINE; break; } main_ret_1: free_cond(col_widths); fclose_cond(infile); dispmsg(retval); return retval; }
int32_t pretty_write(FILE* infile, char* outname, uint32_t flags, uintptr_t column_sep, uintptr_t* col_widths, uintptr_t col_ct, unsigned char* spacebuf, unsigned char* rjustify_buf) { FILE* outfile = NULL; uintptr_t cur_col_idx = 0; uintptr_t cur_col_width = 0; uintptr_t prev_col_width = 0; uintptr_t rjbuf_len = 0; unsigned char* token_end = NULL; uint32_t spaces_before_first = flags & FLAG_SPACES_BEFORE_FIRST; uint32_t pad = flags & FLAG_PAD; uint32_t spaces_after_last = flags & FLAG_SPACES_AFTER_LAST; uint32_t final_eoln = flags & FLAG_FINAL_EOLN; uint32_t strip_blank = flags & FLAG_STRIP_BLANK; uint32_t no_final_newline = 0; int32_t retval = 0; unsigned char* readptr; unsigned char* line_end; unsigned char* readbuf_end; uintptr_t cur_read; if (!outname) { outfile = stdout; } else { if (fopen_checked(&outfile, outname, "w")) { goto pretty_write_ret_OPEN_FAIL; } } rewind(infile); cur_read = fread(g_readbuf, 1, BUFSIZE, infile); if (ferror(infile)) { goto pretty_write_ret_READ_FAIL; } readptr = g_readbuf; readbuf_end = &(g_readbuf[cur_read]); while (1) { line_end = (unsigned char*)memchr(readptr, '\n', (uintptr_t)(readbuf_end - readptr)); if (!line_end) { if (readptr != readbuf_end) { no_final_newline = 1; } line_end = readbuf_end; } while (readptr < line_end) { if (!cur_col_width) { if (skip_spaces_ck(&readptr, line_end)) { break; } if (cur_col_idx || spaces_before_first) { if (!rjustify_buf) { fwrite(spacebuf, 1, col_widths[cur_col_idx] + column_sep - prev_col_width, outfile); } else { fwrite(spacebuf, 1, column_sep, outfile); } } cur_col_idx++; } token_end = get_token_end_ck(readptr, line_end); cur_col_width += (uintptr_t)(token_end - readptr); if (token_end == line_end) { break; } if (!rjustify_buf) { fwrite(readptr, 1, token_end - readptr, outfile); prev_col_width = cur_col_width; } else { fwrite(spacebuf, 1, col_widths[cur_col_idx] - cur_col_width, outfile); if (rjbuf_len) { fwrite(rjustify_buf, 1, rjbuf_len, outfile); rjbuf_len = 0; } fwrite(readptr, 1, token_end - readptr, outfile); } cur_col_width = 0; readptr = token_end; } if ((line_end < readbuf_end) || (!cur_read)) { if (cur_col_idx) { if (!rjustify_buf) { if (cur_col_width) { // last column not dumped yet fwrite(readptr, 1, token_end - readptr, outfile); prev_col_width = cur_col_width; } if (pad || spaces_after_last) { fwrite(spacebuf, 1, col_widths[cur_col_idx] - prev_col_width, outfile); } } else { fwrite(spacebuf, 1, col_widths[cur_col_idx] - cur_col_width, outfile); if (rjbuf_len) { fwrite(rjustify_buf, 1, rjbuf_len, outfile); rjbuf_len = 0; } fwrite(readptr, 1, token_end - readptr, outfile); } if (pad) { while (cur_col_idx < col_ct) { fwrite(spacebuf, 1, col_widths[++cur_col_idx] + column_sep, outfile); } } if (spaces_after_last) { fwrite(spacebuf, 1, column_sep, outfile); } } if (!cur_read) { // EOF if (final_eoln && no_final_newline) { putc('\n', outfile); } break; } if (cur_col_idx || (!strip_blank)) { putc('\n', outfile); if (ferror(outfile)) { goto pretty_write_ret_WRITE_FAIL; } } readptr = &(line_end[1]); cur_col_idx = 0; cur_col_width = 0; prev_col_width = 0; continue; } // in middle of line if (cur_col_width) { if (!rjustify_buf) { fwrite(readptr, 1, token_end - readptr, outfile); } else { memcpy(&(rjustify_buf[rjbuf_len]), readptr, token_end - readptr); rjbuf_len += (uintptr_t)(token_end - readptr); } } cur_read = fread(g_readbuf, 1, BUFSIZE, infile); if (ferror(infile)) { goto pretty_write_ret_READ_FAIL; } if (cur_read) { no_final_newline = 0; } readptr = g_readbuf; readbuf_end = &(g_readbuf[cur_read]); } if (outname) { if (fclose_null(&outfile)) { goto pretty_write_ret_WRITE_FAIL; } } while (0) { pretty_write_ret_OPEN_FAIL: retval = RET_OPEN_FAIL; break; pretty_write_ret_READ_FAIL: retval = RET_READ_FAIL; break; pretty_write_ret_WRITE_FAIL: retval = RET_WRITE_FAIL; break; } if (outname) { fclose_cond(outfile); } return retval; }