/*
 * Position fp at the file row that corresponds to entry array_row of
 * xg->file_rows_sampled, by skipping the rows that lie between the row
 * consumed on the previous call and the wanted one.
 *
 * array_row: index into xg->file_rows_sampled.
 * fp:        input stream, read sequentially.
 * xg:        supplies file_sample_size and file_rows_sampled.
 *
 * Returns false when array_row is beyond xg->file_sample_size, False when
 * find_data_start() reports failure or the input ends while a row is being
 * skipped, and true otherwise.
 *
 * The position reached by the previous call is remembered in a function
 * static, so calls are expected to be made with increasing row numbers on
 * one stream; the function is not reentrant.
 */
static Boolean
seek_to_file_row(int array_row, FILE *fp, xgobidata *xg)
{
  int i, file_row, ch;
  static int prev_file_row = 0;
  Boolean ok = true;

  if (array_row >= xg->file_sample_size) {
    return false;
  }

  /* Identify the row number of the next file row we want. */
  file_row = xg->file_rows_sampled[array_row];

  for (i = prev_file_row; i < file_row && ok; i++) {
    if (!find_data_start(fp)) {
      ok = False;
      break;
    }

    /*
     * Skip a line.  Stop at EOF as well as at the newline: without the
     * EOF test a truncated file (or a last line lacking its newline)
     * would keep this loop spinning forever.
     */
    while ((ch = getc(fp)) != '\n') {
      if (ch == EOF) {
        ok = False;
        break;
      }
    }
  }

  /* add one to step over the one we're about to read in */
  prev_file_row = file_row + 1;

  return ok;
}
static GwyContainer* ezdfile_load(const gchar *filename, G_GNUC_UNUSED GwyRunType mode, GError **error) { GwyContainer *meta, *container = NULL; guchar *buffer = NULL; gsize size = 0; GError *err = NULL; EZDSection *section = NULL; GwyDataField *dfield = NULL; GPtrArray *ezdfile; guint header_size, n; gint i; gchar *p; if (!gwy_file_get_contents(filename, &buffer, &size, &err)) { err_GET_FILE_CONTENTS(error, &err); return NULL; } if (strncmp(buffer, MAGIC, MAGIC_SIZE) || !(header_size = find_data_start(buffer, size))) { err_FILE_TYPE(error, "EZD/NID"); gwy_file_abandon_contents(buffer, size, NULL); return NULL; } ezdfile = g_ptr_array_new(); p = g_strndup(buffer, header_size - DATA_MAGIC_SIZE); if (!file_read_header(ezdfile, p, error)) { gwy_file_abandon_contents(buffer, size, NULL); g_free(p); return NULL; } g_free(p); n = find_data_offsets(buffer + header_size, size - header_size, ezdfile, error); if (!n) { gwy_file_abandon_contents(buffer, size, NULL); return NULL; } container = gwy_container_new(); i = 0; for (n = 0; n < ezdfile->len; n++) { gchar key[24]; section = (EZDSection*)g_ptr_array_index(ezdfile, n); if (!section->data) continue; dfield = gwy_data_field_new(section->xres, section->yres, 1.0, 1.0, FALSE); read_data_field(dfield, section); g_snprintf(key, sizeof(key), "/%d/data", i); gwy_container_set_object_by_name(container, key, dfield); g_object_unref(dfield); fix_scales(section, i, container); meta = ezdfile_get_metadata(ezdfile, n); if (meta) { g_snprintf(key, sizeof(key), "/%d/meta", i); gwy_container_set_object_by_name(container, key, meta); g_object_unref(meta); } i++; } gwy_file_abandon_contents(buffer, size, NULL); ezdfile_free(ezdfile); return container; }
/*
 * Read a whitespace-separated ASCII data matrix from fp into xg.
 *
 * fp: open input stream; it is closed by this function on every path
 *     that gets past locating the first row.
 * xg: receives raw_data, is_missing, nrows, ncols, ncols_used, nmissing,
 *     missing_values_present and single_column.
 *
 * The first row is read token by token to determine the number of
 * columns; the remaining rows are then read with fscanf().  The tokens
 * "na" (any case) and "." denote missing values: they are stored as 0.0
 * in raw_data and flagged with 1 in is_missing.
 *
 * When xg->file_read_type is read_all, every row is read and storage grows
 * in blocks of BLOCKSIZE rows; otherwise only the rows selected through
 * seek_to_file_row() are read into storage sized from
 * xg->file_sample_size.
 *
 * The function returns silently if no first row can be located and calls
 * exit(0) after printing a message when the input is malformed.
 *
 * Tokens are limited to 63 characters (the capacity of the local word
 * buffer); a longer token is split rather than overflowing the buffer.
 */
void
read_ascii(FILE *fp, xgobidata *xg)
{
  register int ch;
  int i, j, k, jrows, nrows, jcols, fs;
  int nitems;
  float row1[NCOLS];
  short row1_missing[NCOLS];
  int nblocks;
  char word[64];

  /* Initialize these before starting */
  for (k=0; k<NCOLS; k++) {
    row1_missing[k] = 0;
    row1[k] = 0.0;
  }
  xg->ncols_used = 0;

  init_file_rows_sampled(xg);

/*
 * Find the index of the first row of data that we're interested in.
*/
  nrows = 0;
  if (xg->file_read_type == read_all) {
    if (find_data_start(fp) == False)
      return;
  } else {  /* if -only was used on the command line */
    if (!seek_to_file_row(nrows, fp, xg))
      return;
  }

/*
 * Read in the first row of the data file and calculate ncols.
*/
  gotone = True;  /* gotone is declared outside this function */

/*
 * I've left behind some checking that's done in bak/read_array.c --
 * test xgobi on a text file and see what happens.
*/
  while ( (ch = getc(fp)) != '\n') {
    if (ch == '\t' || ch == ' ')
      ;
    /*
     * Push the character back and read the whole token.  ungetc() fails
     * for EOF, so an unterminated first row ends up in this error branch.
     * The field width keeps fscanf() inside word[].
     */
    else if ( ungetc(ch, fp) == EOF || fscanf(fp, "%63s", word) < 0 ) {
      fprintf(stderr, "read_array: error in reading first row of data\n");
      fclose(fp);
      exit(0);
    }
    else {
      if ( strcasecmp(word, "na") == 0 || strcmp(word, ".") == 0) {
        xg->missing_values_present = True;
        xg->nmissing++;
        row1_missing[xg->ncols_used] = 1;
      } else {
        row1[xg->ncols_used] = (float) atof(word);
      }

      xg->ncols_used++ ;

      if (xg->ncols_used >= NCOLS) {
        fprintf(stderr,
          "This file has more than %d columns.  In order to read it in,\n",
          NCOLS);
        fprintf(stderr,
          "increase NCOLS in xgobitypes.h and recompile.\n");
        exit(0);
      }
    }
  }

  /*
   * A first row without any token gives zero columns.  The main loop
   * below detects the end of a row with jcols == ncols_used, which could
   * then never become true, and the column index would run past the end
   * of the row storage.  Stop here with the same message and exit status
   * the final consistency check uses for empty input.
   */
  if (xg->ncols_used == 0) {
    (void) fprintf(stderr, "No data read\n");
    fclose(fp);
    exit(0);
  }

  xg->ncols = xg->ncols_used + 1;

/*
 * If we're reading everything, allocate the first block.
 * If -only has been used, allocate the whole shebang.
*/
  if (xg->file_read_type == read_all) {
    xg->nrows = 0;
    alloc_block(1, xg);
    if (xg->missing_values_present)
      alloc_missing_block(1, xg);
  } else {  /* -only has been used */
    xg->nrows = xg->file_sample_size;
    xg->raw_data = (float **) XtMalloc(
      (Cardinal) xg->nrows * sizeof(float *));
    for (i=0; i<xg->nrows; i++)
      xg->raw_data[i] = (float *) XtMalloc(
        (Cardinal) xg->ncols * sizeof(float));
    if (xg->missing_values_present)
      init_missing_array(xg->nrows, xg->ncols, xg);
  }

/*
 * Fill in the first row
*/
  for (j=0; j<xg->ncols_used; j++)
    xg->raw_data[0][j] = row1[j];
  if (xg->missing_values_present) {
    for (j=0; j<xg->ncols_used; j++)
      xg->is_missing[0][j] = row1_missing[j];
  }
  nrows++;

/*
 * Read data, reallocating as needed.  Determine nrows for the read_all case.
 *
 * nitems counts every token read (the first row included), jcols is the
 * column of the next token, nrows the row being filled, and jrows the
 * number of rows completed in the current block.
*/
  nblocks = 1;
  nitems = xg->ncols_used;
  jrows = 1;
  jcols = 0;
  while (1) {

    /* At the start of each row, position the stream on that row. */
    if (jcols == 0) {
      if (xg->file_read_type == read_all) {
        if (!find_data_start(fp))
          break;
      } else {  /* if -only was used on the command line */
        if (!seek_to_file_row(nrows, fp, xg))
          break;
      }
    }

    /* Bounded read: word[] holds at most 63 characters plus the NUL. */
    fs = fscanf(fp, "%63s", word);

    if (fs == EOF)
      break;
    else if (fs < 0) {
      fprintf(stderr, "Problem with input data\n");
      fclose(fp);
      exit(0);
    }
    else {
      nitems++;

      if ( strcasecmp(word, "na") == 0 || strcmp(word, ".") == 0 ) {

        if (!xg->missing_values_present) {
          xg->missing_values_present = True;
          /*
           * Only when the first "na" or "." has been encountered
           * is it necessary to allocate space to contain the
           * missing values matrix.  Initialize all previous values
           * to 0.
           *
           * NOTE(review): the loop below clears only the rows of the
           * current block; whether rows of earlier blocks are allocated
           * and cleared depends on alloc_missing_block() -- confirm.
          */
          if (xg->file_read_type == read_all) {
            alloc_missing_block(nblocks, xg);
            for (i=BLOCKSIZE*(nblocks-1); i<BLOCKSIZE*nblocks; i++) {
              for (k=0; k<xg->ncols_used; k++)
                xg->is_missing[i][k] = 0;
            }
          } else {
            init_missing_array(xg->nrows, xg->ncols, xg);
          }
        }

        xg->nmissing++;
        xg->is_missing[nrows][jcols] = 1;
        xg->raw_data[nrows][jcols] = 0.0;
      }
      else
        xg->raw_data[nrows][jcols] = (float) atof(word);

      jcols++;
      if (jcols == xg->ncols_used) {
        jcols = 0;
        nrows++;
        jrows++;
      }

      if (xg->file_read_type == read_all) {
        /* The current block is full: grow the storage by one block. */
        if (jrows == BLOCKSIZE) {
          jrows = 0;
          nblocks++;
          if (nblocks%20 == 0)
            fprintf(stderr, "reallocating; n > %d\n", nblocks*BLOCKSIZE);

          alloc_block(nblocks, xg);
          if (xg->missing_values_present)
            alloc_missing_block(nblocks, xg);
        }
      } else {  /* -only was used */
        if (nrows >= xg->nrows)
          break;
      }
    }
  }

/*
 * Close the data file
*/
  if (fclose(fp) == EOF)
    fprintf(stderr, "read_array: error in fclose");

  if (xg->file_read_type == read_all)
    xg->nrows = nrows;

  fprintf(stderr, "size of data: %d x %d\n", xg->nrows, xg->ncols);

  /* Every row must have contributed exactly ncols_used tokens. */
  if ( nitems != xg->nrows * xg->ncols_used ) {
    (void) fprintf(stderr, "read_array: nrows*ncols != nitems read\n");
    (void) fprintf(stderr, "(nrows %d, ncols %d, nitems read %d)\n",
      xg->nrows, xg->ncols_used, nitems);
    exit(0);
  }
  else if (nitems == 0) {
    (void) fprintf(stderr, "No data read\n");
    exit(0);
  }
  else {
    /*
     * If we haven't yet encountered a missing value, there is no
     * missing values matrix: reset the pointer.
    */
    if (!xg->missing_values_present)
      xg->is_missing = (short **) NULL;

    if (xg->file_read_type == read_all) {
      /*
       * One last XtFree and XtRealloc to make raw_data take up exactly
       * the amount of space it needs.
      */
      for (i=xg->nrows; i<BLOCKSIZE*nblocks; i++)
        XtFree((XtPointer) xg->raw_data[i]);
      xg->raw_data = (float **) XtRealloc((XtPointer) xg->raw_data,
        (Cardinal) xg->nrows * sizeof(float *));

      if (xg->missing_values_present) {
        for (i=xg->nrows; i<BLOCKSIZE*nblocks; i++)
          XtFree((XtPointer) xg->is_missing[i]);
        xg->is_missing = (short **) XtRealloc((XtPointer) xg->is_missing,
          (Cardinal) xg->nrows * sizeof(short *));
      }
    }

    /*
     * If the data contains only one column, add a second,
     * the numbers 1:nrows -- and let the added column be
     * the first column?
    */
    xg->single_column = False;
    if (xg->ncols_used == 1) {
      xg->single_column = True;
      xg->ncols_used = 2;
      xg->ncols = 3;
      for (i=0; i<xg->nrows; i++) {
        xg->raw_data[i] = (float *) XtRealloc(
          (XtPointer) xg->raw_data[i],
          (Cardinal) 3 * sizeof(float));
        xg->raw_data[i][1] = xg->raw_data[i][0] ;
        xg->raw_data[i][0] = (float) (i+1) ;

        /*
         * The data value moved from column 0 to column 1, so its missing
         * flag has to move with it; the generated index column is never
         * missing.  (Formerly the flag stayed on column 0 and column 1
         * was always cleared, which marked the index as missing instead
         * of the data value.)
        */
        if (xg->missing_values_present) {
          xg->is_missing[i] = (short *) XtRealloc(
            (XtPointer) xg->is_missing[i],
            (Cardinal) 3 * sizeof(short));
          xg->is_missing[i][1] = xg->is_missing[i][0] ;
          xg->is_missing[i][0] = 0 ;
        }
      }
    }
  }
}