/
loadtxt.cpp
160 lines (143 loc) · 3.37 KB
/
loadtxt.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <string>
#include <vector>
// Inclusive bounds for the explicit exponent of a parsed number (the integer
// following 'e' / 'E'); the parser rejects exponents outside this range.
static constexpr double kMinExponent = -300.0;
static constexpr double kMaxExponent = 300.0;
// Returns 10^e for a non-negative exponent `e`.
//
// Results are memoised in a function-local table that grows on demand, so
// repeated calls cost a single vector lookup. The table is shared mutable
// state with no locking.
//
// Precondition: e >= 0 (checked with assert; a negative value would index
// before the start of the table).
inline double Pow10Positive(int e) {
  assert(e >= 0);
  static std::vector<double> cache = {1.};
  // Do the size arithmetic in the vector's own unsigned type to avoid a
  // signed/unsigned comparison.
  const auto index = static_cast<std::vector<double>::size_type>(e);
  while (cache.size() <= index) {
    cache.push_back(cache.back() * 10.);
  }
  return cache[index];
}
// Returns 10^(-e) for a non-negative `e` (e.g. Pow10Negative(2) == 0.01).
//
// Each entry is computed as 1 / 10^k rather than by repeatedly multiplying by
// 0.1: 0.1 is not exactly representable, so repeated multiplication compounds
// the rounding error (0.1 * 0.1 != 0.01), whereas 10^k is built from exact
// multiplications for small k and the single division is rounded once.
//
// Results are memoised in a function-local table that grows on demand. The
// table is shared mutable state with no locking.
//
// Precondition: e >= 0 (checked with assert; a negative value would index
// before the start of the table).
inline double Pow10Negative(int e) {
  assert(e >= 0);
  static std::vector<double> cache = {1.};
  static double positive_power = 1.;  // 10^(cache.size() - 1)
  const auto index = static_cast<std::vector<double>::size_type>(e);
  while (cache.size() <= index) {
    positive_power *= 10.;
    const double inverse = 1. / positive_power;
    // Once 10^k overflows to infinity the quotient collapses to zero; fall
    // back to scaling the previous entry so results shrink gradually instead
    // of jumping straight to zero.
    cache.push_back(inverse != 0. ? inverse : cache.back() * .1);
  }
  return cache[index];
}
// Parses a run of ASCII decimal digits starting at `p`.
//
// On return `p` points at the first non-digit character and `ndigits` holds
// the number of digits consumed (0 if `p` did not start on a digit, in which
// case the function returns 0 and leaves `p` unchanged).
//
// The result saturates at the largest `int` instead of overflowing, because
// signed overflow is undefined behaviour; all digits are still consumed and
// counted.
inline int ConsumeInteger(const char*& p, int& ndigits) {
  constexpr int kIntMax = std::numeric_limits<int>::max();
  ndigits = 0;
  int sum = 0;
  for (; *p >= '0' && *p <= '9'; ++p, ++ndigits) {
    const int digit = *p - '0';
    // sum * 10 + digit fits in an int exactly when sum <= (max - digit) / 10.
    if (sum > (kIntMax - digit) / 10) {
      sum = kIntMax;
    } else {
      sum = sum * 10 + digit;
    }
  }
  return sum;
}
// Parses an optionally signed decimal integer starting at `p`.
//
// A single leading '+' or '-' is consumed if present, followed by the digits
// read by ConsumeInteger. `ndigits` receives the digit count only (the sign
// character is not counted), and `p` is left just past the last digit.
inline int ConsumeSignedInteger(const char*& p, int& ndigits) {
  const bool is_negative = (*p == '-');
  if (is_negative || *p == '+') {
    ++p;  // step over the sign character
  }
  const int magnitude = ConsumeInteger(p, ndigits);
  return is_negative ? -magnitude : magnitude;
}
// Parses a decimal floating-point number starting at `p` and advances `p`
// past it.
//
// Accepted form: [+|-] digits [. digits] [(e|E) [+|-] digits], where at least
// one digit must appear before or after the period.
//
// On malformed input, or when the explicit exponent lies outside
// [kMinExponent, kMaxExponent], a message is printed and the process exits
// with status -1.
//
// Implementation notes:
//  * The sign is tracked separately and applied to the final value, so it
//    covers the fractional part too ("-1.5" is -1.5, "-0.5" is -0.5).
//  * Digits are accumulated into one 64-bit mantissa (whole and fractional
//    digits together) plus a decimal scale, so long digit strings cannot
//    overflow; digits beyond the first 19 significant ones are dropped.
//  * The scale is applied by dividing by a positive power of ten rather than
//    multiplying by an inexact negative power.
inline double ConsumeDouble(const char*& p) {
  // 10^19 - 1 < 2^64, so 19 significant digits always fit in the mantissa.
  constexpr int kMaxMantissaDigits = 19;
  // Largest power of ten applied in a single step (10^300 is finite).
  constexpr int kMaxPowerStep = 300;

  // Consume sign
  bool negative = false;
  if (*p == '-') {
    negative = true;
    ++p;
  } else if (*p == '+') {
    ++p;
  }

  unsigned long long mantissa = 0;
  int mantissa_digits = 0;  // significant digits stored (leading zeros excluded)
  int scale = 0;            // parsed magnitude == mantissa * 10^scale

  // Consumes a run of digits into the mantissa and returns how many were read.
  const auto consume_digits = [&](bool fractional) {
    int count = 0;
    for (; *p >= '0' && *p <= '9'; ++p, ++count) {
      if (mantissa_digits < kMaxMantissaDigits) {
        mantissa = mantissa * 10 + static_cast<unsigned long long>(*p - '0');
        if (mantissa != 0) {
          ++mantissa_digits;
        }
        if (fractional) {
          --scale;
        }
      } else if (!fractional) {
        // A dropped whole-part digit still shifts the magnitude by ten;
        // a dropped fractional digit is simply lost precision.
        ++scale;
      }
    }
    return count;
  };

  // Consume whole part
  const int ndigits_whole = consume_digits(false);

  // Consume fractional part
  if (*p == '.') {
    ++p;
    const int ndigits_fractional = consume_digits(true);
    if (ndigits_whole == 0 && ndigits_fractional == 0) {
      printf("Found period with no digits either before or after\n");
      exit(-1);
    }
  } else if (ndigits_whole == 0) {
    printf("Found neither whole part nor period\n");
    exit(-1);
  }

  // Consume exponent
  int exponent = 0;
  if (*p == 'e' || *p == 'E') {
    ++p;
    int ndigits_exponent;
    exponent = ConsumeSignedInteger(p, ndigits_exponent);
    if (ndigits_exponent == 0) {
      printf("Found exponent char but no exponent\n");
      exit(-1);
    } else if (exponent > kMaxExponent || exponent < kMinExponent) {
      printf("Exponent out of range\n");
      exit(-1);
    }
  }

  // Apply the combined power of ten. Very large magnitudes are applied in
  // bounded steps so that no single power of ten overflows to infinity.
  double val = static_cast<double>(mantissa);
  int power = scale + exponent;
  while (power > kMaxPowerStep) {
    val *= Pow10Positive(kMaxPowerStep);
    power -= kMaxPowerStep;
  }
  while (power < -kMaxPowerStep) {
    val /= Pow10Positive(kMaxPowerStep);
    power += kMaxPowerStep;
  }
  if (power > 0) {
    val *= Pow10Positive(power);
  } else if (power < 0) {
    val /= Pow10Positive(-power);
  }
  return negative ? -val : val;
}
// Parses every whitespace-separated number in the `len` bytes starting at
// `line` and appends the values to `out`.
//
// Spaces, tabs, carriage returns and newlines are treated as separators.
// Leading and trailing separators are ignored, so an empty or blank line
// appends nothing. If a number is followed by any other character, a message
// is printed and the process exits with status -1.
//
// `line` does not have to be NUL-terminated: no byte at or beyond
// `line + len` is read.
void ProcessLine(std::vector<double>& out, const char* line, size_t len) {
  const auto is_space = [](char c) {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
  };
  if (len == 0) {
    return;
  }

  const char* p = line;
  const char* end = line + len;

  // ConsumeDouble has no length argument; it stops at the first character
  // that cannot be part of a number. If the buffer ends in a separator, that
  // separator is a guaranteed stop inside the buffer and we can parse in
  // place. Otherwise parse a NUL-terminated copy so the terminator plays
  // that role.
  std::string terminated;
  if (!is_space(end[-1])) {
    terminated.assign(line, len);
    p = terminated.c_str();
    end = p + len;
  }

  while (p < end && is_space(*p)) {
    ++p;
  }
  while (p < end) {
    out.push_back(ConsumeDouble(p));
    if (p < end && !is_space(*p)) {
      printf("Expected whitespace but found %c\n", *p);
      exit(-1);
    }
    while (p < end && is_space(*p)) {
      ++p;
    }
  }
}
// Reads the text file at `path` line by line, parses each line with
// ProcessLine, and returns the number of lines that produced at least one
// value. The parsed values themselves are discarded.
//
// If the file cannot be opened or a read error occurs, a message is written
// to stderr and the process exits with status -1.
int LoadText_Direct(const char* path) {
  FILE* fd = fopen(path, "r");
  if (fd == nullptr) {
    fprintf(stderr, "Could not open %s\n", path);
    exit(-1);
  }

  std::vector<double> items;
  int nrows = 0;

  // POSIX getline() is used instead of the BSD-only fgetln(): it is available
  // on more platforms and always hands back a NUL-terminated buffer, which it
  // allocates and grows itself and which we reuse for every line.
  char* line = nullptr;
  size_t capacity = 0;
  for (;;) {
    const auto len = ::getline(&line, &capacity, fd);
    if (len < 0) {
      break;  // end of file or read error; distinguished below
    }
    items.clear();
    ProcessLine(items, line, static_cast<size_t>(len));
    if (!items.empty()) {
      ++nrows;
    }
  }

  const bool read_failed = ferror(fd) != 0;
  free(line);
  fclose(fd);
  if (read_failed) {
    fprintf(stderr, "Error while reading %s\n", path);
    exit(-1);
  }
  return nrows;
}
// Reads the text file at `path` with fscanf, expecting five floating-point
// values per row, and returns the number of complete rows read. The parsed
// values themselves are discarded.
//
// An empty file yields 0. If the file cannot be opened, or a row does not
// consist of five numbers, a message is written to stderr and the process
// exits with status -1.
int LoadText_Fscanf(const char* path) {
  FILE* fd = fopen(path, "r");
  // Checked explicitly rather than with assert(), which is compiled out when
  // NDEBUG is defined.
  if (fd == nullptr) {
    fprintf(stderr, "Could not open %s\n", path);
    exit(-1);
  }

  int nrows = 0;
  double row[5];
  for (;;) {
    // Drive the loop from fscanf's return value: testing feof() up front
    // counts a phantom row on an empty file and never terminates when a
    // token cannot be converted. "%lf" skips leading whitespace (including
    // newlines) on its own.
    const int nread = fscanf(fd, "%lf %lf %lf %lf %lf",
                             &row[0], &row[1], &row[2], &row[3], &row[4]);
    if (nread == 5) {
      ++nrows;
      continue;
    }
    if (nread != EOF) {
      fprintf(stderr, "Malformed row %d: expected 5 values\n", nrows + 1);
      fclose(fd);
      exit(-1);
    }
    break;  // clean end of input
  }
  fclose(fd);
  return nrows;
}
// Prints the command-line usage summary and terminates the process with
// status -1. Never returns.
void PrintUsageAndExit() {
  // The second mode is spelled "--fscanf": that is the flag main() accepts.
  printf("Usage: loadtxt [--direct | --fscanf] PATH\n");
  exit(-1);
}
// Entry point: loadtxt MODE PATH
//
// MODE selects the loader ("--direct" or "--fscanf"); PATH is the text file
// to read. Prints the number of rows read. Any other argument count or mode
// prints the usage message and exits.
int main(int argc, char **argv) {
  if (argc != 3) {
    PrintUsageAndExit();
  }
  const std::string mode(argv[1]);
  const char* path = argv[2];

  int nrows = 0;
  if (mode == "--direct") {
    nrows = LoadText_Direct(path);
  } else if (mode == "--fscanf") {
    nrows = LoadText_Fscanf(path);
  } else {
    // An unrecognised mode must not fall through and report a row count
    // that was never computed.
    PrintUsageAndExit();
  }
  printf("Read %d rows\n", nrows);
  return 0;
}