bool convert(const std::string& s, restcgi::date_time& v) { if (s.empty() || isspaces(s.c_str())) return false; std::string::const_iterator first = s.begin(); if (!parse(first, s.end(), v) || !isspaces(s.c_str() + (first - s.begin()))) throw std::invalid_argument("cannot convert to date and time, invalid chars: " + s); return true; }
/*
 * file_lines: count the chunks read by fgets() from fp that isspaces() does
 * not accept, then rewind the stream.
 *
 * A "chunk" is whatever one fgets() call returns with a BUFFSIZE buffer, so
 * a physical line longer than the buffer is counted once per chunk.
 *
 * Returns the count; fp is rewound before returning.
 */
static int file_lines(FILE *fp)
{
    char chunk[BUFFSIZE];
    int count = 0;

    for (;;) {
        if (fgets(chunk, sizeof(chunk), fp) == NULL)
            break;
        if (isspaces(chunk))
            continue;           /* blank chunk: not counted */
        ++count;
    }

    rewind(fp);
    return count;
}
/*
 * assign_labels: read one whitespace-separated list of integer labels per
 * line from file_name and store them in data[0], data[1], ... in file order.
 *
 * For every line, data[n].labelid is allocated with calloc() and
 * data[n].labellen is set to the field count reported by n_fields_label().
 * Labels are stored zero-based (value read minus one).  *max_labelid starts
 * at -1 and is raised to (stored id + 1) whenever a stored id reaches it.
 *
 * The caller must supply a data array with at least one element per line of
 * the file; this function cannot check that.
 *
 * Returns 1 on success, 0 if the file cannot be opened or an allocation
 * fails.  A blank line, or a line with no fields, prints a diagnostic to
 * stderr and terminates the process with exit(1).
 */
int assign_labels(char *file_name, document *data, int *max_labelid)
{
    FILE *fp;
    char line[BUFFSIZE];
    int n = 0;

    *max_labelid = -1;
    if ((fp = fopen(file_name, "r")) == NULL)
        return 0;

    /* The former file_lines() pre-pass is gone: its result was never used. */
    while (fgets(line, sizeof(line), fp)) {
        char *lp = line;
        int i = 0;
        int len;

        /* Message text kept as-is (including the "feature_matrix" prefix). */
        if (isspaces(line)) {
            fprintf(stderr, "feature_matrix: suspicious line:\n%s", line);
            exit(1);
        }
        len = n_fields_label(line);
        if (!(len > 0)) {
            fprintf(stderr, "feature_matrix: suspicious line:\n%s", line);
            exit(1);
        }

        data[n].labelid = (int *)calloc(len, sizeof(int));
        data[n].labellen = len;
        if (data[n].labelid == NULL) {
            fclose(fp);         /* do not leak the stream on failure */
            return 0;
        }

        /* i < len keeps writes inside the buffer allocated above. */
        while (*lp && i < len) {
            char *sp;
            int id;

            /* Cast: isspace() requires an unsigned char value or EOF. */
            if (isspace((unsigned char)*lp)) {
                lp++;
                continue;
            }

            /*
             * Terminate the token at the next delimiter.  When there is none
             * (last line without a trailing newline) the token already ends
             * at the string terminator and must still be stored.
             */
            sp = strpbrk(lp, " \t\n");
            if (sp != NULL)
                *sp = '\0';

            id = atoi(lp) - 1;  /* zero origin */
            if (id >= *max_labelid)
                *max_labelid = id + 1;
            data[n].labelid[i] = id;
            i++;

            if (sp == NULL)
                break;          /* consumed the final token of the line */
            lp = sp + 1;
        }
        n++;
    }

    fclose(fp);
    return 1;
}
document * feature_matrix (char *file_name, int *maxid, int *maxlen) { document *d; int n, m; FILE *fp; char line[BUFSIZE]; *maxid = -1; *maxlen = 0; if ((fp = fopen(file_name, "r")) == NULL) return NULL; m = file_lines(fp); if ((d = (document *)calloc(m + 1, sizeof(document))) == NULL) return NULL; d[m].len = -1; n = 0; while (fgets(line, sizeof(line), fp)) { int i, len; char *cp, *sp, *lp = line; if (isspaces(line)) continue; len = n_fields(line); if (len > *maxlen) *maxlen = len; if (!(len > 0)) { fprintf(stderr, "feature_matrix: suspicious line:\n%s", line); exit(1); } d[n].id = (int *)calloc(len, sizeof(int)); d[n].cnt = (double *)calloc(len, sizeof(double)); d[n].len = len; if ((d[n].id == NULL) || (d[n].cnt == NULL)) return NULL; i = 0; while (*lp) { int id; double cnt; if ((cp = strchr(lp, ':')) == NULL) break; if ((sp = strpbrk(cp + 1, " \t\n")) == NULL) break; *cp = '\0'; *sp = '\0'; id = atoi(lp) - 1; /* zero origin */ cnt = atof(cp + 1); if (id >= *maxid) *maxid = id + 1; d[n].id[i] = id; d[n].cnt[i] = cnt; lp = sp + 1; i++; } n++; } fclose(fp); return(d); }
/**
 * Build this tag tree by reading markup from `xmlbs`.
 *
 * The stream is consumed alternately as "text up to '<'" and "tag up to
 * '>'".  The first opening tag met while no tag is open becomes this object
 * (its `name` and `args` are filled in); tags met while another tag is open
 * are created as new lt_XMLTags and attached to the innermost open tag with
 * addtag().  Text between tags is attached to the innermost open tag with
 * addraw().
 *
 * Throws (via G_THROW) on: get_count() returning 0, non-blank text outside
 * any open tag, a tag chunk not ending in '>', an unterminated processing
 * instruction or comment, a closing tag that does not match the innermost
 * open tag or has no open tag at all, and a self-closing tag met while no
 * tag is open.
 *
 * NOTE(review): nothing here checks that `level` is empty when the stream
 * ends, so unclosed tags at end of input are not reported by this function.
 */
void lt_XMLTags::init(XMLByteStream &xmlbs) {
  // NOTE(review): the "no_GP" message key suggests get_count() is a
  // reference count and the object must be held by a GP<> -- confirm.
  if(!get_count()) {
    G_THROW( ERR_MSG("XMLTags.no_GP") );
  }
  // Stack of currently open tags: appended on open, last entry deleted on close.
  GPList<lt_XMLTags> level;
  // `raw` starts as whatever precedes the first tag.
  // NOTE(review): gets(0,'<',false) presumably stops before '<' and
  // gets(0,'>',true) presumably includes the '>' -- confirm in XMLByteStream.
  GUTF8String tag,raw(xmlbs.gets(0,'<',false));
  int linesread=xmlbs.get_lines_read();
  // Text before the first tag must be accepted by isspaces().
  if(!isspaces(raw)) {
    G_THROW( (ERR_MSG("XMLTags.raw_string") "\t")+raw);
  }
  // Last encoding handed to the stream, to avoid redundant set_encoding() calls.
  GUTF8String encoding;
  // One iteration per tag chunk; the loop ends when gets() yields an empty string.
  for(int len;(len=(tag=xmlbs.gets(0,'>',true)).length());) {
    if(tag[len-1] != '>') {
      G_THROW((ERR_MSG("XMLTags.bad_tag") "\t")+tag);
    }
    // Dispatch on the second character of the chunk (presumably the one
    // right after the leading '<').
    switch(tag[1]) {
      case '?': {
        // Processing instruction: a '>' may occur inside it, so keep
        // appending chunks until the text ends with "?>".
        while(len < 4 || tag.substr(len-2,len) != "?>") {
          GUTF8String cont(xmlbs.gets(0,'>',true));
          if(!cont.length()) {
            G_THROW( (ERR_MSG("XMLTags.bad_PI") "\t")+tag);
          }
          len=((tag+=cont).length());
        }
        char const *n;
        GUTF8String xtag = tag.substr(2,-1);
        GUTF8String xname = tagtoname(xtag,n);
        // Only the "xml" declaration (compared case-insensitively) is acted on.
        if(xname.downcase() == "xml") {
          // `args` is not a local here; the parsed values land in it.
          ParseValues(n,args);
          for(GPosition pos=args;pos;++pos) {
            if(args.key(pos) == "encoding") {
              // Switch the stream's encoding only when it actually changes.
              const GUTF8String e=args[pos].upcase();
              if(e != encoding) {
                xmlbs.set_encoding((encoding=e));
              }
            }
          }
        }
        break;
      }
      case '!': {
        // Only "<!--" comments get special handling: chunks are appended
        // until the text ends with "-->".  Nothing is stored for any '!' tag.
        if(tag[2] == '-' && tag[3] == '-') {
          while((len < 7) || (tag.substr(len-3,-1) != "-->")) {
            GUTF8String cont(xmlbs.gets(0,'>',true));
            if(!cont.length()) {
              GUTF8String mesg;
              mesg.format( ERR_MSG("XMLTags.bad_comment") "\t%s",(const char *)tag);
              G_THROW(mesg);
            }
            len=((tag+=cont).length());
          }
        }
        break;
      }
      case '/': {
        // Closing tag: must name the innermost open tag, which is then popped.
        GUTF8String xname=tagtoname(tag.substr(2,-1));
        GPosition last=level.lastpos();
        if(last) {
          if(level[last]->name != xname) {
            // Message carries: open tag name, its line, closing name, current line.
            G_THROW( (ERR_MSG("XMLTags.unmatched_end") "\t")
              +level[last]->name+("\t"+GUTF8String(level[last]->get_Line()))
              +("\t"+xname)+("\t"+GUTF8String(linesread+1)));
          }
          level.del(last);
        }else {
          // Closing tag while nothing is open.
          G_THROW( ERR_MSG("XMLTags.bad_form") );
        }
        break;
      }
      default: {
        // Opening tag; a '/' just before the final '>' marks it self-closing.
        GPosition last=level.lastpos();
        GP<lt_XMLTags> t;
        if(last) {
          // Nested tag: create a child under the innermost open tag and
          // push it unless it is self-closing.
          t=new lt_XMLTags(tag.substr(1,len-1));
          level[last]->addtag(t);
          if(tag[len-2] != '/') {
            level.append(t);
          }
        }else if(tag[len-2] != '/') {
          // No tag open yet: this object itself takes the tag's name and
          // values and is pushed as the first open tag.
          char const *n;
          GUTF8String xtag = tag.substr(1,-1);
          name=tagtoname(xtag, n);
          ParseValues(n,args);
          t=this;
          level.append(t);
        }else {
          // Self-closing tag while nothing is open.
          G_THROW( ERR_MSG("XMLTags.no_body") );
        }
        // Record where the tag was seen (line count as of the last text read, plus one).
        t->set_Line(linesread+1);
        break;
      }
    }
    // Text between this tag and the next '<'.  `linesread` is refreshed
    // only when that text is non-empty.
    if((raw=xmlbs.gets(0,'<',false))[0]) {
      linesread=xmlbs.get_lines_read();
      GPosition last=level.lastpos();
      if(last) {
        level[last]->addraw(raw);
      }else if(!isspaces(raw)) {
        // Outside any open tag only blank text is tolerated.
        G_THROW(( ERR_MSG("XMLTags.raw_string") "\t")+raw);
      }
    }
  }
}