// Reads a field ending with a comma, hyphen, newLine, semi-colon, or // Opening or closing parentheses // Delimiting character is not appended // CurPos is incremented past the delimiting character // Returns the delimiting character, or NULL if blank field char CSVFile::getShortField (char *str, int maxlen) { int i = 0; while (dataLine[curPos] != COMMA && dataLine[curPos] != '\0' && // dataLine[curPos] != HYPHEN && dataLine[curPos] != SEMI_COLON && dataLine[curPos] != OPEN_PAREN && dataLine[curPos] != CLOSE_PAREN && curPos < lineLength && i < maxlen) str[i++] = dataLine[curPos++]; if (dataLine[curPos] != COMMA && dataLine[curPos] != '\0' && // dataLine[curPos] != HYPHEN && dataLine[curPos] != SEMI_COLON && dataLine[curPos] != OPEN_PAREN && dataLine[curPos] != CLOSE_PAREN && i == maxlen && i) { str[i-1] = NULL; skipField (1); } else { str[i] = NULL; curPos++; } // returns the delimiting character return (dataLine[curPos-1])? dataLine[curPos-1]: '\1'; }
// Reads a field ending with a comma, newline, or at maxlen // Delimiting character is not appended // CurPos is incremented to the next field // Returns FALSE if the current field is blank int CSVFile::getField (char *str, int maxlen) { int i = 0; while (dataLine[curPos] != COMMA && i < maxlen && curPos < lineLength) str[i++] = dataLine[curPos++]; if (dataLine[curPos] != COMMA && i == maxlen) { str[--i] = NULL; skipField (1); } else { str[i] = NULL; curPos++; } CString csStr = str; csStr.TrimLeft(); csStr.TrimRight(); strcpy( str, csStr ); // returns count of characters in field return i; }
void MongoSchema::process(){ //std::cout << "Processing " << m_dbname << "." << m_col << std::endl; std::string querystr; querystr.clear(); querystr.append(m_dbname); querystr.append("."); querystr.append(m_col); int recordscount = m_conn->count(querystr); //std::cout << "count:" << recordscount << std::endl; std::auto_ptr<mongo::DBClientCursor> cursor = m_conn->query(querystr, mongo::Query()); //std::set<std::string> fields; while(cursor->more()){ mongo::BSONObj bo = cursor->next(); for( BSONObj::iterator i = bo.begin(); i.more(); ) { BSONElement e = i.next(); if(skipField(e.fieldName())){ continue; } if(e.isSimpleType()){ hashmap::const_iterator keyexsit = m_map.find(e.fieldName()); SchemaModel* sm = new SchemaModel(); if(keyexsit != m_map.end()){ sm = &m_map[e.fieldName()]; sm->count ++; }else{ sm->count = 1; sm->datatype = getType(e); m_map[e.fieldName()] = *sm; } }else if(e.isABSONObj()){ int depth = 0; std::string parent = e.fieldName(); extractBSON(e.Obj(), depth, parent); } } } BSONObjBuilder bOb; BSONArrayBuilder bArr; std::tr1::hash<std::string> hashfunc = m_map.hash_function(); for( hashmap::const_iterator i = m_map.begin(), e = m_map.end() ; i != e ; ++i ) { SchemaModel sm = i->second; float percentage = (float)sm.count / (float)recordscount * 100.0; std::cout.precision(4); BSONObj bo = BSON( "field" << i->first << "percent" << percentage << "datatype" << sm.datatype ); bArr.append(bo); //std::cout << i->first << " -> "<< "Percent: "<< percentage << " (hash = " << hashfunc( i->first ) << ")" << "\r\n"; } bOb.append(m_col, bArr.arr()); m_schema = bOb.obj(); }
// sets current position to the start of a specific field number // field numbers are zero based void CSVFile::setToField (int fieldNum) { if (fieldNum == END) { curPos = lineLength; return; } curPos = 0; skipField (fieldNum); }
// Records the fields of an embedded object in m_map, recursing into nested
// objects.
//
// bo     - the embedded object to scan
// depth  - current nesting level; objects at level m_depth or deeper are not
//          scanned. It is raised while this object is being scanned and
//          restored before returning, so sibling objects are all scanned at
//          the same level.
// parent - dotted name of the enclosing field; each element is recorded as
//          "<parent>.<element name>"
//
// Fields for which skipField() returns true are ignored. Always returns 0.
int MongoSchema::extractBSON(mongo::BSONObj bo, int& depth, std::string parent){
    if(depth >= m_depth){
        return 0;
    }

    depth++;
    for( BSONObj::iterator i = bo.begin(); i.more(); ) {
        BSONElement e = i.next();
        if(skipField(e.fieldName())){
            continue;
        }

        std::string fieldname = parent ;
        fieldname.append(".");
        fieldname.append(e.fieldName());

        if(e.isSimpleType()){
            // Update the entry in place through the iterator; nothing is
            // heap-allocated, so nothing can leak.
            hashmap::iterator known = m_map.find(fieldname);
            if(known != m_map.end()){
                known->second.count ++;
            }else{
                SchemaModel sm = SchemaModel();
                sm.count = 1;
                sm.datatype = getType(e);
                m_map[fieldname] = sm;
            }
        }else if(e.isABSONObj()){
            extractBSON(e.Obj(), depth, fieldname);
        }
    }
    // Undo the increment above: depth is shared by reference, and without
    // this every nested object visited would use up the limit for the
    // siblings that follow it.
    depth--;

    return 0;
}
// Dispatches on the key that was just parsed and stores the matching value
// in the task.
//
// task    - task being filled in
// key     - in: the key to handle; the value readers are given the same
//           pointer, and the setters are then passed *key
// content - parse position, handed on to the reader functions
//
// Returns 0 when parsing is complete, a negative value when a reader
// reported one (presumably an error), and a positive value when the caller
// should go on with the next key.
// A setter is only called when its reader returned a non-negative result, so
// a failed read never stores leftover or uninitialised data in the task.
int handleKey(TDA_Task* task, char** key, char** content) {
    int out = -1;

    if (strcmp(*key, "data") == 0) {
        // Nested object: handle each of its keys in turn.
        match(content, "{");
        openBrackets++;
        out = parse(content, key);
        while (out > -1) {
            out = handleKey(task, key, content);
            if (out <= 0) {
                // 0 is the stop condition, negative is passed up unchanged
                return out;
            }
            out = parse(content, key);
        }
    } else if (strcmp(*key, "assignee") == 0) {
        NamedObject n;
        out = readNamedObject(content, key, &n);
        if (out > -1)
            setTaskAssignee(task, &n);
    } else if (strcmp(*key, "id") == 0) {
        out = getInt(content, key);
        if (out > -1)
            setTaskId(task, *key);
    } else if (strcmp(*key, "name") == 0) {
        out = getString(content, key);
        if (out > -1)
            setTaskName(task, *key);
    } else if (strcmp(*key, "notes") == 0) {
        out = getString(content, key);
        if (out > -1)
            setTaskNotes(task, *key);
    } else if (strcmp(*key, "created_at") == 0) {
        out = getDate(content, key);
        if (out > -1)
            setTaskCreationDate(task, *key);
    } else if (strcmp(*key, "modified_at") == 0) {
        out = getDate(content, key);
        if (out > -1)
            setTaskModificationDate(task, *key);
    } else if (strcmp(*key, "completed_at") == 0) {
        out = getDate(content, key);
        if (out > -1)
            setTaskCompletionDate(task, *key);
    } else if (strcmp(*key, "due_on") == 0) {
        out = getDate(content, key);
        if (out > -1)
            setTaskDueDate(task, *key);
    } else if (strcmp(*key, "completed") == 0) {
        out = getBool(content, key);
        if (out > -1)
            setTaskCompleted(task, (bool) out);
        // Can't return out, because it's 0 on false and 0 is parsing complete
        // NOTE(review): this also replaces a negative result from getBool,
        // so a failed read is not reported to the caller -- confirm intent.
        out = 2;
    } else if (strcmp(*key, "tags") == 0) {
        out = match(content, "[");
        while (out > -1 && out != ']') {
            NamedObject n;
            out = readNamedObject(content, key, &n);
            if (out > -1)
                addTaskTag(task, &n);
        }
        out = match(content, ",");
    } else {
        // Unknown key: skip its value and pass the result on as is.
        out = skipField(content);
    }

    return out;
}