Esempio n. 1
0
/**
* Return the confidence with with this algorithm can load the file
* @param line the line to parse
* @param cols The vector to parse into
* @param expectedCommas The number of expected commas in the line
* @param isOldTBL boolean to deal with new and old TBL formats.
* @returns An integer specifying how many columns were parsed into.
* @throws std::length_error if anything other than 17 columns (or 16
* cell-delimiting commas) is found when loading an old Refl TBL. A
* length_error will be thrown for new TBL formats if there are less column
* headings than expected commas.
*/
size_t LoadTBL::getCells(std::string line, std::vector<std::string> &cols,
                         size_t expectedCommas, bool isOldTBL) const {
  // first check the number of commas in the line.
  size_t found = countCommas(line);
  if (isOldTBL) {
    if (found == expectedCommas) {
      // If there are 16 that simplifies things and i can get boost to do the
      // hard
      // work
      boost::split(cols, line, boost::is_any_of(","),
                   boost::token_compress_off);
    } else if (found < expectedCommas) {
      // less than 16 means the line isn't properly formatted. So Throw
      std::string message =
          "A line must contain " + std::to_string(expectedCommas) +
          " cell-delimiting commas. Found " + std::to_string(found) + ".";
      throw std::length_error(message);
    } else {
      // More than 16 will need further checks as more is only ok when pairs of
      // quotes surround a comma, meaning it isn't a delimiter
      std::vector<std::vector<size_t>> quoteBounds;
      findQuotePairs(line, quoteBounds);
      // if we didn't find any quotes, then there are too many commas and we
      // definitely have too many delimiters
      if (quoteBounds.empty()) {
        std::string message =
            "A line must contain " + std::to_string(expectedCommas) +
            " cell-delimiting commas. Found " + std::to_string(found) + ".";
        throw std::length_error(message);
      }
      // now go through and split it up manually. Throw if we find ourselves in
      // a
      // positon where we'd add a 18th value to the vector
      csvParse(line, cols, quoteBounds, expectedCommas);
    }
  } else {
    std::vector<std::vector<size_t>> quoteBounds;
    findQuotePairs(line, quoteBounds);
    csvParse(line, cols, quoteBounds, expectedCommas);
    if (cols.size() > expectedCommas) {
      for (size_t i = expectedCommas + 1; i < cols.size(); i++) {
        cols[expectedCommas].append(
            boost::lexical_cast<std::string>("," + cols[i]));
      }
    } else if (cols.size() < expectedCommas) {
      std::string message =
          "A line must contain " + std::to_string(expectedCommas) +
          " cell-delimiting commas. Found " + std::to_string(found) + ".";
      throw std::length_error(message);
    }
  }
  return cols.size();
}
Esempio n. 2
0
struct slName *tagFindValList(struct tagStanza *stanza, char *tag)
/* Read in tag as a list. Do a slFreeList on this when done.
 * Returns NULL if no value */
{
char *val = tagFindVal(stanza, tag);
return csvParse(val);
}
Esempio n. 3
0
struct slName *tagMustFindValList(struct tagStanza *stanza, char *tag)
/* Find tag or die trying, and return it as parsed out list */
{
char *val = tagMustFindVal(stanza, tag);
return csvParse(val);
}
Esempio n. 4
0
void rWriteJson(FILE *f, struct tagStorm *storm, struct tagStanza *stanza, 
    struct ttjSubObj *obj, struct ttjSubObj *labeledObj, struct hash *schemaHash,
    struct dyString *scratch)
/* Write out json object recursively */
{
boolean isArray = allDigitNames(obj->children);
struct ttjSubObj *field; 
if (isArray)
    {
    fprintf(f, "["); 
    for (field = obj->children; field != NULL; field = field->next)
        {
	if (field != obj->children) // Only write comma separators after the first one
	   fprintf(f, ",");
	rWriteJson(f, storm, stanza, field, labeledObj, schemaHash, scratch);
	}
    fprintf(f, "]");
    }
else
    { 
    fprintf(f, "{"); 
    boolean firstOut = TRUE;

    /* Figure out if we need to attach a core object and do so.  The figuring bit is
     * frankly clunky. */
    char *objType = labeledObj->name;
    if (sameString(objType, "submitter") || sameString(objType, "contributors"))
         objType = "contact";
    else if (sameString(objType, "publications"))
         objType = "publication";
    else if (sameString(objType, "protocol"))  // protocol is actually just protocol_id
         objType = "string";
    else if (sameString(objType, "protocols")) // but protocols array is protocol
         objType = "protocol";
    else if (sameString(objType, "umi_barcode"))
         objType = "barcode";
    if (objNeedsCore(objType))
        printCore(f, objType, &firstOut);


    for (field = obj->children; field != NULL; field = field->next)
	{
	char *fieldName = field->name;
	if (field->children != NULL)
	     {
	     /* Look for funny characteristics_ as these are largely up to user. */
	     if (startsWith("characteristics_", field->name))
	         errAbort("No '.' allowed in field name after characteristics_ in %s", 
		    field->children->fullName);

	     /* If actually have data in this stanza write our field. */
	     if (prefixDotInStanza(field->fullName, stanza, scratch))
		 {
		 writeJsonTag(f, fieldName, &firstOut);
		 rWriteJson(f, storm, stanza, field, field, schemaHash, scratch);
		 }
	     }
	else
	    {
	    char *val = tagFindVal(stanza, field->fullName);
	    if (val != NULL)
		{
		boolean isNum = FALSE;
		char *schemaName = tagSchemaFigureArrayName(field->fullName, scratch);
		struct tagSchema *schema = hashFindVal(schemaHash, schemaName);
		if (schema != NULL)
		   isNum = (schema->type == '#' || schema->type == '%');
		if (sameString(fieldName, "files"))
		    {
		    writeJsonTag(f, "lanes", &firstOut);
		    writeLaneArray(f, stanza, val);
		    }
		else
		    {
		    boolean isArray = FALSE;
		    writeJsonTag(f, fieldName, &firstOut);
		    if (schema != NULL)
			isArray = schema->isArray;
		    struct slName *list = csvParse(val);
		    if (isArray)
			fputc('[', f);
		    else
			{
			if (list->next != NULL)  // more than one element
			   errAbort("Multiple vals for scalar tag %s in stanza starting line %d of %s",
				field->fullName, stanza->startLineIx, storm->fileName);
			}
		    struct slName *el;
		    for (el = list; el != NULL; el = el->next)
			{
			writeJsonVal(f, el->name, isNum);
			if (el->next != NULL)
			    fputc(',', f);
			}
		    if (isArray)
			fputc(']', f);
		    slFreeList(&list);
		    }
		}
	    }
	}
    fprintf(f, "}");
    }
}
Esempio n. 5
0
void writeLaneArray(FILE *f, struct tagStanza *stanza, char *csvList)
/* Write out an array of file objects base on file names in csvList */
{
struct slName *list = csvParse(csvList), *file;

/* Figure out number of files per lanes.  We'll take the lane number for the
 * file names if available, but if not we'll assume list is sorted and will
 * put the appropriate number of files in each lane. */
int laneCounter = 1;
int filesPerLane = 1;
int curFileInLane = 0;
char *pairedEnds = tagMustFindVal(stanza, "assay.seq.paired_ends");
if (!sameString(pairedEnds, "no"))
    filesPerLane = 2;

/* First pass, make a list of lanes */
struct laneFiles *laneList = NULL, *lane;
for (file = list; file != NULL; file = file->next)
    {
    /* Figure out laneIx, from file name if possible, otherwise by counting */
    char *fileName = file->name;
    int laneIx = laneFromFileName(fileName);
    if (laneIx == 0)
	laneIx = laneCounter;
    /* Update laneCounter */
    if (++curFileInLane >= filesPerLane)
	{
	++laneCounter;
	curFileInLane = 0;
	}

    /* Find lane in laneList, make new lane if it's not there. */
    lane = laneFilesFind(laneList, laneIx);
    if (lane == NULL)
        {
	AllocVar(lane);
	lane->laneIx = laneIx;
	slAddHead(&laneList, lane);
	}
    slNameAddTail(&lane->fileList, fileName);
    }
slReverse(&laneList);
slSort(&laneList, laneFilesCmp);

/* Now make a lane array and go through lane list */
boolean firstOut = TRUE;
fputc('[', f);
for (lane = laneList; lane != NULL; lane = lane->next)
    {
    /* Write comma between lane objects */
    if (firstOut)
	firstOut = FALSE;
    else
	fputc(',', f);

    /* Write lane object starting with lane index*/
    fputc('{', f);
    fprintf(f, "\"%s\": %d", "number", lane->laneIx);

    /* Rest of lane fields are based on files we contain */
    for (file = lane->fileList; file != NULL; file = file->next)
	{
	/* Calculate type */
	char *fileName = file->name;

	char *type = NULL;
	if (sameString(pairedEnds, "no"))
	    {
	    type = "r1";
	    }
	else if (sameString(pairedEnds, "yes"))
	    {
	    int end = endFromFileName(fileName);
	    if (end == 1)
		type = "r1";
	    else
		type = "r2";
	    }
	else
	    errAbort("Unrecognized paired_ends %s", pairedEnds);

	fprintf(f, ",\"%s\":", type);
	writeJsonVal(f, fileName, FALSE);
	}
    fputc('}', f);
    }

fputc(']', f);
slFreeList(&list);
}