static void printTabularHeaderRow(const struct vcfInfoDef *def) /* Parse the column header parts out of def->description and print as table header row; * call this only when looksTabular returns TRUE. */ { regmatch_t substrArr[PATH_LEN]; if (regexMatchSubstr(def->description, COL_DESC_REGEX, substrArr, ArraySize(substrArr))) { puts("<TR>"); // Make a copy of the part of def->description that matches the regex, // then chop by '|' and print out header column tags: int matchSize = substrArr[0].rm_eo - substrArr[0].rm_so; char copy[matchSize+1]; safencpy(copy, sizeof(copy), def->description + substrArr[0].rm_so, matchSize); // Turn '_' into ' ' so description words can wrap inside headers, saving some space subChar(copy, '_', ' '); char *words[PATH_LEN]; int descColCount = chopByChar(copy, '|', words, ArraySize(words)); int i; for (i = 0; i < descColCount; i++) printf("<TH class='withThinBorder'>%s</TH>", words[i]); puts("</TR>"); } else errAbort("printTabularHeaderRow: code bug, if looksTabular returns true then " "regex should work here"); }
static char *readHtmlRecursive(char *fileName, char *database) /* Slurp in an html file. Wherever it contains insertHtmlRegex, recursively slurp that in * and replace insertHtmlRegex with the contents. */ { char *html; readInGulp(fileName, &html, NULL); if (isEmpty(html)) return html; regmatch_t substrs[4]; while (regexMatchSubstr(html, insertHtmlRegex, substrs, ArraySize(substrs))) { struct dyString *dy = dyStringNew(0); // All text before the regex match: dyStringAppendN(dy, html, substrs[0].rm_so); // Is there an #if before the #insert ? boolean doInsert = TRUE; if (substrs[1].rm_so != -1 && (! sameStringN(database, html+substrs[2].rm_so, (substrs[2].rm_eo - substrs[2].rm_so)))) doInsert = FALSE; if (doInsert) { // Recursively pull in inserted file contents from relative path, replacing regex match: char dir[PATH_LEN]; splitPath(fileName, dir, NULL, NULL); char insertFileName[PATH_LEN+FILENAME_LEN]; safecpy(insertFileName, sizeof(insertFileName), dir); safencat(insertFileName, sizeof(insertFileName), html+substrs[3].rm_so, (substrs[3].rm_eo - substrs[3].rm_so)); if (!fileExists(insertFileName)) errAbort("readHtmlRecursive: relative path '%s' (#insert'ed in %s) not found", insertFileName, fileName); char *insertedText = readHtmlRecursive(insertFileName, database); dyStringAppend(dy, insertedText); freez(&insertedText); } // All text after the regex match: dyStringAppend(dy, html+substrs[0].rm_eo); freez(&html); html = dyStringCannibalize(&dy); } return html; }
static void parseMetadataLine(struct vcfFile *vcff, char *line) /* Parse a VCF header line beginning with "##" that defines a metadata. */ { char *ptr = line; if (ptr == NULL && !startsWith(ptr, "##")) errAbort("Bad line passed to parseMetadataLine"); ptr += 2; char *firstEq = strchr(ptr, '='); if (firstEq == NULL) { vcfFileErr(vcff, "Metadata line lacks '=': \"%s\"", line); return; } regmatch_t substrs[8]; // Some of the metadata lines are crucial for parsing the rest of the file: if (startsWith("##fileformat=", line) || startsWith("##format", line)) { if (regexMatchSubstr(line, fileformatRegex, substrs, ArraySize(substrs))) { // substrs[2] is major version #, substrs[3] is set only if there is a minor version, // and substrs[4] is the minor version #. vcff->majorVersion = atoi(line + substrs[2].rm_so); if (substrs[3].rm_so != -1) vcff->minorVersion = atoi(line + substrs[4].rm_so); } else vcfFileErr(vcff, "##fileformat line does not match expected pattern /%s/: \"%s\"", fileformatRegex, line); } else if (startsWith("##INFO=", line) || startsWith("##FORMAT=", line)) { boolean isInfo = startsWith("##INFO=", line); nonAsciiWorkaround(line); if (regexMatchSubstr(line, infoOrFormatRegex, substrs, ArraySize(substrs)) || regexMatchSubstr(line, infoOrFormatRegex3_3, substrs, ArraySize(substrs))) // substrs[2] is ID/key, substrs[3] is Number, [4] is Type and [5] is Description. { struct vcfInfoDef *def = vcfFileAlloc(vcff, sizeof(struct vcfInfoDef)); def->key = vcfFileCloneSubstr(vcff, line, substrs[2]); char *number = vcfFileCloneSubstr(vcff, line, substrs[3]); if (sameString(number, ".") || sameString(number, "A") || sameString(number, "G")) // A is #alts which varies line-to-line; "G" is #genotypes which we haven't // yet seen. Why is there a G here -- shouldn't such attributes go in the // genotype columns? def->fieldCount = -1; else def->fieldCount = atoi(number); def->type = vcfInfoTypeFromSubstr(vcff, line, substrs[4]); // greedy regex pulls in end quote, trim if found: if (line[substrs[5].rm_eo-1] == '"') line[substrs[5].rm_eo-1] = '\0'; def->description = vcfFileCloneSubstr(vcff, line, substrs[5]); slAddHead((isInfo ? &(vcff->infoDefs) : &(vcff->gtFormatDefs)), def); } else vcfFileErr(vcff, "##%s line does not match expected pattern /%s/ or /%s/: \"%s\"", (isInfo ? "INFO" : "FORMAT"), infoOrFormatRegex, infoOrFormatRegex3_3, line); } else if (startsWith("##FILTER=", line) || startsWith("##ALT=", line)) { boolean isFilter = startsWith("##FILTER", line); if (regexMatchSubstr(line, filterOrAltRegex, substrs, ArraySize(substrs)) || regexMatchSubstr(line, filterRegex3_3, substrs, ArraySize(substrs))) { // substrs[2] is ID/key, substrs[4] is Description. struct vcfInfoDef *def = vcfFileAlloc(vcff, sizeof(struct vcfInfoDef)); def->key = vcfFileCloneSubstr(vcff, line, substrs[2]); def->description = vcfFileCloneSubstr(vcff, line, substrs[4]); slAddHead((isFilter ? &(vcff->filterDefs) : &(vcff->altDefs)), def); } else { if (isFilter) vcfFileErr(vcff, "##FILTER line does not match expected pattern /%s/ or /%s/: \"%s\"", filterOrAltRegex, filterRegex3_3, line); else vcfFileErr(vcff, "##ALT line does not match expected pattern /%s/: \"%s\"", filterOrAltRegex, line); } } }