/*****************************************************************************
*
*   size_t AsnBufGetWordBreak(str, stringlen, maxlen)
*      Chooses where to break str so that at most maxlen characters are
*      emitted and, when possible, the break falls at white space.
*
*      str       - text to break (stringlen characters are available)
*      stringlen - number of characters in str
*      maxlen    - maximum number of characters that fit
*
*      Returns stringlen when the whole string fits.  Otherwise scans
*      backwards from str[maxlen] to the nearest run of white space and
*      returns the number of characters that precede that run.  If no such
*      break exists (or it would leave nothing), returns maxlen, i.e. the
*      word has to be split.
*
*****************************************************************************/
static size_t LIBCALL AsnBufGetWordBreak (CharPtr str, size_t stringlen, size_t maxlen)
{
    size_t len;

    if (stringlen <= maxlen)
        return stringlen;

    /*
     * str[len - 1] is the character under examination; the scan starts at
     * str[maxlen], the character just PAST the end of the region.  Indexing
     * (rather than walking a pointer) guarantees we never form a pointer
     * that lies before the start of str when no white space is found.
     */
    len = maxlen + 1;

    while ((len > 0) && (! IS_WHITESP(str[len - 1])))
        len--;                      /* back up to the nearest white space */

    while ((len > 0) && (IS_WHITESP(str[len - 1])))
        len--;                      /* move past the white space itself */

    if (len < 1)        /* never found any whitespace or only 1 space */
        len = maxlen;   /* have to break a word */

    return len;
}
/*
 * Extract the next white-space delimited word of "str" into "token".
 *
 * The word is written upper-cased (TO_UPPER) starting at token[1] and is
 * followed by a single blank and a terminating NUL; token[0] is never
 * written by this function.  At most 249 characters are copied; a longer
 * word is cut and marked with a trailing " *", so "token" must provide
 * room for at least 253 characters.
 *
 * Returns a pointer to the first unread character of "str" (the white
 * space after the word, or the rest of an over-long word), or NULL when
 * the end of "str" was reached -- either before any word was found (then
 * only token[2] has been set to NUL) or right after the copied word.
 */
static CharPtr get_token(CharPtr str, CharPtr token)
{
    int n;

    token[2] = '\0';

    /* skip leading white space */
    while (IS_WHITESP(*str)) {
        if (*str == '\0')
            return NULL;
        ++str;
    }
    if (*str == '\0')
        return NULL;

    n = 1;
    while (n < 250) {
        if (IS_WHITESP(*str) || *str == '\0') {
            /* word is complete: NULL signals that the input is exhausted */
            CharPtr rest = IS_WHITESP(*str) ? str : NULL;

            token[n] = ' ';
            token[n + 1] = '\0';
            return rest;
        }
        token[n] = TO_UPPER(*str);
        str++;
        n++;
    }

    /* word too long: mark the truncation */
    token[n] = ' ';
    token[n + 1] = '*';
    token[n + 2] = '\0';
    return str;
}
/*
 * Decide how many leading characters of "str" to put on a line that is at
 * most "max_col" characters wide.
 *
 * Returns the number of characters to take from "str".  "*dash" is set to 1
 * when the line ends inside an alphabetic word and the caller is advised to
 * replace the last returned character with a dash; otherwise it is set to 0.
 *
 * The break is placed, in order of preference:
 *   - right at the column limit if can_break() allows it there;
 *   - inside the last alphabetic word, provided at least MIN_LEAD letters
 *     stay on this line and MIN_TAIL letters go to the next one;
 *   - after the last word that fits completely.
 * If the very first word does not fit, it is cut at the column limit (with
 * a dash recommended when the cut is between two letters and the line is
 * longer than MAX_NO_DASH).
 */
NLM_EXTERN size_t Nlm_stream2text(const Nlm_Char FAR PNTR str, size_t max_col, int PNTR dash)
{
  size_t brk;          /* index of the nearest breakable position */
  size_t pos;
  size_t n_lead = 0;   /* letters of the split word left on this line */
  size_t n_tail = 0;   /* letters of the split word beyond the limit  */
  size_t len    = Nlm_StringLen( str );

  if (max_col < len)
    len = max_col;

  *dash = 0;
  if (len == 0)
    return len;
  if ( can_break(str[len-1], str[len]) )
    return len;

  /* walk back to the end of the last word that fits completely */
  brk = len - 1;
  while (brk != 0  &&  !IS_WHITESP(str[brk])  &&
         !can_break(str[brk], str[brk+1]))
    brk--;
  while (brk != 0  &&  IS_WHITESP(str[brk]))
    brk--;

  if (brk == 0)
    { /* the first word is longer than "max_col" */
      if (len > MAX_NO_DASH  &&  IS_ALPHA(str[len-1])  &&  IS_ALPHA(str[len]))
        *dash = 1;  /* recommend a dash in place of the last symbol */
      return len;
    }

  /* measure both halves of the alphabetic word straddling the limit */
  for (pos = len;  str[pos] != '\0'  &&  IS_ALPHA(str[pos]);  pos++)
    n_tail++;
  for (pos = len - 1;  IS_ALPHA(str[pos]);  pos--)
    n_lead++;
  ASSERT ( pos > 0 );

  /* shift letters from the lead to the tail until the tail is long enough */
  while (n_lead > MIN_LEAD  &&  n_tail < MIN_TAIL)
    {
      n_lead--;
      n_tail++;
    }

  if (n_lead < MIN_LEAD  ||  n_tail < MIN_TAIL)
    return brk + 1;  /* cannot hyphenate: move the whole word down */

  *dash = 1;
  return pos + n_lead + 1;
}
/*
 * Convert multi-line text into a single "stream" string.
 *
 * Leading white space is dropped, every run of white space between words is
 * replaced by one SPACE, and a word that was hyphenated across a line break
 * ("<letter>-" followed by white space containing '\n') is merged back by
 * removing the dash.  Trailing white space is dropped.
 *
 * Returns a newly allocated string owned by the caller, or NULL if "str" is
 * NULL, contains no non-space symbols, or the buffer cannot be allocated.
 */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_text2stream(const Nlm_Char FAR PNTR str)
{
  int         on_space    = 0;  /* inside a run of white space            */
  int         saw_newline = 0;  /* ...and that run contained a '\n'       */
  Nlm_CharPtr line, s, shrunk;

  if ( !str )
    return NULL;

  while (*str  &&  IS_WHITESP( *str ))
    str++;
  if ( !*str )
    return NULL;

  /* the result can never be longer than the (left-trimmed) input */
  s = line = (Nlm_CharPtr) Nlm_MemNew(Nlm_StringLen(str) + 1);
  if ( !line )
    return NULL;

  for ( ;  *str;  str++)
    {
      if ( IS_WHITESP(*str) )
        {
          if (*str == '\n')
            saw_newline = 1;
          on_space = 1;
          continue;
        }

      if ( on_space )
        {
          if (saw_newline  &&  s - line > 1  &&
              *(s-1) == '-'  &&  IS_ALPHA(*(s-2)))
            s--;  /* eat dash before end-of-line, merge the broken word */
          else
            *s++ = SPACE;

          on_space    = 0;
          saw_newline = 0;
        }
      *s++ = *str;
    }
  *s = '\0';

  /* give back the unused tail; if that fails the full buffer is still valid */
  shrunk = (Nlm_CharPtr) realloc(line, Nlm_StringLen(line) + 1);
  return shrunk ? shrunk : line;
}
/* Tell whether a line may be broken between two adjacent symbols, judging
 * by those two symbols alone:  "ch0" is to the left of the prospective
 * break and "ch1" is to the right of it.
 * Returns 1 if the break is allowed, 0 otherwise.
 */
static int can_break(Nlm_Char ch0, Nlm_Char ch1)
{
  /* end of string, or white space on either side */
  if (ch1 == '\0')
    return 1;
  if (IS_WHITESP(ch1)  ||  IS_WHITESP(ch0))
    return 1;

  /* always allowed in front of an opening bracket */
  if (ch1 == '('  ||  ch1 == '['  ||  ch1 == '{')
    return 1;

  switch ( ch0 )
    {
    /* always allowed after these */
    case '\\':  case '/':  case '*':  case ';':  case ':':  case ',':
      return 1;

    /* allowed after these unless the symbol is doubled ("--", "&&", ...) */
    case '-':  case '+':  case '=':  case '&':  case '|':
    case ')':  case '}':  case ']':
      return (ch1 != ch0) ? 1 : 0;

    /* allowed after sentence punctuation unless more of it follows */
    case '?':  case '!':  case '.':
      return (ch1 != '.'  &&  ch1 != '?'  &&  ch1 != '!') ? 1 : 0;

    default:
      return 0;
    }
}
static Nlm_Uint8 s_StringToUint8(const char *str, const char **endptr, int *sgn) { int sign = 0; /* actual sign */ Nlm_Uint8 limdiv, limoff, result; const char *s, *save; char c; /* assume error */ *endptr = 0; if (!str) return 0; s = str; while (IS_WHITESP(*s)) s++; /* empty string - error */ if (*s == '\0') return 0; if (*sgn == 1) { if (*s == '-') { sign = 1; s++; } else if (*s == '+') { s++; } } save = s; limdiv = UINT8_MAX / 10; limoff = UINT8_MAX % 10; result = 0; for (c = *s; c; c = *++s) { if (!IS_DIGIT(c)) { break; } c -= '0'; if (result > limdiv || (result == limdiv && c > limoff)) { /* overflow */ return 0; } result *= 10; result += c; } /* there was no conversion - error */ if (save == s) return 0; *sgn = sign; *endptr = s; return result; }
/* Copy "n" symbols from "src" to "targ" like memcpy() does, except that
 * every white-space symbol is stored as SPACE.
 * None of the "n" source symbols may be '\0' (checked by ASSERT).
 */
static void x_memcpy(Nlm_Char FAR PNTR targ, const Nlm_Char FAR PNTR src,
                     size_t n)
{
  size_t i;

  for (i = 0;  i < n;  i++)
    {
      ASSERT ( src[i] );
      targ[i] = IS_WHITESP(src[i]) ? SPACE : src[i];
    }
}
/*
 * Descriptor callback: looks at a title descriptor and reports a phrase
 * from it through RecordTitle().
 *
 * For each occurrence of "complete " in the title, the word that follows is
 * skipped; if the text after that word starts (case-insensitively) with
 * " genome", " DNA" or " sequence", the title string is cut right after
 * that word IN PLACE and the phrase starting at "complete " is recorded.
 * If no such phrase exists, the literal "genome DNA" is recorded when the
 * title contains it, and -- unless sdp->bulk is set -- "genomic DNA"
 * likewise.
 *
 *   vnp       descriptor; ignored unless its choice is Seq_descr_title
 *   userdata  a ScanDataPtr, passed on to RecordTitle()
 */
static void DoTitle (SeqDescrPtr vnp, Pointer userdata)

{
  static CharPtr  suffix []    = {" genome", " DNA", " sequence"};
  static size_t   suffixLen [] = {7, 4, 9};

  Int2         i;
  CharPtr      ptr, str, tmp;
  ScanDataPtr  sdp;

  if (vnp->choice != Seq_descr_title) return;
  str = (CharPtr) vnp->data.ptrvalue;
  if (StringHasNoText (str)) return;

  sdp = (ScanDataPtr) userdata;

  for (ptr = StringStr (str, "complete ");
       ptr != NULL;
       ptr = StringStr (tmp, "complete ")) {

    /* step over "complete " (9 characters) and the word after it */
    tmp = ptr + 9;
    while (*tmp != '\0' && (! IS_WHITESP (*tmp))) {
      tmp++;
    }
    if (*tmp == '\0') return;

    for (i = 0; i < 3; i++) {
      if (StringNICmp (tmp, suffix [i], suffixLen [i]) == 0) {
        tmp [suffixLen [i]] = '\0';   /* truncates the title itself */
        RecordTitle (sdp, ptr);
        return;
      }
    }
  }

  if (StringStr (str, "genome DNA") != NULL) {
    RecordTitle (sdp, "genome DNA");
    return;
  }

  if (sdp->bulk) return;

  if (StringStr (str, "genomic DNA") != NULL) {
    RecordTitle (sdp, "genomic DNA");
  }
}
/*****************************************************************************
*
*   Int4 AsnTypeStringToHex(from, len, to, left)
*      Converts a string of hex digits ("0"-"9", "A"-"F", either case) into
*      binary octets, two digits per octet, high digit first.
*
*      from - text to convert (len characters are read)
*      len  - number of characters available in from; a value <= 0
*             converts nothing
*      to   - receives the octets; must have room for len / 2 bytes
*      left - may be NULL, in which case it is ignored.  Otherwise *left is
*             set to 0 when the input ended on a complete digit pair, or to
*             the number of characters remaining counted from the first
*             digit of an unfinished pair (since white space may follow
*             that digit, this can be more than 1)
*
*      Internal and trailing white space is skipped.
*      Returns the number of octets stored in to, or a negative number if
*      a character that is neither white space nor a hex digit is found.
*
*****************************************************************************/
NLM_EXTERN Int4 LIBCALL AsnTypeStringToHex (Pointer from, Int4 len, Pointer to, Int4Ptr left)
{
    BytePtr f = (BytePtr) from;
    BytePtr t = (BytePtr) to;
    Byte octet = 0, value, digit;
    int high = 1;           /* next digit is the first (high) one of a pair */
    Int4 added = 0;

    if (left != NULL)
        *left = 0;

    /* "len > 0" (not "len != 0") so a negative length cannot run away */
    for ( ; len > 0; len--, f++)
    {
        if (IS_WHITESP(*f))     /* skip spaces */
            continue;

        if (high && (left != NULL))
            *left = len;        /* point at it in case one letter left */

        value = TO_UPPER(*f);
        if ((value >= 'A') && (value <= 'F'))
            digit = (Byte)((value - 'A') + 10);
        else if ((value >= '0') && (value <= '9'))
            digit = (Byte)(value - '0');
        else
            return (Int4)(-1);

        if (high)               /* first letter of pair */
        {
            octet = (Byte)(digit << 4);
            high = 0;           /* goto second letter */
        }
        else                    /* letter pair was read */
        {
            *t++ = (Byte)(octet | digit);   /* record the octet */
            added++;
            if (left != NULL)
                *left = 0;      /* nothing left so far */
            high = 1;           /* reset for first letter */
        }
    }

    return added;
}
/*
 * Pubdesc callback: for every PUB_Man entry in pdp->pub whose title has
 * text, is longer than 3 characters and starts with a lower-case letter,
 * cut the title after its first word IN PLACE and pass that word to
 * RecordThesis().
 *
 *   pdp       publication descriptor; NULL is ignored
 *   userdata  a ScanDataPtr, passed on to RecordThesis()
 */
static void DoThesis (PubdescPtr pdp, Pointer userdata)

{
  CitBookPtr  cbp;
  CharPtr     end;
  CharPtr     title;
  ValNodePtr  ttl;
  ValNodePtr  vnp;

  if (pdp == NULL) return;

  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice != PUB_Man) continue;

    cbp = (CitBookPtr) vnp->data.ptrvalue;
    if (cbp == NULL) continue;

    ttl = cbp->title;
    if (ttl == NULL) continue;

    title = (CharPtr) ttl->data.ptrvalue;
    if (StringHasNoText (title)) continue;
    if (StringLen (title) <= 3) continue;
    if (! IS_LOWER (*title)) continue;

    /* find the end of the first word and terminate the title there */
    for (end = title; *end != '\0' && (! IS_WHITESP (*end)); end++) continue;
    *end = '\0';

    RecordThesis ((ScanDataPtr) userdata, title);
  }
}
/*****************************************************************************
*
*   Int2 AsnLexWordEx(aip, fix_non_print)
*      Scans the next lexical element from the line buffer of aip and
*      returns its token code.
*
*      - If aip->tagsaved is set (a token was read ahead), the flag is
*        cleared and the stored aip->token is returned without scanning.
*      - A line is loaded with AsnIoGets() when aip->bytes is zero and each
*        time an end of line is reached while skipping; EOF_TOKEN is
*        returned as soon as AsnIoGets() returns NULL.
*      - aip->state selects the scanner:
*          IN_STRING_STATE  inside "..." : returns IN_STRING for a piece of
*                           text, or END_STRING at a closing quote.  A
*                           doubled quote ("") is an internal quote: one
*                           quote is counted into the piece and the scan
*                           resumes after the pair.
*          IN_BITHEX_STATE  inside '...' : returns IN_BITHEX for the digits,
*                           or, at the closing quote, OCTETS / ASNBITS
*                           depending on the 'H' / 'B' that follows (error
*                           58 and ERROR_TOKEN otherwise).
*          otherwise        normal scanning: characters <= ' ' and "--"
*                           comments are skipped, then punctuation, "::=",
*                           REF (starts with upper case), IDENT (starts
*                           with lower case) or NUMBER is recognised;
*                           anything else reports error 59 and yields
*                           ERROR_TOKEN.
*
*      fix_non_print - handling of characters outside ' '..'~' found in a
*                      string: 2 leaves them alone; any other value
*                      overwrites them with '#', and for 0 or 3 error 106
*                      is reported through AsnIoErrorMsg() first.
*
*      On return (other than EOF_TOKEN) aip->word points at the start of
*      the element (set only when a scan was done), and aip->wordlen,
*      aip->linepos, aip->state and aip->token have been updated.
*
*****************************************************************************/
static Int2 AsnLexWordEx (AsnIoPtr aip, Uint1 fix_non_print) { register CharPtr pos; Int1 state; Int2 token; register int linepos, len; int done; if (aip->tagsaved) /* had to read ahead */ { aip->tagsaved = FALSE; return aip->token; } if (! aip->bytes) /* no data loaded */ AsnIoGets(aip); linepos = aip->linepos; pos = aip->linebuf + linepos; state = aip->state; len = 0; while (*pos == '\n' || *pos == '\r') /* skip empty lines */ { pos = AsnIoGets(aip); /* get a line */ if (pos == NULL) return EOF_TOKEN; } if (state == IN_STRING_STATE) { aip->word = pos; if ((* pos == '\"') && (*(pos + 1) != '\"')) /* end of string */ { token = END_STRING; pos++; state = 0; /* reset state */ } else { token = IN_STRING; while ((* pos != '\"') && (* pos != '\n') && (* pos != '\r')) { if ((fix_non_print != 2) && ((*pos < ' ') || (*pos > '~'))) { done = (int)(*pos); *pos = '\0'; if ((fix_non_print == 0) || (fix_non_print == 3)) { AsnIoErrorMsg(aip, 106, done, aip->word); } done = 0; *pos = '#'; /* replace with # */ } pos++; len++; } if ((*pos != '\n') && (*pos != '\r') && (* (pos + 1) == '\"')) /* internal quote */ { len++; /* include in previous string */ pos += 2; /* point to rest of string */ } } } else if (state == IN_BITHEX_STATE) { aip->word = pos; if (*pos == '\'') /* end of binhex */ { state = 0; /* set to normal */ pos++; /* move past quote */ while (IS_WHITESP(*pos)) { if (*pos == '\n' || *pos == '\r') /* skip empty lines */ { pos = AsnIoGets(aip); /* get a line */ if (pos == NULL) return EOF_TOKEN; } else pos++; } if (* pos == 'H') token = OCTETS; else if (* pos == 'B') token = ASNBITS; else { AsnIoErrorMsg(aip, 58, aip->linenumber); token = ERROR_TOKEN; } pos++; /* move past H or B */ } else { token = IN_BITHEX; while ((* pos != '\'') && (* pos != '\n') && (* pos != '\r')) { pos++; len++; } } } else /* normal scanning */ { while ((* pos <= ' ') || ((*pos == '-') && (*(pos+1) == '-'))) /* skip leading white space */ { if (*pos == '\n' || *pos == '\r') { pos = AsnIoGets(aip); 
if (pos == NULL) return EOF_TOKEN; } else if ((*pos == '-') && (*(pos+1) == '-')) /* skip comments */ { pos += 2; done = 0; while (! done) { if ((*pos == '-') && (*(pos +1) == '-')) { pos += 2; done = 1; } else if (*pos == '\n' || *pos == '\r') done = 1; else pos++; } } else pos++; } aip->word = pos; if (* pos == '\"') { token = START_STRING; state = IN_STRING_STATE; } else if (* pos == '\'') { token = START_BITHEX; state = IN_BITHEX_STATE; } else if (* pos == ',') token = COMMA; else if (* pos == '{') token = START_STRUCT; else if (* pos == '}') token = END_STRUCT; else if (* pos == '[') token = START_TAG; else if (* pos == ']') token = END_TAG; else if (* pos == '(') token = OPEN_PAREN; else if (* pos == ')') token = CLOSE_PAREN; else if (* pos == ';') token = SEMI_COLON; else if (* pos == ':') { if ((*(pos + 1) == ':') && (*(pos + 2) == '=')) { token = ISDEF; pos += 2; len = 3; } else { AsnIoErrorMsg(aip, 59, *pos, aip->linenumber); token = ERROR_TOKEN; } } else if (IS_UPPER(*pos)) /* a reference or keyword */ { token = REF; while ((IS_ALPHANUM(*pos)) || (*pos == '-')) { pos++; len++; } pos--; /* move back for increment at end */ len--; } else if (IS_LOWER(*pos)) /* an identifier or valuereference */ { token = IDENT; while ((IS_ALPHANUM(*pos)) || (*pos == '-')) { pos++; len++; } pos--; /* move back for increment at end */ len--; } else if ((IS_DIGIT(*pos)) || ((*pos == '-') && (IS_DIGIT(*(pos+1))))) { token = NUMBER; if (*pos == '-') { pos++; len++; } while (IS_DIGIT(*pos)) { pos++; len++; } pos--; /* move back for increment at end */ len--; } else { AsnIoErrorMsg(aip, 59, *pos, aip->linenumber); token = ERROR_TOKEN; } len++; pos++; /* move over last symbol */ } aip->linepos = pos - aip->linebuf; /******************** check on MSWIN linepos = 0; while (pos != linebuf) { linepos++; pos++; } aip->linepos = linepos; **********************/ aip->state = state; aip->wordlen = len; aip->token = token; return token; }
/*
 * Parse a line that is neither a definition line nor a sequence line.
 *
 * Leading white space is dropped; the rest of the line (including any
 * trailing white space) is copied into a newly allocated string.  If that
 * text contains no white space at all and IsValidId() accepts it, it is
 * stored as the "id" of the result; otherwise it is stored as "other".
 *
 * Returns a newly allocated OtherLineInfo that owns the copied string, or
 * NULL if lineStr is NULL or blank, or if memory cannot be allocated.
 */
static OtherLineInfoPtr s_ParseOtherLine (CharPtr lineStr)
{
  CharPtr          src;
  CharPtr          dst;
  CharPtr          otherStr;
  Int4             wordCount;
  OtherLineInfoPtr otherLinePtr;

  if (lineStr == NULL)
    return NULL;

  /* Skip leading white space; a blank line is not an "other" line */

  src = lineStr;
  while (*src != '\0' && IS_WHITESP(*src))
    src++;
  if (*src == '\0')
    return NULL;

  /* Copy the remainder, counting one extra "word" per white-space   */
  /* character, so that wordCount == 1 means "no white space at all" */

  otherStr = (CharPtr) MemNew (StringLen(src)+1);
  if (otherStr == NULL)
    return NULL;

  wordCount = 1;
  for (dst = otherStr; *src != '\0'; src++, dst++)
    {
      if (IS_WHITESP(*src))
        wordCount++;
      *dst = *src;
    }
  *dst = '\0';

  /* If we made it to here, then */
  /* it's a valid "other" line.  */

  otherLinePtr = (OtherLineInfoPtr) MemNew (sizeof (OtherLineInfo));
  if (otherLinePtr == NULL)
    {
      MemFree (otherStr);
      return NULL;
    }

  if ((wordCount == 1) && IsValidId(otherStr))
    {
      otherLinePtr->id    = otherStr;
      otherLinePtr->other = NULL;
    }
  else
    {
      otherLinePtr->id    = NULL;
      otherLinePtr->other = otherStr;
    }

  /* Return successfully */

  return otherLinePtr;
}
/*
 * Lay out "str" in a line that is exactly "len" symbols wide.
 *
 * Leading and trailing white space of "str" is ignored and every remaining
 * white-space symbol is written as SPACE.  If the text does not fit, its
 * first "len" symbols are used.  Otherwise it is placed according to
 * "method":
 *   RL_Left    flush left
 *   RL_Right   flush right
 *   RL_Center  centered (extra space, if odd, goes to the right)
 *   RL_Spread  justified: the padding is distributed over the gaps between
 *              words, leftmost gaps getting the remainder; text without
 *              any gap is centered instead
 *
 * Returns a newly allocated, NUL-terminated string of length "len" owned by
 * the caller (all SPACEs if "str" is NULL or blank).  Returns 0 if memory
 * cannot be allocated, or if "method" is not one of the above and the text
 * is shorter than "len".
 */
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_rule_line(const Nlm_Char FAR PNTR str, size_t len, enumRuleLine method)
{
  size_t      str_len;
  size_t      n_space;

  /* allocate and initialize the resulting string */
  Nlm_CharPtr s = (Nlm_CharPtr) Nlm_MemNew(len + 1);
  if ( !s )
    return 0;
  Nlm_MemSet(s, SPACE, len);
  s[len] = '\0';

  /* skip leading and trailing spaces */
  if ( !str )
    return s;
  while ( IS_WHITESP(*str) )
    str++;
  if ( !*str )
    return s;

  str_len = Nlm_StringLen( str );
  while ( IS_WHITESP(str[str_len-1]) )
    str_len--;

  /* truncate the original string if it doesn't fit */
  if (len <= str_len)
    {
      x_memcpy(s, str, len);
      return s;
    }

  n_space = len - str_len;
  switch ( method )
    {
    case RL_Left:
      x_memcpy(s, str, str_len);
      break;

    case RL_Right:
      x_memcpy(s + n_space, str, str_len);
      break;

    case RL_Spread:
      {
        size_t n_gap  = 0;  /* number of white-space runs inside the text */
        int    in_gap = 0;
        size_t i;

        /* count the gaps; the text's own spaces join the padding pool */
        for (i = 0;  i < str_len;  i++)
          {
            ASSERT ( str[i] );
            if ( IS_WHITESP(str[i]) )
              {
                if ( !in_gap )
                  {
                    n_gap++;
                    in_gap = 1;
                  }
                n_space++;
              }
            else
              in_gap = 0;
          }
        ASSERT ( !in_gap );

        if ( n_gap )
          {
            size_t n_div = n_space / n_gap;
            size_t n_mod = n_space % n_gap;
            const Nlm_Char FAR PNTR src = str;
            Nlm_CharPtr             dst = s;

            while ( *src )
              {
                size_t n_add;

                if ( !IS_WHITESP(*src) )
                  {
                    *dst++ = *src++;
                    continue;
                  }
                if ( !n_space )
                  break;  /* pool is used up: only trailing space is left */

                /* fill this gap, then skip the whole original run */
                n_add = n_div;
                if (n_mod > 0)
                  {
                    n_add++;
                    n_mod--;
                  }
                n_space -= n_add;
                while ( n_add-- )
                  *dst++ = SPACE;
                for (src++;  IS_WHITESP(*src);  src++)
                  continue;
              }
            ASSERT ( dst == s + len );
            break;
          }
      }
      /* FALLTHROUGH -- a single word cannot be spread, so center it */

    case RL_Center:
      x_memcpy(s + n_space/2, str, str_len);
      break;

    default:
      ASSERT ( 0 );
      Nlm_MemFree( s );
      return 0;
    }

  return s;
}
/*
 * Try to interpret lineStr as one line of sequence data, optionally
 * preceded by an ID:   [ID] <sequence characters> [one word of junk]
 *
 * The line is scanned once, character by character, through the states
 * PRE_DATA -> FIRST_WORD -> SEQUENCE_DATA -> EOL_JUNK -> POST_JUNK.
 * The first word is collected as a tentative ID; if it consists only of
 * sequence characters (per IsSequenceChar with the gap, missing and
 * unaligned characters from configPtr) it is treated as sequence instead.
 * White space inside the sequence is dropped.  A non-sequence character in
 * the sequence is tolerated as trailing junk (when preceded by a blank), or
 * kept and the line flagged as "maybe" when s_MightBeCorruptSequence says
 * so; otherwise the line is rejected.
 *
 * Returns a newly allocated SeqLineInfo owning the id / sequence strings,
 * or NULL if the line is empty, blank, or not a sequence line.
 */
static SeqLineInfoPtr s_ParseSequenceLine (CharPtr          lineStr,
                                           AliConfigInfoPtr configPtr)
{
  CharPtr        seqStr;
  CharPtr        idStr;
  CharPtr        swapStr;
  Int4           seqPosition          = 0;
  Int4           idPosition           = 0;
  Int4           firstWordLen         = 0;
  Int4           position;
  Int2           state                = PRE_DATA;
  Char           ch;
  Boolean        firstWordNotSequence = FALSE;
  Boolean        sequenceFound        = FALSE;
  Boolean        corruptSequence      = FALSE;
  SeqLineInfoPtr seqLinePtr;

  if (StringLen(lineStr) == 0)
    return NULL;

  seqStr = (CharPtr) MemNew (StringLen(lineStr)+1);
  idStr  = (CharPtr) MemNew (StringLen(lineStr)+1);

  for (position = 0; lineStr[position] != '\0'; position++)
    {
      ch = lineStr[position];

      if (state == PRE_DATA || state == FIRST_WORD)
        {
          if (IS_WHITESP(ch))
            {
              if (state == PRE_DATA)
                continue;

              /* End of the first word.  If it held only sequence */
              /* characters it is the sequence, not an ID.        */

              state = SEQUENCE_DATA;
              if ((idPosition > 0) && (firstWordNotSequence == FALSE))
                {
                  swapStr       = seqStr;
                  seqStr        = idStr;
                  idStr         = swapStr;
                  seqPosition   = idPosition;
                  idPosition    = 0;
                  sequenceFound = TRUE;
                }
              continue;
            }

          /* A character of the first word.  A non-sequence  */
          /* character means the word might be an ID, with   */
          /* the sequence following.                         */

          state = FIRST_WORD;
          if (!IsSequenceChar(ch,
                              configPtr->gapChar,
                              configPtr->missingChar,
                              configPtr->unalignedChar))
            firstWordNotSequence = TRUE;
          idStr[idPosition] = ch;
          idPosition++;
          firstWordLen++;
        }
      else if (state == SEQUENCE_DATA)
        {
          if (IS_WHITESP(ch))
            continue;

          /* If we're in a sequence, then a non-sequence */
          /* char invalidates it, although we do allow   */
          /* 'junk' at the end.                          */

          if (IsSequenceChar(ch,
                             configPtr->gapChar,
                             configPtr->missingChar,
                             configPtr->unalignedChar))
            {
              seqStr[seqPosition] = ch;
              seqPosition++;
              sequenceFound = TRUE;
            }
          else if ((lineStr[position - 1] == ' ') && sequenceFound)
            {
              state = EOL_JUNK;
            }
          else if ((corruptSequence == TRUE) ||
                   (s_MightBeCorruptSequence (seqPosition,
                                              &(lineStr[position]),
                                              configPtr)))
            {
              seqStr[seqPosition] = ch;
              seqPosition++;
              sequenceFound   = TRUE;
              corruptSequence = TRUE;
            }
          else
            goto reject;
        }
      else if (state == EOL_JUNK)
        {
          if (IS_WHITESP(ch))
            state = POST_JUNK;
        }
      else if (state == POST_JUNK)
        {
          /* Only one 'word' of junk allowed */
          if (!IS_WHITESP(ch))
            goto reject;
        }
    }

  /* Check for blank line */

  if (state == PRE_DATA)
    goto reject;

  if (state == FIRST_WORD)
    {
      /* If there was just one word, and it isn't */
      /* a sequence string, then this isn't a     */
      /* sequence line.                           */

      if (firstWordNotSequence == TRUE)
        goto reject;

      /* If there was just one word, and it IS a sequence */
      /* then the idStr is actually the seqStr.           */

      swapStr     = seqStr;
      seqStr      = idStr;
      idStr       = swapStr;
      seqPosition = idPosition;
      idPosition  = 0;
    }

  /* If still no sequence string, */
  /* then not a sequence line.    */

  if (StringLen(seqStr) == 0)
    goto reject;

  /* Check to see if the ID is a valid one */

  idStr[idPosition]   = '\0';
  seqStr[seqPosition] = '\0';

  if ((idPosition > 0) &&
      (IsValidId (idStr) == FALSE) &&
      (IsNumString (idStr) == FALSE))
    goto reject;

  /* If we made it to here, then   */
  /* it's a valid sequence line.   */

  seqLinePtr = (SeqLineInfoPtr) MemNew (sizeof (SeqLineInfo));
  seqLinePtr->firstWordLen = firstWordLen;

  if (StringLen (seqStr) != 0)
    seqLinePtr->sequence = seqStr;
  else
    {
      seqLinePtr->sequence = NULL;
      MemFree (seqStr);
    }

  if (StringLen (idStr) != 0)
    seqLinePtr->id = idStr;
  else
    {
      seqLinePtr->id = NULL;
      MemFree (idStr);
    }

  if (corruptSequence)
    seqLinePtr->maybe = TRUE;
  else
    seqLinePtr->maybe = FALSE;

  return seqLinePtr;

reject:
  /* Not a sequence line: release both work buffers */
  MemFree(seqStr);
  MemFree(idStr);
  return NULL;
}
/*
 * Try to interpret lineStr as a definition line of the form
 *      > id  [definition text ...]
 *
 * After optional leading white space the line must start with '>'.  The
 * characters accepted by IsValidIdChar() that follow form the ID; the
 * definition text starts at the first white space after a non-empty ID, or
 * at a '[' and runs to the end of the line.
 *
 *   lineStr       the line to parse
 *   rowNum        line number, stored in any error that is reported
 *   errorListPtr  error list that Ali_AddError() appends to
 *
 * Errors reported:
 *   ERR_INVALID_DEFLINE  an invalid character inside the ID (line rejected)
 *   ERR_DEFLINE_NODEFS   the definition text lacks '[' or ']' (the line is
 *                        still accepted)
 *
 * Returns a newly allocated DefLineInfo that owns its id / definitions
 * strings (each NULL when empty), or NULL if lineStr is NULL or blank, is
 * not a definition line, or memory cannot be allocated.
 */
static DefLineInfoPtr s_ParseDefLine (CharPtr         lineStr,
                                      Int4            rowNum,
                                      ErrInfoPtr PNTR errorListPtr)
{
  Char           ch;
  CharPtr        defStr;
  CharPtr        idStr;
  Int4           defPosition = 0;
  Int4           idPosition  = 0;
  Int4           position;
  Int2           state       = DEFLINE_PRE_DATA;
  DefLineInfoPtr defLinePtr  = NULL;
  ErrInfoPtr     errPtr;

  if (lineStr == NULL)
    return NULL;

  defStr = (CharPtr) MemNew (StringLen(lineStr)+1);
  idStr  = (CharPtr) MemNew (StringLen(lineStr)+1);
  if ((defStr == NULL) || (idStr == NULL))
    goto reject;

  /* Parse the line character by character */

  for (position = 0; lineStr[position] != '\0'; position++)
    {
      ch = lineStr[position];

      switch (state)
        {
        case DEFLINE_PRE_DATA :
          if (IS_WHITESP(ch))
            continue;
          if (ch != '>')
            goto reject;    /* Not a defline */
          state = DEFLINE_SEQID;
          break;

        case DEFLINE_SEQID :
          if (IsValidIdChar(ch))
            {
              idStr[idPosition] = ch;
              idPosition++;
            }
          else if (IS_WHITESP(ch))
            {
              /* White space before the ID is skipped; */
              /* after the ID it starts the definition */
              if (idPosition == 0)
                continue;
              state = DEFLINE_DEFINITION;
              defStr[defPosition] = ch;
              defPosition++;
            }
          else if (ch == '[')
            {
              state = DEFLINE_DEFINITION;
              defStr[defPosition] = ch;
              defPosition++;
            }
          else
            {
              errPtr = Ali_AddError (errorListPtr, ERR_INVALID_DEFLINE,
                                     lineStr, (Int4) ch);
              if (errPtr != NULL)
                errPtr->rowNum = rowNum;
              goto reject;
            }
          break;

        case DEFLINE_DEFINITION :
          defStr[defPosition] = ch;
          defPosition++;
          break;

        default:
          break;
        }
    }

  /* Check for blank line */

  if (state == DEFLINE_PRE_DATA)
    goto reject;

  idStr[idPosition]   = '\0';
  defStr[defPosition] = '\0';

  /* Make sure that it has at least one */
  /* set of square brackets.            */

  if ((StringChr(defStr,'[') == NULL) ||
      (StringChr(defStr,']') == NULL))
    {
      errPtr = Ali_AddError (errorListPtr, ERR_DEFLINE_NODEFS, lineStr);
      if (errPtr != NULL)
        errPtr->rowNum = rowNum;
    }

  /* If we made it to here, then   */
  /* it's a valid definition line. */

  defLinePtr = (DefLineInfoPtr) MemNew (sizeof (DefLineInfo));
  if (defLinePtr == NULL)
    goto reject;

  /* Hand non-empty strings over to the result; an empty */
  /* string is released here so that it does not leak.   */

  if (StringLen (defStr) != 0)
    defLinePtr->definitions = defStr;
  else
    {
      defLinePtr->definitions = NULL;
      MemFree (defStr);
    }

  if (StringLen (idStr) != 0)
    defLinePtr->id = idStr;
  else
    {
      defLinePtr->id = NULL;
      MemFree (idStr);
    }

  return defLinePtr;

reject:
  MemFree(defStr);
  MemFree(idStr);
  return NULL;
}
Nlm_Int2 Nlm_Main( void ) { #define MAX_COL 24 Nlm_Int4 argc = Nlm_GetArgc(); Nlm_CharPtr *argv = Nlm_GetArgv(); Nlm_Char x_str[MAX_COL * 1024]; FILE *fp = NULL; int n_read; FILE *logfile = Nlm_FileOpen("stdout", "w"); ASSERT ( logfile ); if (argc < 2) { fprintf(logfile, "Usage: %s <file_name>\n", argv[0]); return 1; } fp = Nlm_FileOpen(argv[1], "rb"); if ( !fp ) { fprintf(logfile, "Cannot open file: \"%s\"\n", argv[1]); return 2; } n_read = FileRead(x_str, 1, sizeof(x_str) - 1, fp); if (n_read < 2 * MAX_COL) { fprintf(logfile, "Too few bytes read from \"%s\": %d\n", argv[1], n_read); return 3; } ASSERT ( n_read < sizeof(x_str) ); x_str[n_read] = '\0'; {{ size_t max_col = MAX_COL - 1; int inc = 1; enumRuleLine rule_method = RL_Center; Nlm_CharPtr str = text2stream( x_str ); Nlm_CharPtr text_str = str; if ( !str ) { fprintf(logfile, "No non-space symbols in \"%s\"\n", argv[1]); return 4; } while (*str != '\0') { Nlm_Char s[MAX_COL + 1]; int dash = -12345; size_t n_print; while (*str && IS_WHITESP(*str)) str++; n_print = stream2text(str, max_col, &dash); ASSERT ( (max_col > 0 && str && *str) == (n_print > 0) ); ASSERT ( n_print <= max_col ); ASSERT ( dash != -12345 ); Nlm_MemCpy(s, str, n_print); s[n_print] = '\0'; ASSERT ( dash == 0 || n_print > 1 ); if ( dash ) s[--n_print] = '-'; {{ Nlm_CharPtr ruled_str = rule_line(s, (rule_method == RL_Right || rule_method == RL_Center ) ? MAX_COL : max_col, rule_method); fprintf(logfile, "|%s|\n", ruled_str); Nlm_MemFree( ruled_str ); }} str += n_print; if (max_col == 0 || max_col == MAX_COL) inc = -inc; max_col += inc; if (max_col == 0) if (rule_method == RL_Spread) rule_method = RL_Left; else rule_method++; } Nlm_MemFree( text_str ); }} Nlm_FileClose( logfile ); Nlm_FileClose( fp ); return 0; }
/*****************************************************************************
*
*   Int2 AsnLexTWord(aip)
*      Reads the next word, punctuation mark or ASN.1 keyword (including
*      keywords made of two parts) from the line buffer of aip and returns
*      its token code.
*
*      - Lines are loaded with AsnIoGets() when aip->bytes is zero and each
*        time an end of line is reached while skipping; EOF_TOKEN is
*        returned as soon as AsnIoGets() returns NULL.
*      - aip->state selects the scanner: IN_STRING_STATE (returns IN_STRING
*        or END_STRING), IN_BITHEX_STATE (returns IN_BITHEX, or OCTETS /
*        ASNBITS for a closing quote followed by 'H' / 'B', else error 58),
*        otherwise normal scanning.
*      - In normal scanning "--" comments are handed to AsnLexTAddComment().
*        COMMENT_TOKEN is returned for a comment only when no line break
*        was crossed in this call and the previous aip->token was not
*        already COMMENT_TOKEN.  On the very first line read, a comment
*        starting with asnwords[57] ("$Revision:" per the inline comment)
*        yields REVISION_TOKEN with aip->word / wordlen covering the
*        number that follows.
*      - A word starting with an upper-case letter is a REF unless
*        AsnLexTMatchToken() recognises it.  Matches 28..56 become
*        token = match + 400; match 57 is STRSTORE_TYPE and 59 is
*        BIGINT_TYPE; any other match becomes token = match + 300.  The
*        second part of BIT/OCTET STRING, SEQUENCE/SET OF, OBJECT IDENTIFIER
*        and COMPONENTS OF is consumed here (errors 89-92 if it is missing).
*      - A word starting with a lower-case letter is an IDENT, digits (with
*        optional leading '-') a NUMBER; an unrecognised character reports
*        error 59 and yields ERROR_TOKEN.
*
*      On return (other than EOF_TOKEN) aip->word, aip->wordlen,
*      aip->linepos, aip->state and aip->token describe the element read.
*
*****************************************************************************/
NLM_EXTERN Int2 AsnLexTWord (AsnIoPtr aip) { register CharPtr pos; register int len; Int1 state; Int2 token, asntype, linepos; int done; Boolean first = FALSE, hitnewline = FALSE; CharPtr commentptr; if (! aip->bytes) /* no data loaded */ { hitnewline = TRUE; first = TRUE; AsnIoGets(aip); } linepos = aip->linepos; pos = aip->linebuf + linepos; state = aip->state; len = 0; token = -1; while (*pos == '\n' || *pos == '\r') /* skip empty lines */ { hitnewline = TRUE; pos = AsnIoGets(aip); if (pos == NULL) return EOF_TOKEN; } if (state == IN_STRING_STATE) { aip->word = pos; if (* pos == '\"') /* end of string */ { token = END_STRING; pos++; state = 0; /* reset state */ } else { token = IN_STRING; while ((* pos != '\"') && (* pos != '\n') && (* pos != '\r')) { pos++; len++; } if ((*pos != '\n') && (*pos != '\r') && (* (pos + 1) == '\"')) /* internal quote */ { len++; /* include in previous string */ pos += 2; /* point to rest of string */ } } } else if (state == IN_BITHEX_STATE) { aip->word = pos; if (*pos == '\'') /* end of binhex */ { state = 0; /* set to normal */ pos++; /* move past quote */ while (IS_WHITESP(*pos)) pos++; if (* pos == 'H') token = OCTETS; else if (* pos == 'B') token = ASNBITS; else { AsnIoErrorMsg(aip, 58, aip->linenumber); token = ERROR_TOKEN; } } else { token = IN_BITHEX; while ((* pos != '\'') && (* pos != '\n') && (* pos != '\r')) { pos++; len++; } } } else /* normal scanning */ { done = 0; while (! 
done) { while (* pos <= ' ') /* skip leading white space */ { if (*pos == '\n' || *pos == '\r') { hitnewline = TRUE; pos = AsnIoGets(aip); if (pos == NULL) return EOF_TOKEN; } else pos++; } done = 1; while (done && (*pos == '-') && (*(pos+1) == '-')) /* skip comments */ { pos += 2; if (first) /* could be revision */ { first = FALSE; if (StrMatch(asnwords[57], pos, 10)) /* $Revision: */ { token = REVISION_TOKEN; pos += 10; while (IS_WHITESP(*pos)) pos++; aip->word = pos; while (IS_DIGIT(*pos)) /* eg. 1.2 */ { len++; pos++; } if (*pos == '.') /* take after . if present */ { pos++; len++; while (IS_DIGIT(*pos)) { len++; pos++; } } } } commentptr = pos; done = 0; while (! done) /* skip to end of comment */ { if ((*pos == '-') && (*(pos +1) == '-')) { if (token != REVISION_TOKEN) { AsnLexTAddComment(commentptr, pos, aip); if ((! hitnewline) && (aip->token != COMMENT_TOKEN)) token = COMMENT_TOKEN; } pos += 2; done = 1; } else if (*pos == '\n' || *pos == '\r') { if (token != REVISION_TOKEN) { AsnLexTAddComment(commentptr, pos, aip); if ((! 
hitnewline) && (aip->token != COMMENT_TOKEN)) token = COMMENT_TOKEN; } done = 1; } else pos++; } if ((token == REVISION_TOKEN) || (token == COMMENT_TOKEN)) { aip->linepos = pos - aip->linebuf; aip->state = state; aip->wordlen = len; aip->token = token; return token; } if (*pos <= ' ') done = 0; else done = 1; } } aip->word = pos; if (* pos == '\"') { token = START_STRING; state = IN_STRING_STATE; } else if (* pos == '\'') { token = START_BITHEX; state = IN_BITHEX_STATE; } else if (* pos == ',') token = COMMA; else if (* pos == '{') token = START_STRUCT; else if (* pos == '}') token = END_STRUCT; else if (* pos == '[') token = START_TAG; else if (* pos == ']') token = END_TAG; else if (* pos == '(') token = OPEN_PAREN; else if (* pos == ')') token = CLOSE_PAREN; else if (* pos == ';') token = SEMI_COLON; else if (* pos == ':') { if ((*(pos + 1) == ':') && (*(pos + 2) == '=')) { token = ISDEF; pos += 2; len = 3; } else { AsnIoErrorMsg(aip, 59, *pos, aip->linenumber); token = ERROR_TOKEN; } } else if (IS_UPPER(*pos)) /* a reference or keyword */ { token = REF; while ((IS_ALPHANUM(*pos)) || (*pos == '-')) { pos++; len++; } aip->wordlen = len; asntype = AsnLexTMatchToken(aip); /* check types */ if (asntype) /* did it match ? */ { if ((asntype > 27) && (asntype < 57)) /* not a primitive type */ { token = asntype + 400; /* make a keyword type */ if (asntype == COMPS_TOKEN) /* COMPONENTS OF */ { if ((*(pos + 1) == 'O') && (*(pos + 2) == 'F') && (IS_WHITESP(*(pos+3)))) { pos += 3; /* move past OF */ len += 3; } else AsnIoErrorMsg(aip, 89, aip->linenumber); } } else if (asntype == 57) /* StringStore */ token = STRSTORE_TYPE; else if (asntype == 59) /* BitInt */ token = BIGINT_TYPE; else { switch (asntype) { case 3: /* BIT */ case 4: /* OCTET */ if (! 
StrMatch(asnwords[11], (pos+1), 6)) AsnIoErrorMsg(aip, 90, aip->linenumber); pos += 7; /* move past STRING */ len += 7; break; case 11: /* SEQUENCE */ case 13: /* SET */ if ((*(pos + 1) == 'O') && (*(pos + 2) == 'F')) { asntype++; /* SET or SEQ OF */ pos += 3; len += 3; if (! IS_WHITESP(*pos)) AsnIoErrorMsg(aip, 91, aip->linenumber); } break; case 6: /* OBJECT */ if ((! StrMatch(asnwords[55], (pos+1), 10))) /* IDENTIFIER */ AsnIoErrorMsg(aip, 92, aip->linenumber); pos += 11; len += 11; break; default: break; } token = asntype + 300; /* change to point at type */ } } pos--; /* move back for increment at end */ len--; } else if (IS_LOWER(*pos)) /* an identifier or valuereference */ { token = IDENT; while ((IS_ALPHANUM(*pos)) || (*pos == '-')) { pos++; len++; } pos--; /* move back for increment at end */ len--; } else if ((IS_DIGIT(*pos)) || ((*pos == '-') && (IS_DIGIT(*(pos+1))))) { token = NUMBER; if (*pos == '-') { pos++; len++; } while (IS_DIGIT(*pos)) { pos++; len++; } pos--; /* move back for increment at end */ len--; } else { AsnIoErrorMsg(aip, 59, *pos, aip->linenumber); token = ERROR_TOKEN; } len++; pos++; /* move over last symbol */ } aip->linepos = pos - aip->linebuf; aip->state = state; aip->wordlen = len; aip->token = token; return token; }
/*****************************************************************************
*
*   PubLabelUnique(pub, buf, buflen, content, unique)
*      Makes a short label for any Pub in buf, up to buflen size.
*
*      pub     - the publication; its choice selects which fields are used
*                (choices above 13 get the type label "Unknown")
*      buf     - receives the label; set to "?" before anything else
*      buflen  - space available in buf
*      content - follows objmgr OM_LABEL_: OM_LABEL_TYPE writes only the
*                type name, OM_LABEL_BOTH writes "type: " followed by the
*                content, anything else writes the content only
*      unique  - if TRUE, appends "|" plus a tag built from the first
*                letter of each word of the title (at most 40 letters) to
*                make a unique key based on ANSI std Z39.56-1991
*
*      The content is assembled from whatever is available, in this order:
*      first author, "(date)", title (prefixed "(in) " for book sources),
*      volume, "(issue)", pages, "Unpublished".  PUB_Muid / PUB_PMid
*      produce "NLM<id>" / "PM<id>" only.  PUB_Equiv labels up to five of
*      its members by calling this function recursively.
*
*      Returns the number of characters consumed from buflen
*      (len - buflen); 0 if buf is NULL, buflen < 1 or pub is NULL.
*
*      NOTE(review): a PUB_Gen that carries nothing but cgp->cit copies the
*      citation into buf and then returns 0 rather than the copied length;
*      a recursive PUB_Equiv caller will therefore not advance past it --
*      confirm this is intended.
*      NOTE(review): in PUB_Equiv the default case stores members at
*      eq[0..4] by arrival order and can overwrite the eq[3] / eq[4] slots
*      reserved for PUB_Muid and serial-numbered PUB_Gen -- confirm.
*
*****************************************************************************/
NLM_EXTERN Int2 LIBCALL PubLabelUnique (ValNodePtr pub, CharPtr buf, Int2 buflen, Uint1 content, Boolean unique) { CharPtr typelabel = NULL; static CharPtr pubtypes [14] = { "Unknown", "Generic", "Submit", "Medline", "MUID", "Article", "Journal", "Book", "Proceedings", "Patent", "PatID", "Manuscript", "Equiv" , "PMID" }; ValNodePtr vnp2=NULL, title=NULL; Int2 len, diff; Char tbuf[41]; Boolean first = TRUE; Int4 muid = 0, pmid = 0; AuthListPtr alp=NULL; AuthorPtr ap; ImprintPtr imp = NULL; CharPtr year = NULL, volume = NULL, issue = NULL, pages = NULL, title1=NULL, title2=NULL, titleunique = NULL, part_sup = NULL, part_supi = NULL; CitArtPtr cap; CitJourPtr cjp; CitBookPtr cbp=NULL; CitSubPtr csp; CitPatPtr cpp; IdPatPtr ipp; CitGenPtr cgp; MedlineEntryPtr mep; DatePtr dp = NULL; Boolean unpublished = FALSE, done; ValNodePtr eq[5]; Int2 i; CharPtr s, cit; if ((buf == NULL) || (buflen < 1)) return 0; buf[0] = '?'; buf[1] = '\0'; if (pub == NULL) return 0; if (pub->choice > 13) typelabel = pubtypes[0]; else typelabel = pubtypes[pub->choice]; len = buflen; if (content == OM_LABEL_TYPE) return LabelCopy(buf, typelabel, buflen); if (content == OM_LABEL_BOTH) { diff = LabelCopyExtra(buf, typelabel, buflen, NULL, ": "); buflen -= diff; buf += diff; } switch (pub->choice) { case PUB_Muid: sprintf(tbuf, "NLM%ld", (long)(pub->data.intvalue)); diff = LabelCopy(buf, tbuf, buflen); buflen -= diff; return (len - buflen); /* already unique */ break; case PUB_PMid: sprintf(tbuf, "PM%ld", (long)(pub->data.intvalue)); diff = LabelCopy(buf, tbuf, buflen); buflen -= diff; 
return (len - buflen); /* already unique */ break; case PUB_Equiv: for (i = 0; i < 5; i++) eq[i] = NULL; i = 0; for (vnp2 = (ValNodePtr)(pub->data.ptrvalue); ((vnp2 != NULL) && (buflen)); vnp2 = vnp2->next) { switch (vnp2->choice) { case PUB_Muid: eq[3] = vnp2; break; case PUB_Gen: cgp = (CitGenPtr)(vnp2->data.ptrvalue); if (cgp->serial_number > 0) { eq[4] = vnp2; break; } default: if (i < 5) eq[i] = vnp2; i++; break; } } for (i = 0; i < 5; i++) { if (eq[i] != NULL) { if (! first) { diff = LabelCopy(buf, " ", buflen); buflen -= diff; buf += diff; } else first = FALSE; diff = PubLabelUnique (eq[i], buf, buflen, OM_LABEL_CONTENT, unique); buflen -= diff; buf += diff; } } break; case PUB_Medline: mep = (MedlineEntryPtr)(pub->data.ptrvalue); if (mep->pmid > 0) sprintf(tbuf, "PM%ld", (long)(mep->pmid)); else sprintf(tbuf, "NLM%ld", (long)(mep->uid)); diff = LabelCopyExtra(buf, tbuf, buflen, NULL, " "); buflen -= diff; buf += diff; cap = mep->cit; goto cit_art; case PUB_Article: cap = (CitArtPtr)(pub->data.ptrvalue); cit_art: alp = cap->authors; if (cap->title != NULL) titleunique = (CharPtr)(cap->title->data.ptrvalue); switch (cap->from) { case 1: cjp = (CitJourPtr)(cap->fromptr); goto cit_jour; case 2: case 3: cbp = (CitBookPtr)(cap->fromptr); goto cit_book; } break; case PUB_Journal: cjp = (CitJourPtr)(pub->data.ptrvalue); cit_jour: imp = cjp->imp; title = cjp->title; break; case PUB_Book: case PUB_Proc: case PUB_Man: cbp = (CitBookPtr)(pub->data.ptrvalue); title = cbp->title; cit_book: imp = cbp->imp; if (alp == NULL) alp = cbp->authors; break; case PUB_Sub: csp = (CitSubPtr)(pub->data.ptrvalue); alp = csp->authors; imp = csp->imp; dp = csp->date; break; case PUB_Patent: cpp = (CitPatPtr)(pub->data.ptrvalue); alp = cpp->authors; dp = cpp->date_issue; if (dp == NULL) dp = cpp->app_date; title1 = cpp->country; title2 = cpp->number; if (title2 == NULL) title2 = cpp->app_number; break; case PUB_Pat_id: ipp = (IdPatPtr)(pub->data.ptrvalue); title1 = ipp->country; title2 = 
ipp->number; if (title2 == NULL) title2 = ipp->app_number; break; case PUB_Gen: cgp = (CitGenPtr)(pub->data.ptrvalue); if (cgp->serial_number > 0) { sprintf(tbuf, "[%d]", (int)(cgp->serial_number)); diff = LabelCopy(buf, tbuf, buflen); buflen -= diff; buf += diff; } if (cgp->muid > 0) { sprintf(tbuf, "NLM%ld", (long)(cgp->muid)); diff = LabelCopy(buf, tbuf, buflen); buflen -= diff; buf += diff; } dp = cgp->date; title = cgp->journal; alp = cgp->authors; if (cgp->cit != NULL) { if (! StringICmp("Unpublished", cgp->cit)) unpublished = TRUE; else if (title == NULL) title2 = cgp->cit; } volume = cgp->volume; issue=cgp->issue; pages=cgp->pages; if (cgp->title != NULL) titleunique = cgp->title; else if (title2 != NULL) titleunique = title2; else if (title == NULL) titleunique = cgp->cit; if (title == NULL && alp == NULL && cgp->title == NULL && volume == NULL && pages == NULL && issue == NULL) { titleunique = NULL; if ((cit = StringSave(cgp->cit)) != NULL) { if (!unique) { for (s = cit + StringLen(cit) -1; s > cit && *s !='|'; s--) continue; if (*s == '|' ) { *s = '\0'; } } diff = LabelCopy(buf, cit, buflen); MemFree(cit); } return 0; } break; default: sprintf(tbuf,"Unknown pub[%d]", (int)(pub->choice)); title2 = tbuf; break; } if (imp != NULL) { if (dp == NULL) dp = imp->date; if (volume == NULL) volume = imp->volume; if (issue == NULL) issue = imp->issue; if (pages == NULL) pages = imp->pages; part_sup = imp->part_sup; part_supi = imp->part_supi; } if (alp != NULL) { vnp2 = alp->names; if (vnp2 != NULL) { if (alp->choice == 1) /* std name */ { ap = (AuthorPtr)(vnp2->data.ptrvalue); diff = PersonIdLabel(ap->name, buf, buflen, PIDLABEL_GENBANK); } else diff = LabelCopy(buf, (CharPtr)(vnp2->data.ptrvalue), buflen); buflen -= diff; buf += diff; } } if (dp != NULL) { if (dp->data[0]) { if (pub->choice == PUB_Sub && dp->data[2] != 0 && dp->data[3] != 0){ sprintf(tbuf, "%d-%d-%d", (int)(dp->data[2]), (int)(dp->data[3]), (int)(dp->data[1])+1900); year = tbuf; } else { 
sprintf(tbuf, "%d", (int)(dp->data[1])+1900); year = tbuf; } } else year = dp->str; diff = LabelCopyExtra(buf, year, buflen, " (", ") "); buflen -= diff; buf += diff; } if ((title != NULL) && (titleunique == NULL)) titleunique = (CharPtr)(title->data.ptrvalue); if (title2 == NULL) { if (title != NULL) title2 = (CharPtr)(title->data.ptrvalue); } if (title2 != NULL) { if (cbp != NULL) title1 = "(in) "; diff = LabelCopyExtra(buf, title2, buflen, title1, " "); buflen -= diff; buf += diff; } if (volume != NULL) { if (part_sup != NULL) title1 = part_sup; else title1 = ":"; diff = LabelCopyExtra(buf, volume, buflen, NULL, title1); buflen -= diff; buf += diff; if (part_sup != NULL) { diff = LabelCopyExtra(buf, ":", buflen, NULL, NULL); buflen -= diff; buf += diff; } } if (issue != NULL) { if (part_supi != NULL) title1 = part_supi; else title1 = ")"; diff = LabelCopyExtra(buf, issue, buflen, "(" , title1); buflen -= diff; buf += diff; if (part_supi != NULL) { diff = LabelCopyExtra(buf, ")", buflen, NULL, NULL); buflen -= diff; buf += diff; } } if (pages != NULL) { diff = LabelCopy(buf, pages, buflen); buflen -= diff; buf += diff; } if (unpublished) { diff = LabelCopy(buf, "Unpublished", buflen); buflen -= diff; buf += diff; } if (unique) /* put on unique tag made from title */ { done = FALSE; i=0; if (titleunique != NULL && *titleunique != '\0') { while ((! done) && (i < 40)) { tbuf[i] = *titleunique; i++; while (! IS_WHITESP(*titleunique)) { titleunique++; if (*titleunique == '\0') { done = TRUE; break; } } while (IS_WHITESP(*titleunique)) { titleunique++; if (*titleunique == '\0') { done = TRUE; break; } } } } tbuf[i] = '\0'; diff = LabelCopyExtra(buf, tbuf, buflen, "|" , NULL); buflen -= diff; buf += diff; } return (len - buflen); }