/* bibtex_item()
 *
 * Append one item starting at p to s.  Copying stops at the first
 * ',', '=', '}' or ')' seen while outside double quotes and outside
 * curly brackets, or at the end of the string.  Whitespace is not
 * copied while s is still empty, and trailing whitespace is trimmed
 * from s before returning.
 *
 * Returns a pointer to the character at which copying stopped.
 *
 * NOTE(review): the escaped-quote test reads *(p-1), so a double quote
 * in the very first position requires that p is not the first byte of
 * its buffer -- confirm against callers.
 */
static char *
bibtex_item( char *p, newstr *s )
{
	int in_quotes = 0, depth = 0;
	char c;

	for ( ; *p; p++ ) {
		c = *p;
		if ( !in_quotes && !depth ) {
			if ( c==',' || c=='=' || c=='}' || c==')' ) break;
		}
		if ( c=='\"' && *(p-1)!='\\' ) {
			/* an unescaped quote toggles the quoted state */
			in_quotes = !in_quotes;
			newstr_addchar( s, c );
		} else if ( c=='{' ) {
			depth++;
			newstr_addchar( s, c );
		} else if ( c=='}' ) {
			depth--;
			newstr_addchar( s, c );
		} else if ( s->len!=0 || !is_ws( c ) ) {
			newstr_addchar( s, c );
		}
	}

	newstr_trimendingws( s );
	return p;
}
/* test_addchar()
 *
 * Exercise newstr_addchar() on s in three stages: appending NUL
 * characters, building the short string "11111", and appending 1000
 * characters whose codes cycle through 64..127.
 *
 * Returns the number of stages that failed their checks.
 */
int
test_addchar( newstr *s )
{
	const int nshort = 5, nlong = 1000;
	int nfailed = 0, k;

	/* ...appending '\0' characters won't increase length */
	newstr_empty( s );
	k = 0;
	while ( k < nshort ) {
		newstr_addchar( s, '\0' );
		k++;
	}
	if ( test_consistency( s, 0, __FUNCTION__ ) || test_identity( s, "" ) )
		nfailed += 1;

	/* ...build "11111" with newstr_addchar */
	newstr_empty( s );
	k = 0;
	while ( k < nshort ) {
		newstr_addchar( s, '1' );
		k++;
	}
	if ( test_consistency( s, 5, __FUNCTION__ ) || test_identity( s, "11111" ) )
		nfailed += 1;

	/* ...a long run; only the length bookkeeping is checked */
	newstr_empty( s );
	k = 0;
	while ( k < nlong ) {
		newstr_addchar( s, ( k % 64 ) + 64 );
		k++;
	}
	if ( test_consistency( s, nlong, __FUNCTION__ ) )
		nfailed += 1;

	return nfailed;
}
/* bibtex_split()
 *
 * Split s into tokens at every '#' that lies outside double quotes
 * and outside curly brackets, adding each non-empty token to tokens.
 * Quotes and brackets are kept in the tokens; whitespace is kept only
 * while inside quotes or brackets.  Trailing whitespace is trimmed
 * from every entry of tokens afterwards.
 */
static void
bibtex_split( list *tokens, newstr *s )
{
	int in_quotes = 0, depth = 0;
	int pos, total = s->len;
	newstr tok;
	char c;

	newstr_init( &tok );

	for ( pos=0; pos<total; pos++ ) {
		c = s->data[pos];
		switch ( c ) {
		case '\"':
			in_quotes = !in_quotes;
			newstr_addchar( &tok, '\"' );
			break;
		case '{':
			depth++;
			newstr_addchar( &tok, '{' );
			break;
		case '}':
			depth--;
			newstr_addchar( &tok, '}' );
			break;
		case '#':
			if ( !in_quotes && !depth ) {
				/* separator: flush the current token */
				if ( tok.len ) list_add( tokens, tok.data );
				newstr_empty( &tok );
				break;
			}
			/* fall through: a protected '#' is ordinary text */
		default:
			if ( !is_ws( c ) || in_quotes || depth )
				newstr_addchar( &tok, c );
			break;
		}
	}

	/* whatever is left is the final token */
	if ( tok.len ) list_add( tokens, tok.data );

	for ( pos=0; pos<tokens->n; pos++ )
		newstr_trimendingws( &(tokens->str[pos]) );

	newstr_free( &tok );
}
/* process_bibtexid()
 *
 * Get the reference name: the text from p up to the first ',' (or the
 * end of the string).  On success the name is stored in data wrapped
 * in '{' and '}' and the returned pointer is past the comma and any
 * following whitespace.  data is emptied first in every case.
 *
 * If the candidate name contains '=', nothing is stored and the
 * original p is returned (see the comment in the body).
 */
static char *
process_bibtexid( char *p, newstr *data )
{
	char *entry_start = p;
	newstr id;

	newstr_init( &id );
	newstr_empty( data );

	for ( ; *p && *p!=','; p++ )
		newstr_addchar( &id, *p );
	if ( *p==',' ) p++;
	p = skip_ws( p ); /* skip ending newline/carriage return */

	if ( id.len ) {
		if ( strchr( id.data, '=' )!=NULL ) {
			/* Endnote writes bibtex files w/o fields, try to
			 * distinguish via presence of an equal sign.... if
			 * it's there, assume that it's a tag/data pair
			 * instead and roll back.
			 */
			p = entry_start;
		} else {
			/* add '{' and '}' to protect from string expansion */
			newstr_addchar( data, '{' );
			newstr_strcat( data, id.data );
			newstr_addchar( data, '}' );
		}
	}

	newstr_free( &id );
	return p;
}
/* extract_range()
 *
 * Split a range string into its first and second value.
 *
 * Handle input strings like:
 *
 * "1-15"
 * " 1 - 15 "
 * " 1000--- 1500"
 * " 1 <<em-dash>> 10"
 * " 107 111"
 *
 * begin and end are emptied first; they stay empty when input is
 * empty.
 */
static void
extract_range( newstr *input, newstr *begin, newstr *end )
{
	/* -30 is the first character of a UTF8 em-dash and en-dash */
	const char terminators[] = { ' ', '-', '\t', '\r', '\n', -30, '\0' };
	char *cur;

	newstr_empty( begin );
	newstr_empty( end );
	if ( input->len==0 ) return;

	/* first value */
	cur = skip_ws( input->data );
	for ( ; *cur && strchr( terminators, *cur )==NULL; cur++ )
		newstr_addchar( begin, *cur );

	/* separator: hyphens, then em-dashes, then en-dashes (3 bytes each) */
	cur = skip_ws( cur );
	while ( *cur=='-' ) cur++;
	while ( utf8_is_emdash( cur ) ) cur += 3;
	while ( utf8_is_endash( cur ) ) cur += 3;
	cur = skip_ws( cur );

	/* second value */
	for ( ; *cur && strchr( terminators, *cur )==NULL; cur++ )
		newstr_addchar( end, *cur );
}
/* output_person()
 *
 * Write a person's name to fp as "Family Suffix, Initials".
 *
 * name is '|'-separated: the family name comes first, each given name
 * is introduced by a single '|', and an optional suffix is introduced
 * by "||".  Only the first character of each given name is written.
 * Empty components are simply left out.
 */
static void
output_person( FILE *fp, char *name )
{
	newstr family, given, suffix;
	char *p = name;

	newstrs_init( &family, &given, &suffix, NULL );

	while ( *p && *p!='|' ) newstr_addchar( &family, *p++ );

	while ( *p=='|' && *(p+1)!='|' ) {
		p++;
		/* A name ending in '|' leaves p on the terminator here;
		 * never step past it. */
		if ( *p ) newstr_addchar( &given, *p++ );
		while ( *p && *p!='|' ) p++;
	}

	if ( *p=='|' && *(p+1)=='|' ) {
		p += 2;
		while ( *p && *p!='|' ) newstr_addchar( &suffix, *p++ );
	}

	if ( family.len ) fprintf( fp, "%s", family.data );
	if ( suffix.len ) {
		if ( family.len ) fprintf( fp, " %s", suffix.data );
		else fprintf( fp, "%s", suffix.data );
	}
	if ( given.len ) fprintf( fp, ", %s", given.data );

	newstrs_free( &family, &given, &suffix, NULL );
}
/* medin_author()
 *
 * Build a '|'-separated name ("Last|Given|Given...") in name from the
 * children of an <Author> element, following node->next.
 *
 * <AuthorList CompleteYN="Y">
 *    <Author>
 *        <LastName>Barondeau</LastName>
 *        <ForeName>David P</ForeName>
 *        ( or <FirstName>David P</FirstName> )
 *        <Initials>DP</Initials>
 *    </Author>
 *    <Author>
 *        <CollectiveName>Organization</CollectiveName>
 *    </Author>
 * </AuthorList>
 *
 * LastName is placed in front of whatever is already in name.
 * ForeName/FirstName is split on spaces, one part per word.
 * Initials are used, one part per non-whitespace character, only when
 * name holds no '|' yet.
 *
 * Always returns BIBL_OK.
 */
static int
medin_author( xml *node, newstr *name )
{
	char *p;

	if ( xml_tagexact( node, "LastName" ) ) {
		if ( name->len ) {
			newstr_prepend( name, "|" );
			newstr_prepend( name, xml_data( node ) );
		}
		else newstr_strcat( name, xml_data( node ) );
	} else if ( xml_tagexact( node, "ForeName" ) ||
	            xml_tagexact( node, "FirstName" ) ) {
		p = xml_data( node );
		while ( p && *p ) {
			while ( *p==' ' ) p++;
			/* trailing blanks must not add an empty part */
			if ( !*p ) break;
			if ( name->len ) newstr_addchar( name, '|' );
			while ( *p && *p!=' ' ) newstr_addchar( name, *p++ );
		}
	} else if ( xml_tagexact( node, "Initials" ) &&
	            /* an empty newstr may have no buffer at all */
	            ( !name->data || !strchr( name->data, '|' ) ) ) {
		p = xml_data( node );
		/* always advance, so whitespace cannot stall the loop */
		for ( ; p && *p; p++ ) {
			if ( is_ws( *p ) ) continue;
			if ( name->len ) newstr_addchar( name, '|' );
			newstr_addchar( name, *p );
		}
	}

	if ( node->next ) medin_author( node->next, name );
	return BIBL_OK;
}
/* test_addchar()
 *
 * Exercise newstr_addchar() on s in three stages: appending NUL
 * characters, building the short string "11111", and appending 1000
 * characters whose codes cycle through 64..127.
 *
 * Returns the number of stages that failed their checks.
 */
static int
test_addchar( newstr *s )
{
	const int nshort = 5, nlong = 1000;
	int nfailed = 0, k;

	/* ...appending '\0' characters won't increase length */
	newstr_empty( s );
	k = 0;
	while ( k < nshort ) {
		newstr_addchar( s, '\0' );
		k++;
	}
	if ( string_mismatch( s, 0, "" ) ) nfailed += 1;

	/* ...build "11111" with newstr_addchar */
	newstr_empty( s );
	k = 0;
	while ( k < nshort ) {
		newstr_addchar( s, '1' );
		k++;
	}
	if ( string_mismatch( s, 5, "11111" ) ) nfailed += 1;

	/* ...a long run; only the length bookkeeping is checked */
	newstr_empty( s );
	k = 0;
	while ( k < nlong ) {
		newstr_addchar( s, ( k % 64 ) + 64 );
		k++;
	}
	if ( inconsistent_len( s, nlong ) ) nfailed += 1;

	return nfailed;
}
/* generate_citekey()
 *
 * Create a "REFNUM" field for a reference.
 *
 * When both an AUTHOR and a YEAR (or PARTYEAR) field are found, the
 * key is the author text up to the first '|' followed by the year,
 * both with whitespace removed.  Otherwise the key is "ref" followed
 * by nref.  Either way the key is added to info as "REFNUM" at
 * level 0.
 *
 * Returns the position of a "REFNUM" field as reported by
 * fields_find(), or -1 if none is found.
 */
static int
generate_citekey( fields *info, int nref )
{
	newstr citekey;
	int n1, n2;
	char *p, buf[100];

	newstr_init( &citekey );

	n1 = fields_find( info, "AUTHOR", 0 );
	if ( n1==-1 ) n1 = fields_find( info, "AUTHOR", -1 );
	n2 = fields_find( info, "YEAR", 0 );
	if ( n2==-1 ) n2 = fields_find( info, "YEAR", -1 );
	if ( n2==-1 ) n2 = fields_find( info, "PARTYEAR", 0 );
	if ( n2==-1 ) n2 = fields_find( info, "PARTYEAR", -1 );

	if ( n1!=-1 && n2!=-1 ) {
		p = info->data[n1].data;
		while ( p && *p && *p!='|' ) {
			if ( !is_ws( *p ) ) newstr_addchar( &citekey, *p );
			p++;
		}
		p = info->data[n2].data;
		while ( p && *p ) {
			if ( !is_ws( *p ) ) newstr_addchar( &citekey, *p );
			p++;
		}
	} else {
		/* fallback key; no newline belongs inside a citation key */
		sprintf( buf, "ref%d", nref );
		newstr_strcpy( &citekey, buf );
	}

	/* store the key in both cases so the fallback is not discarded */
	fields_add( info, "REFNUM", citekey.data, 0 );

	newstr_free( &citekey );
	return fields_find( info, "REFNUM", -1 );
}
/* resolve_citekeys()
 *
 * Make duplicated citation keys distinct by appending a letter suffix.
 *
 * For each index i with dup[i]!=-1, every entry j>=i with dup[j]==i
 * gets citekeys->str[j] plus a suffix: one 'a' for every full 26
 * entries already handled in that group, then the letter for the
 * remainder ("a", "b", ... "z", "aa", "ab", ...).  The result is
 * written into the "REFNUM" field of b->ref[j] when that field exists,
 * and dup[j] is set to -1 to mark the entry as handled.
 */
static void
resolve_citekeys( bibl *b, list *citekeys, int *dup )
{
	static const char letters[] = "abcdefghijklmnopqrstuvwxyz";
	int first, k, count, rem, pos;
	newstr key;

	newstr_init( &key );

	for ( first=0; first<citekeys->n; first++ ) {
		if ( dup[first]==-1 ) continue;
		count = 0;
		for ( k=first; k<citekeys->n; k++ ) {
			if ( dup[k]!=first ) continue;

			newstr_newstrcpy( &key, &(citekeys->str[k]) );
			for ( rem=count; rem>=26; rem-=26 )
				newstr_addchar( &key, 'a' );
			if ( rem>=0 && rem<26 )
				newstr_addchar( &key, letters[rem] );

			count++;
			dup[k] = -1;

			pos = fields_find( b->ref[k], "REFNUM", -1 );
			if ( pos!=-1 )
				newstr_newstrcpy( &((b->ref[k])->data[pos]), &key );
		}
	}

	newstr_free( &key );
}
/* build_refnum()
 *
 * Add a "REFNUM" field (level 0) to info.
 *
 * With an author (AUTHOR, else AUTHOR:CORP) and a year (YEAR, else
 * PARTYEAR) available, the value is the author text up to the first
 * '|' followed by the year text up to the first space or tab.
 * Otherwise the value is "ref" followed by nrefs.
 *
 * Returns the result of fields_find( info, "REFNUM", 0 ).
 */
static int
build_refnum( fields *info, long nrefs )
{
	char numbuf[512], *src;
	int year, author;
	newstr key;

	newstr_init( &key );

	year = fields_find( info, "YEAR", -1 );
	if ( year==-1 ) year = fields_find( info, "PARTYEAR", -1 );

	author = fields_find( info, "AUTHOR", -1 );
	if ( author==-1 ) author = fields_find( info, "AUTHOR:CORP", -1 );

	if ( author==-1 || year==-1 ) {
		/* not enough information: fall back to a numbered key */
		sprintf( numbuf, "%ld", nrefs );
		newstr_strcpy( &key, "ref" );
		newstr_strcat( &key, numbuf );
	} else {
		for ( src=info->data[author].data; src && *src && *src!='|'; src++ )
			newstr_addchar( &key, *src );
		for ( src=info->data[year].data;
		      src && *src && *src!=' ' && *src!='\t'; src++ )
			newstr_addchar( &key, *src );
	}

	fields_add( info, "REFNUM", key.data, 0 );
	newstr_free( &key );

	return fields_find( info, "REFNUM", 0 );
}
/* xml_findend()
 *
 * Search buffer for the end tag "</tag>" (or "</xml_pns:tag>" when
 * the global xml_pns is set) using strsearch().
 *
 * When the search result points at a non-empty string, the returned
 * pointer is advanced until it follows a '>' or reaches the end of
 * the string; otherwise the search result is returned unchanged.
 */
char *
xml_findend( char *buffer, char *tag )
{
	newstr closer;
	char *hit;

	newstr_init( &closer );
	newstr_strcpy( &closer, "</" );
	if ( xml_pns ) {
		newstr_strcat( &closer, xml_pns );
		newstr_addchar( &closer, ':' );
	}
	newstr_strcat( &closer, tag );
	newstr_addchar( &closer, '>' );

	hit = strsearch( buffer, closer.data );
	if ( hit && *hit ) {
		/* skip <random_tag></end> combo */
		hit++;
		while ( *hit && hit[-1]!='>' ) hit++;
	}

	newstr_free( &closer );
	return hit;
}
/*
 * xml_processtag
 *
 *      XML_COMMENT      <!-- ....  -->
 *      XML_DESCRIPTOR   <?.....>
 *      XML_OPEN         <A>
 *      XML_CLOSE        </A>
 *      XML_OPENCLOSE    <A/>
 *
 * Read one tag starting at p (an optional leading '<' is skipped).
 * The tag text is appended to tag, *type receives the tag kind, and
 * *attrib is reset to NULL before attributes, if any, are handed to
 * xml_processattrib().  xml_terminator() and xml_processattrib() both
 * receive type and may update it.
 *
 * Returns a pointer just past the next '>' (or to the end of string).
 */
static char *
xml_processtag( char *p, newstr *tag, xml_attrib **attrib, int *type )
{
	*attrib = NULL;

	if ( *p=='<' ) p++;

	switch ( *p ) {

	case '!':
		while ( *p && *p!='>' ) newstr_addchar( tag, *p++ );
		*type = XML_COMMENT;
		break;

	case '?':
		*type = XML_DESCRIPTOR;
		p++; /* skip '?' */
		while ( *p && *p!=' ' && *p!='\t' && !xml_terminator( p, type ) )
			newstr_addchar( tag, *p++ );
		if ( *p==' ' || *p=='\t' )
			p = xml_processattrib( p, attrib, type );
		break;

	case '/':
		while ( *p && *p!=' ' && *p!='\t' && !xml_terminator( p, type ) )
			newstr_addchar( tag, *p++ );
		/* set only after the scan, as the scan may touch *type */
		*type = XML_CLOSE;
		if ( *p==' ' || *p=='\t' )
			p = xml_processattrib( p, attrib, type );
		break;

	default:
		*type = XML_OPEN;
		while ( *p && *p!=' ' && *p!='\t' && !xml_terminator( p, type ) )
			newstr_addchar( tag, *p++ );
		if ( *p==' ' || *p=='\t' )
			p = xml_processattrib( p, attrib, type );
		break;
	}

	/* move past the closing '>' */
	while ( *p && *p!='>' ) p++;
	if ( *p=='>' ) p++;

	return p;
}
/* copacin_readf()
 *
 * Collect the lines of one reference into reference.
 *
 * Lines are pulled with readmore().  A line recognised by
 * copacin_istag() starts a new tagged entry (separated from the
 * previous one by '\n'); any other line read while inside a reference
 * is appended after a space with up to three leading characters
 * dropped.  An empty line met while inside a reference ends it.
 *
 * *fcharset is set to CHARSET_UNKNOWN.  Returns 1 when an empty line
 * ended a reference, 0 when readmore() ran out first.
 */
int
copacin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line, newstr *reference, int *fcharset )
{
	int haveref = 0, inref = 0, skipped;
	char *p;

	while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {

		if ( !line->data ) continue;

		/* blank line separates */
		if ( inref && line->len==0 ) haveref = 1;

		p = line->data;
		if ( copacin_istag( p ) ) {
			if ( inref ) newstr_addchar( reference, '\n' );
			newstr_strcat( reference, p );
			inref = 1;
		} else if ( inref ) {
			/* copac puts tag only on 1st line */
			newstr_addchar( reference, ' ' );
			for ( skipped=0; skipped<3 && *p; skipped++ ) p++;
			newstr_strcat( reference, p );
		}

		/* the line has been consumed whichever way it went */
		newstr_empty( line );
	}

	*fcharset = CHARSET_UNKNOWN;
	return haveref;
}
/* name_nocomma()
 *
 * names in the format "H. F. Author"
 *
 * Appends to outname the last name, then '|' and the given name(s).
 * The last name is the final whitespace-delimited word; when that
 * word is "Jr." or "III" (any case) the word before it is included
 * as well.  When should_split() says so (e.g. "HF Author"), the given
 * names are split into one part per character, with '.' and '-'
 * dropped; otherwise they are split at runs of spaces and tabs.
 */
void
name_nocomma( char *start, newstr *outname )
{
	char *p, *last, *end, *word;
	int uplast, lowlast, upfirst, lowfirst, splitfirst;

	/* move to end */
	p = start;
	while ( *p && *(p+1) ) p++;

	/* point to last name */
	end = p;
	while ( p>start && !is_ws( *p ) ) p--;

	/* p rests on the whitespace in front of the final word (or on
	 * start), so that whitespace has to be skipped before the word
	 * can be compared with the known suffixes */
	word = skip_ws( p );
	if ( !strcasecmp( word, "Jr." ) || !strcasecmp( word, "III" ) ) {
		while ( p>start && is_ws( *p ) ) p--;
		while ( p>start && !is_ws( *p ) ) p--;
	}
	last = p;

	p = skip_ws( p );

	/* look for upper and lower case in last name */
	check_case( p, end+1, &uplast, &lowlast );

	/* copy last name */
	while ( p<=end ) newstr_addchar( outname, *p++ );

	if ( start==last ) return;	/* Only last name */

	/* Given names */
	newstr_addchar( outname, '|' );

	/* look for upper and lower case in given name(s) */
	check_case( start, last, &upfirst, &lowfirst );
	splitfirst = should_split( uplast, lowlast, upfirst, lowfirst );

	/* copy given name(s), splitfirst to identify cases of "HF Author" */
	p = start;
	while ( p!=last ) {
		if ( *p!=' ' && *p!='\t' ) {
			if ( !( splitfirst && ( *p=='.' || *p=='-' ) ) ) {
				p = name_addmultibytechar( outname, p, last );
				if ( splitfirst ) newstr_addchar( outname, '|' );
			}
			else p++;
		} else {
			while ( p!=last && ( *p==' ' || *p=='\t' ) ) p++;
			if ( p!=last && !splitfirst )
				newstr_addchar( outname, '|' );
		}
	}
}
/* risin_readf()
 *
 * Collect the lines of one RIS reference into reference, each line
 * preceded by '\n'.
 *
 * *fcharset starts as CHARSET_UNKNOWN and becomes CHARSET_UNICODE
 * when a line begins with the UTF-8 byte order mark.  A start-of-
 * reference line met while already inside a reference ends the
 * current one; that line is then left in line rather than emptied.
 *
 * Returns 1 when a reference was collected, 0 otherwise.
 *
 * NOTE(review): the strncmp() lengths below (6 and 5) are one more
 * than the lengths of the literals as they appear here, so the
 * terminating NUL takes part in the comparison -- confirm that the
 * spacing inside the literals is intact.
 */
static int
risin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line, newstr *reference, int *fcharset )
{
	int haveref = 0, inref = 0, readtoofar = 0;
	unsigned char *bom;
	char *p;

	*fcharset = CHARSET_UNKNOWN;

	while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {

		if ( !line->data || line->len==0 ) continue;

		p = line->data;

		/* Recognize UTF8 BOM */
		bom = (unsigned char *) p;
		if ( line->len > 2 &&
		     bom[0]==0xEF && bom[1]==0xBB && bom[2]==0xBF ) {
			*fcharset = CHARSET_UNICODE;
			p += 3;
		}

		/* Each reference starts with 'TY - ' &&
		 * ends with 'ER - ' */
		if ( !strncmp( p, "TY - ", 6 ) ) {
			if ( inref ) {
				/* we've read too far.... */
				readtoofar = 1;
				inref = 0;
			} else {
				inref = 1;
			}
		}

		if ( risin_istag( p ) ) {
			if ( !inref ) {
				fprintf( stderr, "Warning. Tagged line not "
					"in properly started reference.\n" );
				fprintf( stderr, "Ignored: '%s'\n", p );
			} else if ( !strncmp( p, "ER -", 5 ) ) {
				inref = 0;
			} else {
				newstr_addchar( reference, '\n' );
				newstr_strcat( reference, p );
			}
		}
		/* not a tag, but we'll append to last values ...*/
		else if ( inref && strncmp( p, "ER -", 5 ) ) {
			newstr_addchar( reference, '\n' );
			newstr_strcat( reference, p );
		}

		if ( !inref && reference->len ) haveref = 1;
		if ( !readtoofar ) newstr_empty( line );
	}

	/* input ended while still inside a reference */
	if ( inref ) haveref = 1;

	return haveref;
}
/* copacin_adddate()
 *
 * Add date fields taken from p to info at the given level.
 *
 * tag "%D": all of p becomes YEAR.
 * tag "%8": the text up to the first space or comma becomes MONTH,
 *           as a two-digit number when its first three letters match
 *           an English month name, verbatim otherwise; the following
 *           text up to a newline or comma becomes DAY when it is one
 *           or two characters long.
 *
 * When newtag starts with "PART" (any case) the PARTYEAR, PARTMONTH
 * and PARTDAY field names are used instead.
 */
static void
copacin_adddate( fields *info, char *tag, char *newtag, char *p, int level )
{
	char *months[12] = { "January", "February", "March", "April",
		"May", "June", "July", "August", "September", "October",
		"November", "December" };
	char month[10];
	int is_part, m, hit;
	newstr date;

	newstr_init( &date );

	is_part = !strncasecmp( newtag, "PART", 4 );

	if ( !strcasecmp( tag, "%D" ) ) {

		for ( ; *p; p++ ) newstr_addchar( &date, *p );
		if ( date.len>0 )
			fields_add( info, is_part ? "PARTYEAR" : "YEAR",
				date.data, level );

	} else if ( !strcasecmp( tag, "%8" ) ) {

		/* month */
		for ( ; *p && *p!=' ' && *p!=','; p++ )
			newstr_addchar( &date, *p );
		if ( date.len>0 ) {
			hit = -1;
			for ( m=0; m<12; m++ ) {
				if ( !strncasecmp( date.data, months[m], 3 ) ) {
					hit = m;
					break;
				}
			}
			if ( hit==-1 ) {
				fields_add( info, is_part ? "PARTMONTH" : "MONTH",
					date.data, level );
			} else {
				if ( hit>8 ) sprintf( month, "%d", hit+1 );
				else sprintf( month, "0%d", hit+1 );
				fields_add( info, is_part ? "PARTMONTH" : "MONTH",
					month, level );
			}
		}

		/* day */
		newstr_empty( &date );
		p = skip_ws( p );
		for ( ; *p && *p!='\n' && *p!=','; p++ )
			newstr_addchar( &date, *p );
		if ( date.len>0 && date.len<3 )
			fields_add( info, is_part ? "PARTDAY" : "DAY",
				date.data, level );
	}

	newstr_free( &date );
}
/* copacin_person()
 *
 * copac names appear to always start with last name first, but don't
 * always seem to have a comma after the name
 *
 * editors seem to be stuck in as authors with the tag "[Editor]" in it
 *
 * Values found in pm->asis or pm->corps go to name_add() unchanged.
 * Otherwise the value is split on spaces, an "[Editor]" token is
 * removed (switching the output tag to "EDITOR"), a comma is appended
 * to the first token when no token ends in one, and the tokens are
 * rejoined with single spaces before being passed to name_add().
 *
 * Returns BIBL_OK when name_add() reports success, BIBL_ERR_MEMERR
 * otherwise.
 */
static int
copacin_person( fields *bibin, newstr *intag, newstr *invalue, int level, param *pm, char *outtag, fields *bibout )
{
	char editor[] = "EDITOR", *usetag = outtag;
	int ncommas = 0, k, ok;
	newstr usename, *tok;
	list tokens;

	if ( list_find( &(pm->asis), invalue->data )!=-1 ||
	     list_find( &(pm->corps), invalue->data )!=-1 ) {
		ok = name_add( bibout, outtag, invalue->data, level,
			&(pm->asis), &(pm->corps) );
		return ok ? BIBL_OK : BIBL_ERR_MEMERR;
	}

	list_init( &tokens );
	newstr_init( &usename );

	list_tokenize( &tokens, invalue, " ", 1 );

	/* pull out the editor marker and count tokens ending in ',' */
	for ( k=0; k<tokens.n; k++ ) {
		tok = list_get( &tokens, k );
		if ( strcmp( tok->data, "[Editor]" )==0 ) {
			usetag = editor;
			newstr_strcpy( tok, "" );
		} else if ( tok->len && tok->data[tok->len-1]==',' ) {
			ncommas++;
		}
	}

	/* no comma anywhere: put one after the first token */
	if ( ncommas==0 && tokens.n ) {
		tok = list_get( &tokens, 0 );
		newstr_addchar( tok, ',' );
	}

	/* rebuild the name from the non-empty tokens */
	for ( k=0; k<tokens.n; k++ ) {
		tok = list_get( &tokens, k );
		if ( tok->len==0 ) continue;
		if ( k ) newstr_addchar( &usename, ' ' );
		newstr_newstrcat( &usename, tok );
	}

	list_free( &tokens );

	ok = name_add( bibout, usetag, usename.data, level,
		&(pm->asis), &(pm->corps) );

	newstr_free( &usename );

	return ok ? BIBL_OK : BIBL_ERR_MEMERR;
}
/* wordin_pages()
 *
 * Split the data of node at the first '-' into a start and an end
 * page and add them to info as "PAGES:START" and "PAGES:STOP"
 * (level 1).  When the end page is shorter than the start page it is
 * treated as abbreviated: its digits replace the last digits of the
 * start page (e.g. "123-45" gives a stop page of "145").
 *
 * A node without data adds nothing.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR when a string or field could
 * not be stored.
 */
static int
wordin_pages( xml *node, fields *info )
{
	int i, status, ret = BIBL_OK;
	newstr sp, ep;
	char *p;

	newstrs_init( &sp, &ep, NULL );

	p = xml_data( node );
	/* guard against a node that carries no data at all */
	if ( !p ) goto out;

	while ( *p && *p!='-' )
		newstr_addchar( &sp, *p++ );
	if ( newstr_memerr( &sp ) ) {
		ret = BIBL_ERR_MEMERR;
		goto out;
	}

	if ( *p=='-' ) p++;

	while ( *p )
		newstr_addchar( &ep, *p++ );
	if ( newstr_memerr( &ep ) ) {
		ret = BIBL_ERR_MEMERR;
		goto out;
	}

	if ( sp.len ) {
		status = fields_add( info, "PAGES:START", sp.data, 1 );
		if ( status!=FIELDS_OK ) {
			ret = BIBL_ERR_MEMERR;
			goto out;
		}
	}

	if ( ep.len ) {
		if ( sp.len > ep.len ) {
			/* overlay the abbreviated end page on the tail of sp */
			for ( i=sp.len-ep.len; i<sp.len; ++i )
				sp.data[i] = ep.data[i-sp.len+ep.len];
			status = fields_add( info, "PAGES:STOP", sp.data, 1 );
		} else
			status = fields_add( info, "PAGES:STOP", ep.data, 1 );
		if ( status!=FIELDS_OK ) {
			ret = BIBL_ERR_MEMERR;
			goto out;
		}
	}

out:
	newstrs_free( &sp, &ep, NULL );
	return ret;
}
/* name_addmultibytechar
 *
 * Add character to newstring s starting at pointer p.
 *
 * A byte without the high bit set is copied on its own.  Otherwise
 * the whole run of bytes with the high bit set is copied, stopping
 * at the lastp barrier.
 *
 * Since more than one byte may be consumed, the updated pointer p
 * is returned.
 */
static char *
name_addmultibytechar( newstr *s, char *p, char *lastp )
{
	if ( (*p) & 128 ) {
		for ( ; p!=lastp && ( (*p) & 128 ); p++ )
			newstr_addchar( s, *p );
		return p;
	}

	newstr_addchar( s, *p );
	return p + 1;
}
static void output_name( FILE *outptr, char *p, int level ) { newstr family, part, suffix; int n=0; newstrs_init( &family, &part, &suffix, NULL ); while ( *p && *p!='|' ) newstr_addchar( &family, *p++ ); if ( *p=='|' ) p++; while ( *p ) { while ( *p && *p!='|' ) newstr_addchar( &part, *p++ ); /* truncate periods from "A. B. Jones" names */ if ( part.len ) { if ( part.len==2 && part.data[1]=='.' ) { part.len=1; part.data[1]='\0'; } if ( n==0 ) output_tag( outptr, lvl2indent(level), "name", NULL, TAG_OPEN, TAG_NEWLINE, "type", "personal", NULL ); output_tag( outptr, lvl2indent(incr_level(level,1)), "namePart", part.data, TAG_OPENCLOSE, TAG_NEWLINE, "type", "given", NULL ); n++; } if ( *p=='|' ) { p++; if ( *p=='|' ) { p++; while ( *p && *p!='|' ) newstr_addchar( &suffix, *p++ ); } newstr_empty( &part ); } } if ( family.len ) { if ( n==0 ) output_tag( outptr, lvl2indent(level), "name", NULL, TAG_OPEN, TAG_NEWLINE, "type", "personal", NULL ); output_tag( outptr, lvl2indent(incr_level(level,1)), "namePart", family.data, TAG_OPENCLOSE, TAG_NEWLINE, "type", "family", NULL ); n++; } if ( suffix.len ) { if ( n==0 ) output_tag( outptr, lvl2indent(level), "name", NULL, TAG_OPEN, TAG_NEWLINE, "type", "personal", NULL ); output_tag( outptr, lvl2indent(incr_level(level,1)), "namePart", suffix.data, TAG_OPENCLOSE, TAG_NEWLINE, "type", "suffix", NULL ); } newstrs_free( &part, &family, &suffix, NULL ); }
/* newstr_fget()
 *
 *   returns 0 if we're done, 1 if we're not done
 *
 *   extracts line by line (regardless of end characters)
 *   and feeds from buf....
 *
 * The next line, without its end-of-line characters, is placed in
 * outs.  buf is refilled with fgets() whenever it has been used up,
 * and *pbufpos tracks the read position in buf between calls.  "\n",
 * "\r", "\n\r" and "\r\n" are each taken as one line ending.
 *
 * When fgets() delivers nothing more -- at end-of-file or on a read
 * error -- the function returns 1 if outs holds a final partial line
 * and 0 otherwise.
 */
int
newstr_fget( FILE *fp, char *buf, int bufsize, int *pbufpos, newstr *outs )
{
	int bufpos = *pbufpos, done = 0;
	char *ok;

	newstr_empty( outs );

	while ( !done ) {
		while ( buf[bufpos] && buf[bufpos]!='\r' && buf[bufpos]!='\n' )
			newstr_addchar( outs, buf[bufpos++] );
		if ( buf[bufpos]=='\0' ) {
			ok = fgets( buf, bufsize, fp );
			bufpos = *pbufpos = 0;
			if ( !ok ) {
				/* end-of-file or read error: buf holds nothing
				 * usable, so stop instead of re-reading it */
				buf[bufpos] = 0;
				if ( outs->len==0 ) return 0; /* nothing in out */
				else return 1;                /* one last out */
			}
		} else if ( buf[bufpos]=='\r' || buf[bufpos]=='\n' ) done = 1;
	}

	/* step over the line ending */
	if ( ( buf[bufpos]=='\n' && buf[bufpos+1]=='\r' ) ||
	     ( buf[bufpos]=='\r' && buf[bufpos+1]=='\n' ) ) bufpos += 2;
	else if ( buf[bufpos]=='\n' || buf[bufpos]=='\r' ) bufpos += 1;

	*pbufpos = bufpos;
	return 1;
}
/* process_line()
 *
 * Split one tagged line at p into tag and data.
 *
 * The first two characters are appended to tag and up to six
 * characters in total are stepped over.  After skipping spaces and
 * tabs, the rest of the line is appended to data and its trailing
 * whitespace trimmed.
 *
 * Returns a pointer past the line and its end-of-line characters.
 */
static char *
process_line( newstr *tag, newstr *data, char *p )
{
	int col;

	for ( col=0; col<6 && *p; col++, p++ ) {
		if ( col<2 ) newstr_addchar( tag, *p );
	}

	while ( *p==' ' || *p=='\t' ) p++;

	for ( ; *p && *p!='\r' && *p!='\n'; p++ )
		newstr_addchar( data, *p );
	newstr_trimendingws( data );

	while ( *p=='\n' || *p=='\r' ) p++;

	return p;
}
/* risin_processf()
 *
 * Turn the text of one reference at p into tag/data fields in risin.
 *
 * A line recognised by risin_istag() is split with process_line() and
 * added as a new field when its tag is non-empty.  Any other line is
 * read with process_line2() and its data appended, after a space, to
 * the data of the most recently added field.
 *
 * filename and nref are not used.  Always returns 1.
 */
int
risin_processf( fields *risin, char *p, char *filename, long nref )
{
	newstr tag, data, *prev;

	newstr_init( &tag );
	newstr_init( &data );

	while ( *p ) {
		if ( !risin_istag( p ) ) {
			/* continuation of the previous field */
			p = process_line2( &tag, &data, p );
			if ( data.len && risin->nfields>0 ) {
				prev = &( risin->data[ risin->nfields-1 ] );
				newstr_addchar( prev, ' ' );
				newstr_strcat( prev, data.data );
			}
		} else {
			p = process_line( &tag, &data, p );
			/* no anonymous fields allowed */
			if ( tag.len )
				fields_add( risin, tag.data, data.data, 0 );
		}
		newstr_empty( &tag );
		newstr_empty( &data );
	}

	newstr_free( &tag );
	newstr_free( &data );

	return 1;
}
/*
 * readf()
 *
 * Collect the lines of one BibTeX entry into reference, each line
 * followed by '\n'.
 *
 * Lines come from readmore().  Empty lines and lines whose first
 * non-whitespace character is '%' are skipped; lines before the first
 * '@' line are discarded.  Reading stops at the second '@' line,
 * which is left in line untouched.  *fcharset is CHARSET_UNICODE when
 * a UTF-8 byte order mark was seen, CHARSET_UNKNOWN otherwise.
 *
 * Returns the number of '@' lines seen:
 *   0 if no reference could be read before the input ran out,
 *   1 if the reference was the last one in the input,
 *   2 if another reference follows.
 */
int
bibtexin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line, newstr *reference, int *fcharset )
{
	int nstarts = 0;
	char *p;

	*fcharset = CHARSET_UNKNOWN;

	while ( nstarts!=2 && readmore( fp, buf, bufsize, bufpos, line ) ) {

		if ( line->len==0 ) continue; /* blank line */

		p = line->data;

		/* Recognize UTF8 BOM */
		if ( line->len > 2 &&
		     (unsigned char) p[0]==0xEF &&
		     (unsigned char) p[1]==0xBB &&
		     (unsigned char) p[2]==0xBF ) {
			*fcharset = CHARSET_UNICODE;
			p += 3;
		}

		p = skip_ws( p );

		if ( *p=='%' ) { /* commented out line */
			newstr_empty( line );
			continue;
		}

		if ( *p=='@' ) nstarts++;

		if ( nstarts==1 ) {
			newstr_strcat( reference, p );
			newstr_addchar( reference, '\n' );
			newstr_empty( line );
		} else if ( nstarts==0 ) {
			newstr_empty( line );
		}
		/* nstarts==2: keep the line, it was not consumed here */
	}

	return nstarts;
}
/* bibtex_cleandata()
 *
 * Clean the value s of a BibTeX field in place.
 *
 * Empty values and values of URL tags are left alone.  Otherwise s is
 * split with bibtex_split(); protected tokens beginning with
 * "\href{" are passed to bibtex_addtitleurl(); when p->latexin is set
 * and the tag is neither a name tag nor a URL tag each token is run
 * through bibtex_cleantoken(); finally the tokens are joined back
 * into s with single spaces.
 *
 * Returns BIBL_OK, or the status reported by bibtex_split().
 */
static int
bibtex_cleandata( newstr *tag, newstr *s, fields *info, param *p )
{
	newstr *tok;
	list tokens;
	int k, status;

	if ( !s->len ) return BIBL_OK;

	/* protect url from undergoing any parsing */
	if ( is_url_tag( tag ) ) return BIBL_OK;

	list_init( &tokens );

	status = bibtex_split( &tokens, s );
	if ( status==BIBL_OK ) {

		for ( k=0; k<tokens.n; k++ ) {
			tok = list_get( &tokens, k );
			if ( bibtex_protected( tok ) &&
			     !strncasecmp( tok->data, "\\href{", 6 ) )
				bibtex_addtitleurl( info, tok );
			if ( p->latexin && !is_name_tag( tag ) && !is_url_tag( tag ) )
				bibtex_cleantoken( tok );
		}

		/* rebuild the value from the cleaned tokens */
		newstr_empty( s );
		for ( k=0; k<tokens.n; k++ ) {
			tok = list_get( &tokens, k );
			if ( k>0 ) newstr_addchar( s, ' ' );
			newstr_newstrcat( s, tok );
		}
	}

	list_free( &tokens );
	return status;
}
/* endin_processf()
 *
 * Turn the text of one reference at p into tag/data fields in endin.
 *
 * A line recognised by endin_istag() is split with process_endline()
 * and added as a field when its data is non-empty.  Any other line is
 * read with process_endline2(): when the last field's tag starts with
 * "%K" the data becomes a further "%K" field, otherwise it is
 * appended, after a space, to the last field's data.
 *
 * filename and nref are not used.  Always returns 1.
 */
int
endin_processf( fields *endin, char *p, char *filename, long nref )
{
	newstr tag, data, *prev;
	int nf;

	newstr_init( &tag );
	newstr_init( &data );

	while ( *p ) {
		if ( endin_istag( p ) ) {
			p = process_endline( &tag, &data, p );
			/* no empty fields allowed */
			if ( data.len )
				fields_add( endin, tag.data, data.data, 0 );
		} else {
			p = process_endline2( &tag, &data, p );
			nf = endin->nfields;
			if ( nf>0 && data.len ) {
				/* endnote puts %K only on 1st line of keywords */
				if ( strncmp( endin->tag[nf-1].data, "%K", 2 )==0 ) {
					fields_add( endin, "%K", data.data, 0 );
				} else {
					prev = &( endin->data[nf-1] );
					newstr_addchar( prev, ' ' );
					newstr_strcat( prev, data.data );
				}
			}
		}
		newstr_empty( &tag );
		newstr_empty( &data );
	}

	newstr_free( &tag );
	newstr_free( &data );

	return 1;
}
/* name_add()
 *
 * Process each name in the '|'-separated list q.
 *
 * Leading whitespace and trailing whitespace/commas are removed from
 * every name; names that are empty after trimming are skipped.  Each
 * remaining name is handed to name_process() together with info, tag,
 * level, asis and corps.  A NULL q does nothing.
 */
void
name_add( fields *info, char *tag, char *q, int level, list *asis, list *corps )
{
	newstr inname;
	char *p, *start, *end;

	if ( !q ) return;

	newstr_init( &inname );

	while ( *q ) {

		start = q = skip_ws( q );

		/* find the end of this name */
		while ( *q && *q!='|' ) q++;

		/* strip trailing whitespace and commas; end is one past
		 * the last kept character and never moves before start,
		 * so "names" like " , " cannot run off the front of the
		 * buffer */
		end = q;
		while ( end>start && ( is_ws( *(end-1) ) || *(end-1)==',' ) )
			end--;

		for ( p=start; p<end; p++ )
			newstr_addchar( &inname, *p );

		if ( inname.len ) {
			name_process( info, tag, level, &inname, asis, corps );
			newstr_empty( &inname );
		}

		if ( *q=='|' ) q++;
	}

	newstr_free( &inname );
}
/* addpage()
 *
 * Read a page range such as "12-34" from p and add its two ends to
 * info as "PAGESTART" and "PAGEEND" at the given level.  Each end
 * runs up to the next whitespace, '-', '\r' or '\n'; whitespace and
 * '-' between the two are skipped.  An end that is empty is not
 * added.
 */
static void
addpage( fields *info, char *p, int level )
{
	newstr num;

	newstr_init( &num );

	/* first page */
	p = skip_ws( p );
	for ( ; *p && !is_ws( *p ) && *p!='-' && *p!='\r' && *p!='\n'; p++ )
		newstr_addchar( &num, *p );
	if ( num.len>0 ) fields_add( info, "PAGESTART", num.data, level );

	newstr_empty( &num );

	/* separator */
	while ( *p && ( is_ws( *p ) || *p=='-' ) ) p++;

	/* last page */
	for ( ; *p && !is_ws( *p ) && *p!='-' && *p!='\r' && *p!='\n'; p++ )
		newstr_addchar( &num, *p );
	if ( num.len>0 ) fields_add( info, "PAGEEND", num.data, level );

	newstr_free( &num );
}
/* output_title()
 *
 * Write the title found at the given level under bibtag.
 *
 * With BIBOUT_SHORTTITLE set in format_opts and level==1, the
 * SHORTTITLE/SHORTSUBTITLE fields are tried first; TITLE/SUBTITLE are
 * used when no short title is found.  A subtitle is joined to the
 * title with ": ", or with a single space when the title already ends
 * in '?'.  The fields used are marked with fields_setused().  Nothing
 * is written when no title field is found.
 *
 * refnum is not used.
 */
static void
output_title( FILE *fp, fields *info, unsigned long refnum, char *bibtag, int level, int format_opts )
{
	newstr title, *main_title;
	int n1 = -1, n2 = -1;

	/* Option is for short titles of journals */
	if ( ( format_opts & BIBOUT_SHORTTITLE ) && level==1 ) {
		n1 = fields_find( info, "SHORTTITLE", level );
		n2 = fields_find( info, "SHORTSUBTITLE", level );
	}
	if ( n1==-1 ) {
		n1 = fields_find( info, "TITLE", level );
		n2 = fields_find( info, "SUBTITLE", level );
	}

	if ( n1!=-1 ) {
		newstr_init( &title );
		newstr_newstrcpy( &title, &(info->data[n1]) );
		fields_setused( info, n1 );
		if ( n2!=-1 ) {
			main_title = &(info->data[n1]);
			/* the last character sits at len-1; index len is
			 * the string terminator */
			if ( main_title->len>0 &&
			     main_title->data[main_title->len-1]=='?' )
				newstr_addchar( &title, ' ' );
			else
				newstr_strcat( &title, ": " );
			newstr_strcat( &title, info->data[n2].data );
			fields_setused( info, n2 );
		}
		output_element( fp, bibtag, title.data, format_opts );
		newstr_free( &title );
	}
}