/*
 * copacin_processf()
 *
 * Walk the NUL-terminated reference text at p.  Every line recognised by
 * copacin_istag() is split into a tag and its data by copacin_addtag2();
 * pairs in which both parts are non-empty are appended to copacin at
 * level 0.  Lines that are not tags are skipped via copacin_nextline().
 *
 * filename and nref are accepted but not used by this function.
 *
 * Returns 1 on success, 0 if fields_add() reports a failure.  The scratch
 * strings are released on every exit path.
 */
int
copacin_processf( fields *copacin, char *p, char *filename, long nref )
{
	newstr tag, data;
	int status, ret = 1;

	newstr_init( &tag );
	newstr_init( &data );

	while ( *p ) {
		p = skip_ws( p );
		if ( copacin_istag( p ) ) {
			p = copacin_addtag2( p, &tag, &data );
			/* don't add empty strings */
			if ( tag.len && data.len ) {
				status = fields_add( copacin, tag.data, data.data, 0 );
				if ( status!=FIELDS_OK ) {
					/* fall through to cleanup instead of leaking tag/data */
					ret = 0;
					goto out;
				}
			}
			newstr_empty( &tag );
			newstr_empty( &data );
		}
		else p = copacin_nextline( p );
	}

out:
	newstr_free( &tag );
	newstr_free( &data );
	return ret;
}
/*
 * endin_processf()
 *
 * Break the reference text at p into tagged fields stored in endin.
 *
 * A line recognised by endin_istag() starts a new field (added only when
 * its data is non-empty).  Any other line is a continuation: if the most
 * recently added field is a "%K" field the line becomes a new "%K" field
 * of its own, otherwise the text is appended to the previous field's
 * data, separated by a single space.
 *
 * filename and nref are not used.  Always returns 1.
 */
int
endin_processf( fields *endin, char *p, char *filename, long nref )
{
	newstr tag, data;
	newstr *prev_data;
	int count, prev;

	newstr_init( &tag );
	newstr_init( &data );

	while ( *p != '\0' ) {

		if ( endin_istag( p ) ) {
			p = process_endline( &tag, &data, p );
			/* no empty fields allowed */
			if ( data.len != 0 )
				fields_add( endin, tag.data, data.data, 0 );
		} else {
			p = process_endline2( &tag, &data, p );
			count = endin->nfields;
			if ( count > 0 && data.len != 0 ) {
				prev = count - 1;
				/* endnote puts %K only on 1st line of keywords */
				if ( strncmp( endin->tag[prev].data, "%K", 2 ) == 0 ) {
					fields_add( endin, "%K", data.data, 0 );
				} else {
					prev_data = &(endin->data[prev]);
					newstr_addchar( prev_data, ' ' );
					newstr_strcat( prev_data, data.data );
				}
			}
		}

		newstr_empty( &tag );
		newstr_empty( &data );
	}

	newstr_free( &tag );
	newstr_free( &data );

	return 1;
}
/*
 * process_pages()
 *
 * Split a page specification held in s into a start and an end page.
 *
 * All spaces are first removed from s (s is modified in place).  The
 * leading run of digits, if any, is added to info as "PAGESTART"; after
 * skipping any following non-digit characters, the next run of digits,
 * if any, is added as "PAGEEND".  Both fields are added at the given
 * level.
 *
 * If s holds no data after the spaces are stripped, nothing is added.
 */
static void
process_pages( fields *info, newstr *s, int level )
{
	char *p, *q;
	newstr sp, ep;

	newstr_findreplace( s, " ", "" );

	/* an empty newstr may carry no buffer at all; nothing to parse then */
	if ( !s->data ) return;

	newstr_init( &sp );
	newstr_init( &ep );

	/* the <ctype.h> classifiers require a value in unsigned char range */
	p = q = s->data;
	while ( isdigit( (unsigned char) *q ) ) q++;
	newstr_segcpy( &sp, p, q );
	if ( sp.len>0 )
		fields_add( info, "PAGESTART", sp.data, level );

	/* skip the separator (dash, etc.) between the two numbers */
	p = q;
	while ( *p && !isdigit( (unsigned char) *p ) ) p++;
	q = p;
	while ( isdigit( (unsigned char) *q ) ) q++;
	newstr_segcpy( &ep, p, q );
	if ( ep.len>0 )
		fields_add( info, "PAGEEND", ep.data, level );

	newstr_free( &sp );
	newstr_free( &ep );
}
/*
 * pages_add()
 *
 * Use extract_range() to split invalue into a start and a stop string and
 * add the non-empty parts to bibout as "PAGES:START" and "PAGES:STOP" at
 * the given level.  outtag is not used.
 *
 * Returns 1 on success, 0 if either extracted string reports a memory
 * error or fields_add() fails.
 */
int
pages_add( fields *bibout, char *outtag, newstr *invalue, int level )
{
	newstr first, last;
	int ok = 0;

	newstr_init( &first );
	newstr_init( &last );

	extract_range( invalue, &first, &last );

	if ( newstr_memerr( &first ) || newstr_memerr( &last ) )
		goto done;

	if ( first.len > 0 ) {
		if ( fields_add( bibout, "PAGES:START", first.data, level ) != FIELDS_OK )
			goto done;
	}

	if ( last.len > 0 ) {
		if ( fields_add( bibout, "PAGES:STOP", last.data, level ) != FIELDS_OK )
			goto done;
	}

	ok = 1;

done:
	newstr_free( &first );
	newstr_free( &last );
	return ok;
}
/*
 * test_segcat()
 *
 * Exercise newstr_segcpy() with empty and three-character segments on the
 * caller's string and on two fresh strings, then call newstr_segcat()
 * 1000 times with the three-character segment and verify the resulting
 * length via inconsistent_len().
 *
 * Returns the number of failed checks.
 */
static int
test_segcat( newstr *s )
{
	char digits[] = "0123456789";
	char *from = digits + 2;
	char *to   = digits + 5;
	int repeat = 1000, k;
	int failed = 0;
	newstr a, b;

	newstr_init( &a );
	newstr_init( &b );

	/* zero-length segment copied into the shared string */
	newstr_empty( s );
	newstr_segcpy( s, from, from );
	if ( string_mismatch( s, 0, "" ) ) failed += 1;

	/* zero-length segment copied into a fresh string */
	newstr_segcpy( &a, from, from );
	if ( string_mismatch( &a, 0, "" ) ) failed += 1;

	/* "234" copied into a fresh string */
	newstr_segcpy( &b, from, to );
	if ( string_mismatch( &b, 3, "234" ) ) failed += 1;

	/* repeated concatenation of the three-character segment */
	newstr_empty( s );
	k = 0;
	while ( k < repeat ) {
		newstr_segcat( s, from, to );
		k++;
	}
	if ( inconsistent_len( s, 3*repeat ) ) failed += 1;

	newstr_free( &a );
	newstr_free( &b );

	return failed;
}
/*
 * read_ref()
 *
 * Read references from fp one at a time with r->readf(), convert each
 * non-empty one into a freshly allocated fields structure with
 * r->processf(), and append it to bin.  References that processf()
 * rejects are freed and dropped.
 *
 * When readf() reports a character set for the file, it overrides the
 * default input character set in p, unless the user specified one
 * explicitly.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR if a fields structure could not be
 * allocated.  The line and reference buffers are released on every exit
 * path.
 */
static int
read_ref( FILE *fp, bibl *bin, char *filename, convert_rules *r, param *p )
{
	newstr reference, line;
	fields *ref;
	char buf[256]="";
	int nrefs = 0, bufpos = 0, fcharset;
	int status = BIBL_OK;

	newstr_init( &reference );
	newstr_init( &line );

	while ( r->readf( fp, buf, sizeof(buf), &bufpos, &line, &reference, &fcharset ) ) {

		if ( reference.len==0 ) continue;

		ref = fields_new();
		if ( !ref ) {
			/* leave through the common cleanup so nothing leaks */
			status = BIBL_ERR_MEMERR;
			break;
		}

		/* NOTE(review): nrefs is never incremented, so processf()
		 * always receives 1 here -- confirm whether that is intended.
		 */
		if ( r->processf( ref, reference.data, filename, nrefs+1 )){
			bibl_addref( bin, ref );
		} else {
			fields_free( ref );
			free( ref );
		}

		newstr_empty( &reference );

		if ( fcharset!=CHARSET_UNKNOWN ) {
			/* charset from file takes priority over default, but
			 * not user-specified */
			if ( p->charsetin_src!=BIBL_SRC_USER ) {
				p->charsetin_src = BIBL_SRC_FILE;
				p->charsetin = fcharset;
				if ( fcharset!=CHARSET_UNICODE ) p->utf8in = 0;
			}
		}
	}

	newstr_free( &line );
	newstr_free( &reference );

	return status;
}
/*
 * test_indxcat()
 *
 * Exercise newstr_indxcat() with an empty index range (2,2) and a
 * three-character range (2,5) on the caller's string and on two fresh
 * strings, then concatenate the three-character range three times and
 * compare against "234234234".
 *
 * Returns the number of failed checks.
 */
static int
test_indxcat( newstr *s )
{
	char digits[] = "0123456789";
	int repeat = 3, k;
	int failed = 0;
	newstr a, b;

	newstr_init( &a );
	newstr_init( &b );

	/* empty range appended to the (emptied) shared string */
	newstr_empty( s );
	newstr_indxcat( s, digits, 2, 2 );
	if ( string_mismatch( s, 0, "" ) ) failed += 1;

	/* empty range appended to a fresh string */
	newstr_indxcat( &a, digits, 2, 2 );
	if ( string_mismatch( &a, 0, "" ) ) failed += 1;

	/* "234" appended to a fresh string */
	newstr_indxcat( &b, digits, 2, 5 );
	if ( string_mismatch( &b, 3, "234" ) ) failed += 1;

	/* the same range appended several times in a row */
	newstr_empty( s );
	k = 0;
	while ( k < repeat ) {
		newstr_indxcat( s, digits, 2, 5 );
		k++;
	}
	if ( string_mismatch( s, 9, "234234234" ) ) failed += 1;

	newstr_free( &a );
	newstr_free( &b );

	return failed;
}
/*
 * risin_processf()
 *
 * Break the reference text at p into tagged fields stored in risin.
 *
 * A line recognised by risin_istag() is parsed with process_line() and
 * added as a new field whenever its tag is non-empty (the data may be
 * empty).  Any other line is parsed with process_line2() and, when it
 * carries data and a previous field exists, appended to that field's
 * data after a single space.
 *
 * filename and nref are not used.  Always returns 1.
 */
int
risin_processf( fields *risin, char *p, char *filename, long nref )
{
	newstr tag, data;
	newstr *prev;

	newstr_init( &tag );
	newstr_init( &data );

	while ( *p != '\0' ) {

		if ( !risin_istag( p ) ) {
			/* continuation of the previous field */
			p = process_line2( &tag, &data, p );
			if ( data.len && risin->nfields > 0 ) {
				prev = &( risin->data[ risin->nfields - 1 ] );
				newstr_addchar( prev, ' ' );
				newstr_strcat( prev, data.data );
			}
		} else {
			p = process_line( &tag, &data, p );
			/* no anonymous fields allowed; only the tag is required */
			if ( tag.len )
				fields_add( risin, tag.data, data.data, 0 );
		}

		newstr_empty( &tag );
		newstr_empty( &data );
	}

	newstr_free( &tag );
	newstr_free( &data );

	return 1;
}
/*
 * isiin_processf()
 *
 * Break the reference text at p into tagged fields stored in isiin.
 *
 * Each line is parsed with process_isiline(); lines without data are
 * ignored.  A line whose tag is longer than one character and accepted
 * by isiin_istag() becomes a new field.  Any other line continues the
 * most recent field: after an "AU" or "AF" field it is added as another
 * field with that same tag, otherwise its text is appended to the
 * previous field's data after a single space.
 *
 * filename and nref are not used.  Always returns 1.
 */
int
isiin_processf( fields *isiin, char *p, char *filename, long nref )
{
	newstr tag, data;
	char *prev_tag;
	int last;

	newstr_init( &tag );
	newstr_init( &data );

	while ( *p != '\0' ) {

		newstr_empty( &tag );
		newstr_empty( &data );

		p = process_isiline( &tag, &data, p );
		if ( data.len == 0 ) continue;

		if ( tag.len > 1 && isiin_istag( tag.data ) ) {
			fields_add( isiin, tag.data, data.data, 0 );
			continue;
		}

		/* continuation line: needs an earlier field to attach to */
		if ( isiin->nfields <= 0 ) continue;
		last = isiin->nfields - 1;
		prev_tag = isiin->tag[last].data;

		/* only one AU or AF for list of authors */
		if ( strcmp( prev_tag, "AU" ) == 0 ) {
			fields_add( isiin, "AU", data.data, 0 );
		} else if ( strcmp( prev_tag, "AF" ) == 0 ) {
			fields_add( isiin, "AF", data.data, 0 );
		} else {
			/* otherwise append multiline data */
			newstr_addchar( &(isiin->data[last]), ' ' );
			newstr_strcat( &(isiin->data[last]), data.data );
		}
	}

	newstr_free( &data );
	newstr_free( &tag );

	return 1;
}
/*
 * bibtex_addtitleurl()
 *
 * Handle a value that begins with a six-character prefix (the "\href{"
 * marker): the text up to the next "}" is added to info as a "URL" field
 * at level 0, and whatever follows the "}" replaces the contents of in.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR on a string memory error or when
 * fields_add() fails; in is left untouched on failure.
 */
static int
bibtex_addtitleurl( fields *info, newstr *in )
{
	int ret = BIBL_OK;
	newstr piece;
	char *rest;

	newstr_init( &piece );

	/* ...skip past "\href{" and copy to "}" */
	rest = newstr_cpytodelim( &piece, in->data + 6, "}", 1 );

	if ( newstr_memerr( &piece ) ) {
		ret = BIBL_ERR_MEMERR;
	}
	/* ...add to URL */
	else if ( fields_add( info, "URL", piece.data, 0 ) != FIELDS_OK ) {
		ret = BIBL_ERR_MEMERR;
	}
	else {
		/* ...return deleted fragment to newstr in */
		(void) newstr_cpytodelim( &piece, rest, "", 0 );
		if ( newstr_memerr( &piece ) )
			ret = BIBL_ERR_MEMERR;
		else
			newstr_swapstrings( &piece, in );
	}

	newstr_free( &piece );
	return ret;
}
/*
 * process_bibtexid()
 *
 * Read the text at p up to the first ',' as a candidate reference id.
 *
 * If the candidate is non-empty and contains no '=', it is copied into
 * id and parsing continues after the comma.  If it contains '=', it is
 * taken to be a tag/data pair rather than an id: id is emptied and the
 * input position is rolled back to where it started.  An empty candidate
 * just empties id.
 *
 * Returns the resulting position with leading whitespace skipped.
 */
static char*
process_bibtexid( char *p, newstr *id )
{
	char *origin = p;
	newstr candidate;

	newstr_init( &candidate );
	p = newstr_cpytodelim( &candidate, p, ",", 1 );

	if ( candidate.len == 0 ) {
		newstr_empty( id );
	}
	else if ( strchr( candidate.data, '=' ) != NULL ) {
		/* Endnote writes bibtex files w/o fields, try to
		 * distinguish via presence of an equal sign.... if
		 * it's there, assume that it's a tag/data pair instead
		 * and roll back.
		 */
		p = origin;
		newstr_empty( id );
	}
	else {
		newstr_strcpy( id, candidate.data );
	}

	newstr_free( &candidate );
	return skip_ws( p );
}
/*
 * process_keywords()
 *
 * Split a keywords="" value on semicolons and add each non-empty piece
 * (leading and trailing whitespace removed) to info as a "KEYWORD" field
 * at the given level.
 *
 * Commas are also frequently used as separators, but splitting on them
 * would break entries like:
 *       keywords="Microscopy, Confocal"
 *
 * A NULL or empty d is accepted and adds nothing.
 *
 * Returns BIBL_OK or BIBL_ERR_MEMERR
 */
static int
process_keywords( fields *info, newstr *d, int level )
{
	int ret = BIBL_OK;
	newstr word;
	char *cursor;

	if ( d == NULL || d->len == 0 ) return BIBL_OK;

	newstr_init( &word );

	for ( cursor = d->data; *cursor != '\0'; ) {

		cursor = newstr_cpytodelim( &word, skip_ws( cursor ), ";", 1 );
		newstr_trimendingws( &word );

		if ( newstr_memerr( &word ) ) {
			ret = BIBL_ERR_MEMERR;
			break;
		}

		if ( word.len == 0 ) continue;

		if ( fields_add( info, "KEYWORD", word.data, level ) != FIELDS_OK ) {
			ret = BIBL_ERR_MEMERR;
			break;
		}
	}

	newstr_free( &word );
	return ret;
}
char * xml_findend( char *buffer, char *tag ) { newstr endtag; char *p; newstr_init( &endtag ); newstr_strcpy( &endtag, "</" ); if ( xml_pns ) { newstr_strcat( &endtag, xml_pns ); newstr_addchar( &endtag, ':' ); } newstr_strcat( &endtag, tag ); newstr_addchar( &endtag, '>' ); p = strsearch( buffer, endtag.data ); if ( p && *p ) { if ( *p ) p++; /* skip <random_tag></end> combo */ while ( *p && *(p-1)!='>' ) p++; } newstr_free( &endtag ); return p; }
/*
 * output_pages()
 *
 * Emit a "pages" element built from the "PAGESTART" and "PAGEEND" fields
 * of info.  When both are present they are joined by "-" if
 * BIBOUT_SINGLEDASH is set in format_opts and by "--" otherwise; when
 * only one is present it is written alone.  Fields that are used are
 * marked as such.
 *
 * If neither field exists, output_articlenumber() is called instead.
 */
static void
output_pages( FILE *fp, fields *info, unsigned long refnum, int format_opts )
{
	newstr range;
	int first, last;
	int have_first, have_last;

	first = fields_find( info, "PAGESTART", -1 );
	last  = fields_find( info, "PAGEEND", -1 );
	have_first = ( first != -1 );
	have_last  = ( last != -1 );

	if ( !have_first && !have_last ) {
		output_articlenumber( fp, info, refnum, format_opts );
		return;
	}

	newstr_init( &range );

	if ( have_first ) {
		newstr_strcat( &range, info->data[first].data );
		fields_setused( info, first );
	}

	if ( have_first && have_last )
		newstr_strcat( &range, ( format_opts & BIBOUT_SINGLEDASH ) ? "-" : "--" );

	if ( have_last ) {
		newstr_strcat( &range, info->data[last].data );
		fields_setused( info, last );
	}

	output_element( fp, "pages", range.data, format_opts );

	newstr_free( &range );
}
/*
 * generate_citekey()
 *
 * Build a citation key for the reference in info and store it as a
 * "REFNUM" field at level 0.
 *
 * The key is the first "AUTHOR" value (level 0 preferred, any level
 * otherwise) up to the first '|', followed by the "YEAR" value (falling
 * back to "PARTYEAR"), with all whitespace removed.  If the author or
 * the year is missing, or the resulting key would be empty, the key
 * "ref<nref>" is used instead so that the reference always receives a
 * key.
 *
 * Returns the index of the "REFNUM" field as reported by fields_find(),
 * or -1 if it could not be found.
 */
static int
generate_citekey( fields *info, int nref )
{
	newstr citekey;
	int n1, n2;
	char *p, buf[100];

	newstr_init( &citekey );

	n1 = fields_find( info, "AUTHOR", 0 );
	if ( n1==-1 ) n1 = fields_find( info, "AUTHOR", -1 );

	n2 = fields_find( info, "YEAR", 0 );
	if ( n2==-1 ) n2 = fields_find( info, "YEAR", -1 );
	if ( n2==-1 ) n2 = fields_find( info, "PARTYEAR", 0 );
	if ( n2==-1 ) n2 = fields_find( info, "PARTYEAR", -1 );

	if ( n1!=-1 && n2!=-1 ) {
		/* author part stops at the first '|' */
		p = info->data[n1].data;
		while ( p && *p && *p!='|' ) {
			if ( !is_ws( *p ) ) newstr_addchar( &citekey, *p );
			p++;
		}
		p = info->data[n2].data;
		while ( p && *p ) {
			if ( !is_ws( *p ) ) newstr_addchar( &citekey, *p );
			p++;
		}
	}

	/* Fallback key.  "ref" plus a decimal int always fits in buf, and the
	 * key must not carry a trailing newline.
	 */
	if ( citekey.len==0 ) {
		sprintf( buf, "ref%d", nref );
		newstr_strcpy( &citekey, buf );
	}

	/* previously the fallback key was built but never stored */
	if ( citekey.len )
		fields_add( info, "REFNUM", citekey.data, 0 );

	newstr_free( &citekey );

	return fields_find( info, "REFNUM", -1 );
}
/*
 * process_bibtexid() -- get reference name
 *
 * Collect the characters at p up to (not including) the first ',' as a
 * candidate reference name, then step over the comma and any following
 * whitespace.
 *
 * data is always emptied first.  If the candidate is non-empty and has
 * no '=', it is stored in data wrapped in '{' and '}'.  If it contains
 * '=', data stays empty and the original position is returned so the
 * text can be re-read as a tag/data pair.
 *
 * Returns the position at which parsing should continue.
 */
static char*
process_bibtexid( char *p, newstr *data )
{
	char *origin = p;
	newstr name;

	newstr_init( &name );
	newstr_empty( data );

	for ( ; *p != '\0' && *p != ','; p++ )
		newstr_addchar( &name, *p );
	if ( *p == ',' ) p++;

	/* skip ending newline/carriage return */
	p = skip_ws( p );

	if ( name.len != 0 ) {
		if ( strchr( name.data, '=' ) == NULL ) {
			/* add '{' and '}' to protect from string expansion */
			newstr_addchar( data, '{' );
			newstr_strcat( data, name.data );
			newstr_addchar( data, '}' );
		} else {
			/* Endnote writes bibtex files w/o fields, try to
			 * distinguish via presence of an equal sign.... if
			 * it's there, assume that it's a tag/data pair instead
			 * and roll back.
			 */
			p = origin;
		}
	}

	newstr_free( &name );
	return p;
}
/*
 * bibtex_split()
 *
 * Split the value in s into tokens at every '#' that lies outside of
 * double quotes and outside of braces, appending each non-empty token to
 * the tokens list.
 *
 * Quote and brace characters are kept in the tokens.  Whitespace is
 * dropped unless it occurs inside quotes or braces.  After splitting,
 * trailing whitespace is trimmed from every entry in the list.
 */
static void
bibtex_split( list *tokens, newstr *s )
{
	newstr tok;
	int in_quotes = 0, depth = 0;
	int pos, k, total = s->len;
	char c;

	newstr_init( &tok );

	for ( pos = 0; pos < total; ++pos ) {
		c = s->data[pos];
		switch ( c ) {
		case '\"':
			in_quotes = in_quotes ? 0 : 1;
			newstr_addchar( &tok, '\"' );
			break;
		case '{':
			depth++;
			newstr_addchar( &tok, '{' );
			break;
		case '}':
			depth--;
			newstr_addchar( &tok, '}' );
			break;
		default:
			if ( c == '#' && !in_quotes && !depth ) {
				/* unprotected '#': finish the current token */
				if ( tok.len ) list_add( tokens, tok.data );
				newstr_empty( &tok );
			}
			else if ( !is_ws( c ) || in_quotes || depth ) {
				newstr_addchar( &tok, c );
			}
			break;
		}
	}

	if ( tok.len ) list_add( tokens, tok.data );

	for ( k = 0; k < tokens->n; ++k )
		newstr_trimendingws( &(tokens->str[k]) );

	newstr_free( &tok );
}
/*
 * resolve_citekeys()
 *
 * Make duplicated citation keys distinct by appending a letter suffix.
 *
 * dup[] holds one entry per citekey; entries equal to -1 are left alone.
 * For every other index i, each entry j>=i with dup[j]==i receives the
 * key citekeys->str[j] followed by a suffix chosen from its position in
 * the group: one 'a' for every full 26 preceding members, then the
 * letter for the remainder.  The result is written into the "REFNUM"
 * field of b->ref[j] when that field exists, and dup[j] is reset to -1.
 */
static void
resolve_citekeys( bibl *b, list *citekeys, int *dup )
{
	char letters[] = "abcdefghijklmnopqrstuvwxyz";
	newstr key;
	int lead, member, seen, rem, pos;

	newstr_init( &key );

	for ( lead = 0; lead < citekeys->n; ++lead ) {

		if ( dup[lead] == -1 ) continue;

		seen = 0;
		for ( member = lead; member < citekeys->n; ++member ) {

			if ( dup[member] != lead ) continue;

			newstr_newstrcpy( &key, &(citekeys->str[member]) );

			/* suffix: 'a' per full alphabet, then the remainder */
			for ( rem = seen; rem >= 26; rem -= 26 )
				newstr_addchar( &key, 'a' );
			if ( rem >= 0 && rem < 26 )
				newstr_addchar( &key, letters[rem] );

			seen++;
			dup[member] = -1;

			pos = fields_find( b->ref[member], "REFNUM", -1 );
			if ( pos != -1 )
				newstr_newstrcpy( &((b->ref[member])->data[pos]), &key );
		}
	}

	newstr_free( &key );
}
/*
 * bibtex_matches_list()
 *
 * Check whether data is exactly equal to one of the strings in names.
 * On the first match, data is added to info under the tag formed by
 * concatenating tag and suffix, at the given level, and *match is set
 * to 1.  Otherwise *match is 0.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR if fields_add() fails (in which
 * case *match stays 0).
 */
static int
bibtex_matches_list( fields *info, char *tag, char *suffix, newstr *data, int level, list *names, int *match )
{
	int k, ret = BIBL_OK;
	newstr fulltag;

	*match = 0;
	if ( names->n == 0 ) return ret;

	newstr_init( &fulltag );

	/* locate the first list entry identical to data */
	for ( k = 0; k < names->n; ++k ) {
		if ( strcmp( data->data, list_getc( names, k ) ) == 0 ) break;
	}

	if ( k < names->n ) {
		newstr_initstr( &fulltag, tag );
		newstr_strcat( &fulltag, suffix );
		if ( fields_add( info, fulltag.data, data->data, level ) == FIELDS_OK )
			*match = 1;
		else
			ret = BIBL_ERR_MEMERR;
	}

	newstr_free( &fulltag );
	return ret;
}
/*
 * output_title()
 *
 * Emit the title of the given level as the element named bibtag.
 *
 * When BIBOUT_SHORTTITLE is set in format_opts and level is 1, the
 * "SHORTTITLE"/"SHORTSUBTITLE" fields are preferred; if no short title
 * exists (or the option does not apply) "TITLE"/"SUBTITLE" are used.
 * Nothing is written when no title field is found.
 *
 * A subtitle is joined to the title with ": ", except when the title
 * already ends in '?', in which case a single space is used.  Fields
 * that are written are marked as used.
 */
static void
output_title( FILE *fp, fields *info, unsigned long refnum, char *bibtag, int level, int format_opts )
{
	newstr title, *main_title;
	int n1 = -1, n2 = -1;

	/* Option is for short titles of journals */
	if ( ( format_opts & BIBOUT_SHORTTITLE ) && level==1 ) {
		n1 = fields_find( info, "SHORTTITLE", level );
		n2 = fields_find( info, "SHORTSUBTITLE", level );
	}

	if ( n1==-1 ) {
		n1 = fields_find( info, "TITLE", level );
		n2 = fields_find( info, "SUBTITLE", level );
	}

	if ( n1==-1 ) return;

	main_title = &(info->data[n1]);

	newstr_init( &title );
	newstr_newstrcpy( &title, main_title );
	fields_setused( info, n1 );

	if ( n2!=-1 ) {
		/* Look at the last real character (index len-1); index len is
		 * the terminator and can never be '?'.
		 */
		if ( main_title->len > 0 && main_title->data &&
		     main_title->data[main_title->len-1]=='?' )
			newstr_addchar( &title, ' ' );
		else
			newstr_strcat( &title, ": " );
		newstr_strcat( &title, info->data[n2].data );
		fields_setused( info, n2 );
	}

	output_element( fp, bibtag, title.data, format_opts );

	newstr_free( &title );
}
/*
 * name_add()
 *
 * Split q into names separated by '|' and hand each non-empty name to
 * name_process() together with tag, level, asis and corps.
 *
 * Leading whitespace and trailing whitespace/commas are stripped from
 * every name first.  Names that are empty after stripping (e.g. " , ")
 * are skipped.  A NULL q is accepted and does nothing.
 */
void
name_add( fields *info, char *tag, char *q, int level, list *asis, list *corps )
{
	newstr inname;
	char *p, *start, *end;

	if ( !q ) return;

	newstr_init( &inname );

	while ( *q ) {

		start = q = skip_ws( q );

		/* find the end of this name: next '|' or end of string */
		while ( *q && *q!='|' ) q++;

		/* Strip trailing whitespace and commas.  end is one past the
		 * last kept character and never moves before start, so names
		 * consisting only of whitespace/commas cannot make the scan
		 * run off the front of the buffer.
		 */
		end = q;
		while ( end > start && ( is_ws( end[-1] ) || end[-1]==',' ) )
			end--;

		for ( p=start; p<end; p++ )
			newstr_addchar( &inname, *p );

		/* keep "names" like " , " from coredumping program */
		if ( inname.len ) {
			name_process( info, tag, level, &inname, asis, corps );
			newstr_empty( &inname );
		}

		if ( *q=='|' ) q++;
	}

	newstr_free( &inname );
}
/*
 * build_refnum()
 *
 * Construct a reference key and add it to info as a "REFNUM" field at
 * level 0.
 *
 * When both an author ("AUTHOR", else "AUTHOR:CORP") and a year ("YEAR",
 * else "PARTYEAR") are found at any level, the key is the author value
 * up to the first '|' followed by the year value up to the first space
 * or tab.  Otherwise the key is "ref" followed by nrefs in decimal.
 *
 * Returns the result of looking up "REFNUM" at level 0.
 */
static int
build_refnum( fields *info, long nrefs )
{
	newstr key;
	char digits[512];
	char *src;
	int year_pos, author_pos;

	newstr_init( &key );

	year_pos = fields_find( info, "YEAR", -1 );
	if ( year_pos == -1 )
		year_pos = fields_find( info, "PARTYEAR", -1 );

	author_pos = fields_find( info, "AUTHOR", -1 );
	if ( author_pos == -1 )
		author_pos = fields_find( info, "AUTHOR:CORP", -1 );

	if ( author_pos == -1 || year_pos == -1 ) {
		/* no author/year pair: fall back to a numbered key */
		sprintf( digits, "%ld", nrefs );
		newstr_strcpy( &key, "ref" );
		newstr_strcat( &key, digits );
	} else {
		for ( src = info->data[author_pos].data;
		      src && *src && *src != '|'; src++ )
			newstr_addchar( &key, *src );
		for ( src = info->data[year_pos].data;
		      src && *src && *src != ' ' && *src != '\t'; src++ )
			newstr_addchar( &key, *src );
	}

	fields_add( info, "REFNUM", key.data, 0 );
	newstr_free( &key );

	return fields_find( info, "REFNUM", 0 );
}
int main ( int argc, char *argv[] ) { int failed = 0; int ntest = 1000; int i; newstr s; newstr_init( &s ); for ( i=0; i<ntest; ++i ) failed += test_empty( &s ); for ( i=0; i<ntest; ++i) failed += test_addchar( &s ); for ( i=0; i<ntest; ++i) failed += test_strcat( &s ); for ( i=0; i<ntest; ++i) failed += test_strcpy( &s ); for ( i=0; i<ntest; ++i) failed += test_segcpy( &s ); for ( i=0; i<ntest; ++i) failed += test_segcat( &s ); for ( i=0; i<ntest; ++i) failed += test_findreplace( &s ); newstr_free( &s ); if ( !failed ) { printf( "%s: PASSED\n", progname ); return EXIT_SUCCESS; } else { printf( "%s: FAILED\n", progname ); return EXIT_FAILURE; } return EXIT_SUCCESS; }
/*
 * medin_readf()
 *
 * Read lines from fp with newstr_fget() until one complete reference has
 * been collected into reference, or input runs out.
 *
 * Each line is checked with xml_getencoding(); the last recognised
 * encoding is reported through *fcharset (CHARSET_UNKNOWN if none was
 * seen).  Accumulation begins at the position returned by
 * medin_findstartwrapper() and continues with whole lines until
 * medin_findendwrapper() locates the end in the accumulated text; the
 * text up to that point is copied into reference.
 *
 * Returns 1 if a reference was found, 0 otherwise.
 */
int
medin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line, newstr *reference, int *fcharset )
{
	newstr accum;
	char *begin = NULL, *finish;
	int found = 0, collecting = 0;
	int charset = CHARSET_UNKNOWN, enc, type = -1;

	newstr_init( &accum );

	while ( !found && newstr_fget( fp, buf, bufsize, bufpos, line ) ) {

		if ( line->data ) {
			enc = xml_getencoding( line );
			if ( enc != CHARSET_UNKNOWN ) charset = enc;
		}

		/* line->data is re-tested on purpose: it is read again after
		 * the xml_getencoding() call above */
		if ( line->data )
			begin = medin_findstartwrapper( line->data, &type );

		if ( !begin && !collecting ) continue;

		if ( collecting ) {
			newstr_strcat( &accum, line->data );
		} else {
			newstr_strcat( &accum, begin );
			collecting = 1;
		}

		finish = medin_findendwrapper( accum.data, type );
		if ( finish ) {
			newstr_segcpy( reference, accum.data, finish );
			found = 1;
		}
	}

	newstr_free( &accum );

	*fcharset = charset;
	return found;
}
/*
 * output_name_type()
 *
 * Write to outptr every name field of info whose tag, as reduced by
 * extract_name_and_info(), equals (ignoring case) one of the nmap
 * entries in map.  Entries of map are processed in order, and for each
 * one the fields are scanned in order.
 *
 * The names are wrapped in "<tag><b:NameList>" ... "</b:NameList></tag>";
 * the wrapper is only written if at least one name matched.  Fields for
 * which extract_name_and_info() returns NAME go through output_name(),
 * all others through output_name_nomangle().  Every field written is
 * marked as used.
 */
static void
output_name_type( fields *info, FILE *outptr, int level, char *map[], int nmap, char *tag )
{
	newstr base;
	int m, f, total, kind;
	int written = 0;

	newstr_init( &base );
	total = fields_num( info );

	for ( m = 0; m < nmap; ++m ) {
		for ( f = 0; f < total; ++f ) {

			kind = extract_name_and_info( &base, &(info->tag[f]) );
			if ( strcasecmp( base.data, map[m] ) != 0 ) continue;

			/* open the wrapper lazily, on the first hit */
			if ( written == 0 )
				fprintf( outptr, "<%s><b:NameList>\n", tag );

			if ( kind == NAME )
				output_name( outptr, info->data[f].data );
			else
				output_name_nomangle( outptr, info->data[f].data );

			fields_setused( info, f );
			written++;
		}
	}

	newstr_free( &base );

	if ( written )
		fprintf( outptr, "</b:NameList></%s>\n", tag );
}
/*
 * medin_authorlist()
 *
 * Walk the children of node and, for every "Author" element that has
 * children of its own, build a name and add it to info at level 0.
 *
 * medin_author() is tried first and its result is stored as "AUTHOR"; if
 * it produces an empty name, medin_corpauthor() is tried and the result
 * is stored as "AUTHOR:CORP".  Authors for which both yield nothing are
 * skipped.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR if a helper fails, the name string
 * reports a memory error, or fields_add() fails.  The scratch name is
 * released on every exit path.
 */
static int
medin_authorlist( xml *node, fields *info )
{
	int fstatus, status, ret = BIBL_OK;
	newstr name;
	char *tag;

	newstr_init( &name );

	node = node->down;
	while ( node ) {
		if ( xml_tagexact( node, "Author" ) && node->down ) {
			status = medin_author( node->down, &name );
			tag = "AUTHOR";
			if ( !name.len ) {
				status = medin_corpauthor( node->down, &name );
				tag = "AUTHOR:CORP";
			}
			if ( newstr_memerr( &name ) || status!=BIBL_OK ) {
				ret = BIBL_ERR_MEMERR;
				goto out;
			}
			if ( name.len ) {
				fstatus = fields_add(info,tag,name.data,0);
				if ( fstatus!=FIELDS_OK ) {
					ret = BIBL_ERR_MEMERR;
					goto out;
				}
			}
			newstr_empty( &name );
		}
		node = node->next;
	}

out:
	/* single cleanup point so error paths do not leak name */
	newstr_free( &name );
	return ret;
}
/*
 * medin_medlinedate()
 *
 * <MedlineDate>2003 Jan-Feb</MedlineDate>
 *
 * Split the text at p into up to three whitespace-separated tokens and
 * add the non-empty ones to info, at the given level, as "PARTYEAR",
 * "PARTMONTH" and "PARTDAY" respectively.  Any "-" in the month token is
 * replaced by "/" before it is stored.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR if the scratch string reports a
 * memory error or fields_add() fails.  The scratch string is released on
 * every exit path.
 */
static int
medin_medlinedate( fields *info, char *p, int level )
{
	int fstatus, ret = BIBL_ERR_MEMERR;
	newstr tmp;

	newstr_init( &tmp );

	/* first token: year */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) goto out;
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTYEAR", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) goto out;
	}

	/* second token: month or month range */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) goto out;
	if ( tmp.len > 0 ) {
		newstr_findreplace( &tmp, "-", "/" );
		fstatus = fields_add( info, "PARTMONTH", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) goto out;
	}

	/* third token: day */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) goto out;
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTDAY", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) goto out;
	}

	ret = BIBL_OK;

out:
	/* single cleanup point so error paths do not leak tmp */
	newstr_free( &tmp );
	return ret;
}
/*
 * wordin_person()
 *
 * Build a person's name from node by calling wordin_person_last() and
 * then wordin_person_first() on the same string, and add the result to
 * info under the tag given by type at level 0.
 *
 * Returns BIBL_OK on success, the failing helper's status if either
 * helper fails, or BIBL_ERR_MEMERR if fields_add() fails.
 */
static int
wordin_person( xml *node, fields *info, char *type )
{
	int ret;
	newstr person;

	newstr_init( &person );

	ret = wordin_person_last( node, &person );

	if ( ret == BIBL_OK )
		ret = wordin_person_first( node, &person );

	if ( ret == BIBL_OK ) {
		if ( fields_add( info, type, person.data, 0 ) != FIELDS_OK )
			ret = BIBL_ERR_MEMERR;
	}

	newstr_free( &person );
	return ret;
}
/*
 * newstr_segdel()
 *
 * Remove the segment [p,q) from s, where p and q point into s->data.
 *
 * The text before p and the text from q to the end of the string are
 * copied into two temporaries, s is emptied, and the two pieces are
 * concatenated back into it.
 */
void
newstr_segdel( newstr *s, char *p, char *q )
{
	newstr head, tail;
	char *terminator;

	assert( s );

	/* remember where the current contents end before s is modified */
	terminator = s->data + s->len;

	newstr_init( &head );
	newstr_init( &tail );

	newstr_segcpy( &head, s->data, p );
	newstr_segcpy( &tail, q, terminator );

	newstr_empty( s );
	if ( head.data != NULL ) newstr_strcat( s, head.data );
	if ( tail.data != NULL ) newstr_strcat( s, tail.data );

	newstr_free( &tail );
	newstr_free( &head );
}
/*
 * lists_free()
 *
 * Release every string slot of a (all a->max of them), free the slot
 * array itself, and reset a with lists_init().
 */
void
lists_free( lists *a )
{
	int slot = 0;

	while ( slot < a->max ) {
		newstr_free( &(a->str[slot]) );
		slot++;
	}

	free( a->str );
	lists_init( a );
}