/* bibtex_addtitleurl()
 *
 * Handles a title fragment that begins with "\href{": the text between
 * "\href{" and the next "}" is stored as a "URL" field (level 0), and
 * whatever follows replaces the contents of 'in'.
 *
 * The caller must guarantee that in->data starts with the six characters
 * of "\href{"; that prefix is skipped without being checked.
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR if the scratch string
 * reports a memory error or the field cannot be added.
 */
static int bibtex_addtitleurl( fields *info, newstr *in )
{
	int result = BIBL_ERR_MEMERR;
	newstr buf;
	char *rest;

	newstr_init( &buf );

	/* copy the URL: everything after "\href{" up to the closing "}" */
	rest = newstr_cpytodelim( &buf, in->data + 6, "}", 1 );

	if ( !newstr_memerr( &buf ) &&
	     fields_add( info, "URL", buf.data, 0 ) == FIELDS_OK ) {

		/* copy the remainder and hand it back through 'in' */
		newstr_cpytodelim( &buf, rest, "", 0 );
		if ( !newstr_memerr( &buf ) ) {
			newstr_swapstrings( &buf, in );
			result = BIBL_OK;
		}
	}

	newstr_free( &buf );
	return result;
}
/* medin_medlinedate()
 *
 * <MedlineDate>2003 Jan-Feb</MedlineDate>
 *
 * Splits the date text at 'p' into up to three whitespace-separated
 * pieces and stores the non-empty ones, in order, as "PARTYEAR",
 * "PARTMONTH" and "PARTDAY" at the given level.  Any '-' in the month
 * piece is replaced by '/' (e.g. "Jan-Feb" becomes "Jan/Feb").
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR on a memory error or if
 * a field cannot be added.  The scratch string is released on every
 * exit path.
 */
static int medin_medlinedate( fields *info, char *p, int level )
{
	int fstatus, status = BIBL_OK;
	newstr tmp;

	newstr_init( &tmp );

	/* first piece: year */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) { status = BIBL_ERR_MEMERR; goto out; }
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTYEAR", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}

	/* second piece: month, or month range with '-' turned into '/' */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) { status = BIBL_ERR_MEMERR; goto out; }
	if ( tmp.len > 0 ) {
		newstr_findreplace( &tmp, "-", "/" );
		if ( newstr_memerr( &tmp ) ) { status = BIBL_ERR_MEMERR; goto out; }
		fstatus = fields_add( info, "PARTMONTH", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}

	/* third piece: day */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) { status = BIBL_ERR_MEMERR; goto out; }
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTDAY", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}

out:
	/* single exit so tmp is never leaked on an error path */
	newstr_free( &tmp );
	return status;
}
/* pages_add()
 *
 * Splits 'invalue' into a start and a stop part with extract_range()
 * and stores the non-empty parts as "PAGES:START" and "PAGES:STOP" in
 * 'bibout' at the given level.
 *
 * 'outtag' is accepted but not used by this function.
 *
 * Returns 1 on success and 0 on failure (memory error in either part,
 * or a field that could not be added).  If adding the start fails, the
 * stop is not attempted.
 */
int pages_add( fields *bibout, char *outtag, newstr *invalue, int level )
{
	newstr first, last;
	int ok = 0;

	newstr_init( &first );
	newstr_init( &last );

	extract_range( invalue, &first, &last );

	if ( !newstr_memerr( &first ) && !newstr_memerr( &last ) ) {
		ok = 1;

		if ( first.len>0 &&
		     fields_add( bibout, "PAGES:START", first.data, level )!=FIELDS_OK )
			ok = 0;

		if ( ok && last.len>0 &&
		     fields_add( bibout, "PAGES:STOP", last.data, level )!=FIELDS_OK )
			ok = 0;
	}

	newstr_free( &first );
	newstr_free( &last );

	return ok;
}
/* wordin_pages()
 *
 * Reads the node's text as "start-stop".  Everything before the first
 * '-' is the start page; everything after it is the stop page.
 *
 * A non-empty start is stored as "PAGES:START".  A non-empty stop is
 * stored as "PAGES:STOP"; when the stop is shorter than the start it is
 * treated as an abbreviation and overlaid on the tail of the start
 * (start "1211", stop "6" gives "1216").  Both are added at level 1.
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR on a memory error or if
 * a field cannot be added.
 */
static int wordin_pages( xml *node, fields *info )
{
	int i, j, fstatus, ret = BIBL_OK;
	newstr first, last;
	char *cur, *stop;

	newstrs_init( &first, &last, NULL );

	/* start page: up to, but not including, the first '-' */
	for ( cur = xml_data( node ); *cur && *cur!='-'; cur++ )
		newstr_addchar( &first, *cur );
	if ( newstr_memerr( &first ) ) {
		ret = BIBL_ERR_MEMERR;
		goto out;
	}

	if ( *cur=='-' ) cur++;

	/* stop page: the rest of the text */
	for ( ; *cur; cur++ )
		newstr_addchar( &last, *cur );
	if ( newstr_memerr( &last ) ) {
		ret = BIBL_ERR_MEMERR;
		goto out;
	}

	if ( first.len ) {
		fstatus = fields_add( info, "PAGES:START", first.data, 1 );
		if ( fstatus!=FIELDS_OK ) {
			ret = BIBL_ERR_MEMERR;
			goto out;
		}
	}

	if ( last.len ) {
		stop = last.data;
		if ( first.len > last.len ) {
			/* abbreviated stop: overwrite the tail of the start */
			for ( j=0, i=first.len-last.len; i<first.len; ++i, ++j )
				first.data[i] = last.data[j];
			stop = first.data;
		}
		fstatus = fields_add( info, "PAGES:STOP", stop, 1 );
		if ( fstatus!=FIELDS_OK )
			ret = BIBL_ERR_MEMERR;
	}

out:
	newstrs_free( &first, &last, NULL );
	return ret;
}
/* medin_authorlist()
 *
 * Walks the children of 'node'.  For every child tagged "Author" that
 * itself has children, a name is built with medin_author() and stored
 * as "AUTHOR"; if that yields an empty name, medin_corpauthor() is
 * tried instead and the result is stored as "AUTHOR:CORP".  Empty
 * names are skipped.  Fields are added at level 0.
 *
 * Returns BIBL_OK on success.  Any failure (memory error in the name,
 * non-OK status from the helper that ran last, or a field that cannot
 * be added) is reported as BIBL_ERR_MEMERR.  The scratch name is
 * released on every exit path.
 */
static int medin_authorlist( xml *node, fields *info )
{
	int fstatus, status, ret = BIBL_OK;
	newstr name;
	char *tag;

	newstr_init( &name );

	node = node->down;
	while ( node ) {
		if ( xml_tagexact( node, "Author" ) && node->down ) {
			status = medin_author( node->down, &name );
			tag = "AUTHOR";
			if ( !name.len ) {
				/* no personal name found; try a corporate author */
				status = medin_corpauthor( node->down, &name );
				tag = "AUTHOR:CORP";
			}
			if ( newstr_memerr( &name ) || status!=BIBL_OK ) {
				ret = BIBL_ERR_MEMERR;
				goto out;
			}
			if ( name.len ) {
				fstatus = fields_add(info,tag,name.data,0);
				if ( fstatus!=FIELDS_OK ) {
					ret = BIBL_ERR_MEMERR;
					goto out;
				}
			}
			newstr_empty( &name );
		}
		node = node->next;
	}

out:
	/* single exit so name is never leaked on an error path */
	newstr_free( &name );
	return ret;
}
/* bibtex_tag()
 *
 * Skips leading whitespace at 'p', then copies characters into 'tag'
 * until '=', a space, a tab, a carriage return or a newline is seen.
 *
 * Returns the position after any whitespace that follows the tag, or
 * NULL if 'tag' reports a memory error.
 */
static char * bibtex_tag( char *p, newstr *tag )
{
	char *after;

	after = newstr_cpytodelim( tag, skip_ws( p ), "= \t\r\n", 0 );

	return newstr_memerr( tag ) ? NULL : skip_ws( after );
}
/* process_keywords()
 *
 * Splits the value of a keywords="" entry on semicolons and stores
 * each non-empty piece as a "KEYWORD" field at the given level.
 * Whitespace around each piece is removed.
 *
 * Commas are also common separators, but splitting on them would break
 * entries such as:
 *      keywords="Microscopy, Confocal"
 *
 * A NULL or empty 'd' is not an error.
 *
 * Returns BIBL_OK or BIBL_ERR_MEMERR.
 */
static int process_keywords( fields *info, newstr *d, int level )
{
	int result = BIBL_OK;
	newstr kw;
	char *cursor;

	if ( !d || d->len==0 ) return BIBL_OK;

	newstr_init( &kw );

	for ( cursor = d->data; *cursor; ) {

		cursor = newstr_cpytodelim( &kw, skip_ws( cursor ), ";", 1 );
		newstr_trimendingws( &kw );

		if ( newstr_memerr( &kw ) ) {
			result = BIBL_ERR_MEMERR;
			break;
		}

		/* ignore empty pieces, e.g. from ";;" or a trailing ";" */
		if ( !kw.len ) continue;

		if ( fields_add( info, "KEYWORD", kw.data, level )!=FIELDS_OK ) {
			result = BIBL_ERR_MEMERR;
			break;
		}
	}

	newstr_free( &kw );
	return result;
}
/* wordin_person_last()
 *
 * Scans the sibling list starting at 'node' for the first
 * <b:Last>xxxx</b:Last> entry and, if its value is non-empty, copies
 * that value into 'name' (replacing what was there).
 *
 * Only the first <b:Last> is considered; later ones are ignored, even
 * when the first one is empty.
 *
 * Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise.
 */
static int wordin_person_last( xml *node, newstr *name )
{
	xml *cur;

	for ( cur = node; cur; cur = cur->next )
		if ( xml_tagexact( cur, "b:Last" ) ) break;

	if ( !cur || !cur->value->len ) return BIBL_OK;

	newstr_strcpy( name, cur->value->data );

	return newstr_memerr( name ) ? BIBL_ERR_MEMERR : BIBL_OK;
}
/* wordin_person_first()
 *
 * Scans the sibling list starting at 'node' and appends the value of
 * every non-empty <b:First>xxxx</b:First> entry to 'name'.  A '|' is
 * inserted before the value whenever 'name' already holds text, so an
 * empty 'name' receives "xxxx" and a non-empty one receives "|xxxx".
 *
 * Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise.
 */
static int wordin_person_first( xml *node, newstr *name )
{
	while ( node ) {
		if ( xml_tagexact( node, "b:First" ) && node->value->len ) {
			if ( name->len ) newstr_addchar( name, '|' );
			newstr_strcat( name, node->value->data );
			if ( newstr_memerr( name ) ) return BIBL_ERR_MEMERR;
		}
		node = node->next;
	}
	return BIBL_OK;
}
/* list_set_cleanup()
 *
 * Post-processing after element 'n' of list 'a' has been written.
 *
 * Returns NULL if that element reports a memory error.  Otherwise, if
 * the list is currently flagged as sorted, the element is compared
 * with its left and right neighbours (where they exist) via
 * list_comp_step(); a positive result from either comparison clears
 * the sorted flag.  Returns a pointer to element 'n'.
 */
static newstr * list_set_cleanup( list *a, int n )
{
	newstr *entry = &( a->str[n] );

	if ( newstr_memerr( entry ) ) return NULL;

	/* out of order with the previous element? */
	if ( a->sorted && n>0 && list_comp_step( a, n-1, n )>0 )
		a->sorted = 0;

	/* out of order with the following element? */
	if ( a->sorted && n<a->n-1 && list_comp_step( a, n, n+1 )>0 )
		a->sorted = 0;

	return entry;
}
/* medin_pagination()
 *
 * <Pagination>
 *    <MedlinePgn>12111-6</MedlinePgn>
 * </Pagination>
 *
 * If 'node' is a "MedlinePgn" element with a value, its text is split
 * at the first '-'.  A non-empty first part is stored as "PAGESTART".
 * A non-empty second part is stored as "PAGEEND"; when it is shorter
 * than the start it is treated as an abbreviation and overlaid on the
 * tail of the start (start "12111", end "6" gives "12116").  Both are
 * added at level 1.
 *
 * The function then recurses into node->down and node->next.
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR on a memory error or if
 * a field cannot be added, or the non-OK status of a recursive call.
 * The scratch strings are released on every exit path.
 */
static int medin_pagination( xml *node, fields *info )
{
	int i, fstatus, status = BIBL_OK;
	newstr sp, ep;
	char *p, *pp;

	if ( xml_tagexact( node, "MedlinePgn" ) && node->value ) {

		newstrs_init( &sp, &ep, NULL );

		/* start page: text before the first '-' */
		p = newstr_cpytodelim( &sp, xml_data( node ), "-", 1 );
		if ( newstr_memerr( &sp ) ) { status = BIBL_ERR_MEMERR; goto done; }
		if ( sp.len ) {
			fstatus = fields_add( info, "PAGESTART", sp.data, 1 );
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto done; }
		}

		/* end page: everything that remains */
		p = newstr_cpytodelim( &ep, p, "", 0 );
		if ( newstr_memerr( &ep ) ) { status = BIBL_ERR_MEMERR; goto done; }
		if ( ep.len ) {
			if ( sp.len > ep.len ) {
				/* abbreviated end: overwrite the tail of the start */
				for ( i=sp.len-ep.len; i<sp.len; ++i )
					sp.data[i] = ep.data[i-sp.len+ep.len];
				pp = sp.data;
			} else  pp = ep.data;
			fstatus = fields_add( info, "PAGEEND", pp, 1 );
			if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
		}

done:
		/* always release the scratch strings, including on error */
		newstrs_free( &sp, &ep, NULL );
		if ( status!=BIBL_OK ) return status;
	}

	if ( node->down ) {
		status = medin_pagination( node->down, info );
		if ( status!=BIBL_OK ) return status;
	}

	if ( node->next ) {
		status = medin_pagination( node->next, info );
		if ( status!=BIBL_OK ) return status;
	}

	return BIBL_OK;
}
/* process_sente()
 *
 * sentelink = {file://localhost/full/path/to/file.pdf,Sente,PDF}
 *
 * Takes the text of 'd' up to the first comma, with surrounding
 * whitespace removed, and stores it as a "FILEATTACH" field at the
 * given level.  Nothing is added when that text is empty.
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR on a memory error or if
 * the field cannot be added.
 */
static int process_sente( fields *info, newstr *d, int level )
{
	int result = BIBL_OK;
	newstr path;

	newstr_init( &path );

	newstr_cpytodelim( &path, skip_ws( d->data ), ",", 0 );
	newstr_trimendingws( &path );

	if ( newstr_memerr( &path ) )
		result = BIBL_ERR_MEMERR;
	else if ( path.len &&
	          fields_add( info, "FILEATTACH", path.data, level )!=FIELDS_OK )
		result = BIBL_ERR_MEMERR;

	newstr_free( &path );
	return result;
}
/* list_addvp()
 *
 * Appends one element to list 'a'.  'mode' says how to interpret 'vp':
 *
 *    LIST_CHR  'vp' is a C string (const char *)
 *    LIST_STR  'vp' is a newstr *
 *
 * If the list was flagged as sorted and the new element compares
 * greater-than-zero against its predecessor via list_comp_step(), the
 * sorted flag is cleared.
 *
 * Returns a pointer to the new element, or NULL if space could not be
 * ensured, 'mode' is not one of the values above, or the copy reports
 * a memory error.  The element count is unchanged when NULL is
 * returned.
 */
newstr * list_addvp( list *a, unsigned char mode, void *vp )
{
	newstr *slot;

	if ( list_ensure_space( a )!=LIST_OK ) return NULL;

	slot = &( a->str[a->n] );

	if ( mode==LIST_CHR )
		newstr_strcpy( slot, (const char*) vp );
	else if ( mode==LIST_STR )
		newstr_newstrcpy( slot, (newstr*) vp );
	else
		return NULL;

	if ( newstr_memerr( slot ) ) return NULL;

	a->n++;

	/* only the new last pair can have broken the ordering */
	if ( a->sorted && a->n > 1 &&
	     list_comp_step( a, a->n-2, a->n-1 ) > 0 )
		a->sorted = 0;

	return slot;
}
/* process_file()
 *
 * file={Description:/full/path/to/file.pdf:PDF}
 *
 * Stores the path portion of a file entry as a "FILEATTACH" field at
 * the given level.
 *
 * With two or more colons, the path is the text after the first colon
 * and before the last one, with surrounding whitespace removed; this
 * keeps DOS paths such as ":C:/....pdf:PDF" intact.  An empty path is
 * not added.  With fewer than two colons the entry is not in the
 * expected format and the whole value is stored unchanged.
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR on a memory error or if
 * the field cannot be added.
 */
static int process_file( fields *info, newstr *d, int level )
{
	char *raw = d->data;
	int pos, begin, end, ncolons, result = BIBL_OK;
	newstr path;

	ncolons = count_colons( raw );

	if ( ncolons <= 1 ) {
		/* not in Description:path:type form, so copy directly */
		if ( fields_add( info, "FILEATTACH", raw, level )!=FIELDS_OK )
			result = BIBL_ERR_MEMERR;
		return result;
	}

	/* extract after the first colon and up to the last colon */
	begin = first_colon( raw ) + 1;
	end   = last_colon( raw );

	newstr_init( &path );
	for ( pos = begin; pos < end; ++pos )
		newstr_addchar( &path, raw[pos] );
	newstr_trimstartingws( &path );
	newstr_trimendingws( &path );

	if ( newstr_memerr( &path ) )
		result = BIBL_ERR_MEMERR;
	else if ( path.len &&
	          fields_add( info, "FILEATTACH", path.data, level )!=FIELDS_OK )
		result = BIBL_ERR_MEMERR;

	newstr_free( &path );
	return result;
}
/* bibtex_data()
 *
 * Tokenizes the value part of a BibTeX field starting at 'p' and
 * appends each token to 'tokens'.
 *
 *  - A double-quoted section is one token, quotes included.  Quotes
 *    are only recognized outside of brackets and when not preceded by
 *    a backslash.
 *  - A bracketed section {...} is one token, outer brackets included;
 *    brackets nest and are ignored when preceded by a backslash.
 *  - '#' outside of quotes and brackets is a token of its own.
 *  - Outside of quotes and brackets, whitespace separates tokens.
 *  - Inside quotes or brackets, a newline or carriage return together
 *    with the whitespace that follows it becomes a single space;
 *    whitespace at the very start of a token is dropped.
 *
 * Scanning stops at the end of the string or at an unquoted,
 * unbracketed ',', '=', '}' or ')'.  If quotes or brackets are still
 * open at that point a warning is written to stderr, and any pending
 * token is still added to the list.
 *
 * 'bibin' is accepted but not used by this function.
 *
 * Returns the position where scanning stopped, or NULL on a memory
 * error or if a token could not be added.
 */
static char * bibtex_data( char *p, fields *bibin, list *tokens )
{
	unsigned int nbracket = 0, nquotes = 0;
	char *startp = p;
	newstr tok, *t;

	newstr_init( &tok );

	while ( p && *p ) {

		/* an unprotected terminator ends the value */
		if ( !nquotes && !nbracket ) {
			if ( *p==',' || *p=='=' || *p=='}' || *p==')' ) goto out;
		}

		if ( *p=='\"' && nbracket==0 && ( p==startp || *(p-1)!='\\' ) ) {
			nquotes = !nquotes;
			newstr_addchar( &tok, *p );
			if ( !nquotes ) {
				/* closing quote completes the token */
				if ( newstr_memerr( &tok ) ) { p=NULL; goto out0; }
				t = list_add( tokens, &tok );
				if ( !t ) { p=NULL; goto out0; }
				newstr_empty( &tok );
			}
		} else if ( *p=='#' && !nquotes && !nbracket ) {
			/* flush whatever precedes the '#', then add '#' itself */
			if ( tok.len ) {
				if ( newstr_memerr( &tok ) ) { p=NULL; goto out0; }
				t = list_add( tokens, &tok );
				if ( !t ) { p=NULL; goto out0; }
			}
			newstr_strcpy( &tok, "#" );
			t = list_add( tokens, &tok );
			if ( !t ) { p=NULL; goto out0; }
			newstr_empty( &tok );
		} else if ( *p=='{' && !nquotes && ( p==startp || *(p-1)!='\\' ) ) {
			nbracket++;
			newstr_addchar( &tok, *p );
		} else if ( *p=='}' && !nquotes && ( p==startp || *(p-1)!='\\' ) ) {
			/* nbracket is non-zero here: a '}' seen with no open
			 * bracket and no open quote already left via 'out' */
			nbracket--;
			newstr_addchar( &tok, *p );
			if ( nbracket==0 ) {
				/* outermost bracket closed: token complete */
				if ( newstr_memerr( &tok ) ) { p=NULL; goto out0; }
				t = list_add( tokens, &tok );
				if ( !t ) { p=NULL; goto out0; }
				newstr_empty( &tok );
			}
		} else if ( !is_ws( *p ) || nquotes || nbracket ) {
			if ( !is_ws( *p ) ) newstr_addchar( &tok, *p );
			else {
				if ( tok.len!=0 && *p!='\n' && *p!='\r' )
					newstr_addchar( &tok, *p );
				else if ( tok.len!=0 && (*p=='\n' || *p=='\r')) {
					/* fold a line break plus following
					 * whitespace into one space */
					newstr_addchar( &tok, ' ' );
					while ( is_ws( *(p+1) ) ) p++;
				}
			}
		} else if ( is_ws( *p ) ) {
			/* unprotected whitespace ends the current token */
			if ( tok.len ) {
				if ( newstr_memerr( &tok ) ) { p=NULL; goto out0; }
				t = list_add( tokens, &tok );
				if ( !t ) { p=NULL; goto out0; }
				newstr_empty( &tok );
			}
		}

		p++;
	}

out:
	/* normal end of scan: report imbalance and flush the last token */
	if ( nbracket!=0 ) {
		fprintf( stderr, "%s: Mismatch in number of brackets in reference.\n", progname );
	}
	if ( nquotes!=0 ) {
		fprintf( stderr, "%s: Mismatch in number of quotes in reference.\n", progname );
	}
	if ( tok.len ) {
		/* must leave via out0: jumping back to 'out' here would
		 * repeat this same test forever */
		if ( newstr_memerr( &tok ) ) { p = NULL; goto out0; }
		t = list_add( tokens, &tok );
		if ( !t ) p = NULL;
	}

out0:
	/* error exits land here directly; nothing further is added */
	newstr_free( &tok );
	return p;
}