/*
 * extract_name_and_info()
 *
 * Copy intag into outtag, strip the ":ASIS" and ":CORP" markers from
 * the copy, and report which marker (if any) was present.
 *
 * Returns NAME_CORP if ":CORP" was removed, otherwise NAME_ASIS if
 * ":ASIS" was removed, otherwise NAME.
 */
static int
extract_name_and_info( newstr *outtag, newstr *intag )
{
	int had_asis, had_corp;

	newstr_newstrcpy( outtag, intag );

	/* Both markers are always stripped; ":CORP" wins when both occur. */
	had_asis = newstr_findreplace( outtag, ":ASIS", "" );
	had_corp = newstr_findreplace( outtag, ":CORP", "" );

	if ( had_corp ) return NAME_CORP;
	if ( had_asis ) return NAME_ASIS;
	return NAME;
}
/* process_string() * * Handle lines like: * * '@STRING{TL = {Tetrahedron Lett.}}' * * p should point to just after '@STRING' * * In BibTeX, if a string is defined several times, the last one is kept. * */ static int process_string( char *p ) { int n, status = BIBL_OK; newstr s1, s2, *t; newstrs_init( &s1, &s2, NULL ); while ( *p && *p!='{' && *p!='(' ) p++; if ( *p=='{' || *p=='(' ) p++; p = process_bibtexline( skip_ws( p ), &s1, &s2, 0, NULL ); if ( p==NULL ) { status = BIBL_ERR_MEMERR; goto out; } if ( s2.data ) { newstr_findreplace( &s2, "\\ ", " " ); } if ( s1.data ) { n = list_find( &find, s1.data ); if ( n==-1 ) { t = list_add( &find, &s1 ); if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; } if ( s2.data ) t = list_add( &replace, &s2 ); else t = list_addc( &replace, "" ); if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; } } else { if ( s2.data ) t = list_set( &replace, n, &s2 ); else t = list_setc( &replace, n, "" ); if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; } } } out: newstrs_free( &s1, &s2, NULL ); return status; }
/* <MedlineDate>2003 Jan-Feb</MedlineDate>
 *
 * medin_medlinedate()
 *
 * Split the text at p into up to three whitespace-delimited tokens and
 * add them to info at the given level as "PARTYEAR", "PARTMONTH" and
 * "PARTDAY" respectively.  Empty tokens are not added.  Any '-' in the
 * month token is rewritten as '/' before it is stored.
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR if the scratch string
 * reports a memory error or fields_add() fails.  The scratch string is
 * released on every path.
 */
static int
medin_medlinedate( fields *info, char *p, int level )
{
	int fstatus, status = BIBL_OK;
	newstr tmp;

	newstr_init( &tmp );

	/* year */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) {
		status = BIBL_ERR_MEMERR;
		goto out;
	}
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTYEAR", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
	}

	/* month (or month range, e.g. "Jan-Feb" becomes "Jan/Feb") */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) {
		status = BIBL_ERR_MEMERR;
		goto out;
	}
	if ( tmp.len > 0 ) {
		newstr_findreplace( &tmp, "-", "/" );
		fstatus = fields_add( info, "PARTMONTH", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
	}

	/* day */
	(void) newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) {
		status = BIBL_ERR_MEMERR;
		goto out;
	}
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTDAY", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}

out:
	/* single exit so tmp is never leaked on an error return */
	newstr_free( &tmp );
	return status;
}
/*
 * process_names( info, tag, data, level, asis, corps );
 *
 * Rewrite every " and " separator in the author list as '|' (the
 * change is made in place in data) and pass the resulting string to
 * name_add().
 */
static void
process_names( fields *info, char *tag, newstr *data, int level,
		list *asis, list *corps )
{
	char *namelist;

	newstr_findreplace( data, " and ", "|" );

	/* fetch the buffer only after the replacement has been done */
	namelist = data->data;
	name_add( info, tag, namelist, level, asis, corps );
}
/*
 * process_pages()
 *
 * Extract a page range from s and add it to info at the given level.
 * All spaces are first removed from s (in place).  The leading run of
 * digits becomes "PAGESTART"; after skipping any non-digit characters,
 * the next run of digits becomes "PAGEEND".  A part that is empty is
 * not added.
 *
 * A string with no buffer (s->data == NULL) is treated as empty.
 */
static void
process_pages( fields *info, newstr *s, int level )
{
	char *p, *q;
	newstr sp, ep;

	newstr_findreplace( s, " ", "" );

	/* other code in this file tests newstr .data for NULL; don't
	 * dereference a missing buffer */
	if ( s->data==NULL ) return;

	newstr_init( &sp );
	newstr_init( &ep );

	/* <ctype.h> functions require a value representable as
	 * unsigned char (or EOF); plain char may be negative */
	p = q = s->data;
	while ( isdigit( (unsigned char) *q ) ) q++;
	newstr_segcpy( &sp, p, q );
	if ( sp.len>0 )
		fields_add( info, "PAGESTART", sp.data, level );

	/* skip the separator, whatever it is, up to the next digit */
	p = q;
	while ( *p && !isdigit( (unsigned char) *p ) ) p++;
	q = p;
	while ( isdigit( (unsigned char) *q ) ) q++;
	newstr_segcpy( &ep, p, q );
	if ( ep.len>0 )
		fields_add( info, "PAGEEND", ep.data, level );

	newstr_free( &sp );
	newstr_free( &ep );
}
/*
 * test_findreplace()
 *
 * Repeatedly reset s to "0123456789" and apply newstr_findreplace(),
 * first deleting "234" (expected length 7) and then expanding "234"
 * to "223344" (expected length 13).  After each batch the final state
 * of s is checked with test_consistency().
 *
 * Returns the sum of the values returned by test_consistency().
 */
int
test_findreplace( newstr *s )
{
	char segment[] = "0123456789";
	int numstrings = 1000;
	int failed = 0;
	int remaining;

	/* shrinking replacement */
	remaining = numstrings;
	while ( remaining-- > 0 ) {
		newstr_strcpy( s, segment );
		newstr_findreplace( s, "234", "" );
	}
	failed += test_consistency( s, 7, __FUNCTION__ );

	/* growing replacement */
	remaining = numstrings;
	while ( remaining-- > 0 ) {
		newstr_strcpy( s, segment );
		newstr_findreplace( s, "234", "223344" );
	}
	failed += test_consistency( s, 13, __FUNCTION__ );

	return failed;
}
/*
 * bibtexin_cleanref()
 *
 * Run bibtex_cleandata() over every field value in bibin and then
 * deal with embedded line breaks ("\n" and "\r"):
 *
 *   - when strsearch( tag, "AUTHORS" ) yields a false value, line
 *     breaks are replaced by a single space;
 *   - otherwise, when strsearch() yields a false value for any of
 *     "ABSTRACT", "SUMMARY" or "NOTE", line breaks are deleted;
 *   - otherwise the value is left as bibtex_cleandata() produced it.
 *
 * NOTE(review): the branches fire on a *false* strsearch() result.
 * strsearch()'s return convention is not visible here; if it returns
 * a match pointer (strstr-style) the first branch applies to every
 * tag that does NOT contain "AUTHORS" -- confirm this is intended.
 */
static void
bibtexin_cleanref( fields *bibin, param *p )
{
	newstr *tag, *value;
	char *eol;
	int i;

	for ( i=0; i<bibin->nfields; ++i ) {
		tag   = &( bibin->tag[i] );
		value = &( bibin->data[i] );

		bibtex_cleandata( value, bibin, p );

		/* choose what a line break turns into, if anything */
		if ( !strsearch( tag->data, "AUTHORS" ) )
			eol = " ";
		else if ( !strsearch( tag->data, "ABSTRACT" ) ||
		          !strsearch( tag->data, "SUMMARY" ) ||
		          !strsearch( tag->data, "NOTE" ) )
			eol = "";
		else
			continue;

		newstr_findreplace( value, "\n", eol );
		newstr_findreplace( value, "\r", eol );
	}
}
/*
 * test_findreplace()
 *
 * Repeatedly reset s to "0123456789" and apply newstr_findreplace(),
 * first deleting "234" (expecting "0156789", length 7) and then
 * expanding "234" to "223344" (expecting "0122334456789", length 13).
 * The final state of s after each batch is checked with
 * string_mismatch().
 *
 * Returns the number of checks that reported a mismatch (0, 1 or 2).
 */
static int
test_findreplace( newstr *s )
{
	char segment[] = "0123456789";
	int numstrings = 1000;
	int failed = 0;
	int remaining;

	/* shrinking replacement */
	remaining = numstrings;
	while ( remaining-- > 0 ) {
		newstr_strcpy( s, segment );
		newstr_findreplace( s, "234", "" );
	}
	if ( string_mismatch( s, 7, "0156789" ) )
		failed += 1;

	/* growing replacement */
	remaining = numstrings;
	while ( remaining-- > 0 ) {
		newstr_strcpy( s, segment );
		newstr_findreplace( s, "234", "223344" );
	}
	if ( string_mismatch( s, 13, "0122334456789" ) )
		failed += 1;

	return failed;
}
/*
 * bibtex_usestrings()
 *
 * Look s up (case-insensitively, whole-string) among the names in the
 * file-level list 'find'.  On the first match, call
 * newstr_findreplace() on s with that name and the entry at the same
 * index of 'replace', and return 1.  Return 0 if no name matches.
 *
 * NOTE(review): the match uses strcasecmp() but the substitution goes
 * through newstr_findreplace(); if that search is case-sensitive, a
 * name differing only in case would return 1 without s being changed
 * -- confirm against newstr_findreplace().
 */
static int
bibtex_usestrings( newstr *s )
{
	char *name;
	int i = 0;

	while ( i < find.n ) {
		name = (find.str[i]).data;
		if ( strcasecmp( s->data, name ) == 0 ) {
			newstr_findreplace( s, name, (replace.str[i]).data );
			return 1;
		}
		i++;
	}

	return 0;
}
/* copac names appear to always start with last name first, but don't
 * always seem to have a comma after the name
 *
 * editors seem to be stuck in as authors with the tag "[Editor]" in it
 *
 * copacin_addname()
 *
 * If name contains "[Editor]", that marker is removed and the name is
 * filed under "EDITOR" instead of tag.  Then, if the first
 * whitespace-delimited word contains no comma and is followed by
 * whitespace, that whitespace character is overwritten with ','.
 * The (modified) name is finally passed to name_add().
 */
static void
copacin_addname( fields *info, char *tag, newstr *name, int level,
		list *asis, list *corps )
{
	char editor[] = "EDITOR";
	char *usetag = tag;
	char *p;
	int saw_comma = 0;

	if ( strstr( name->data, "[Editor]" ) != NULL ) {
		newstr_findreplace( name, "[Editor]", "" );
		usetag = editor;
	}

	/* walk the first word, noting whether it holds a comma */
	for ( p = skip_ws( name->data ); *p && !is_ws( *p ); p++ ) {
		if ( *p==',' ) saw_comma = 1;
	}

	/* supply the missing comma after the first word */
	if ( !saw_comma && is_ws( *p ) ) *p = ',';

	name_add( info, usetag, name->data, level, asis, corps );
}
static void bibtex_addstring( char *p ) { newstr s1, s2; newstr_init( &s1 ); newstr_init( &s2 ); p = skip_ws( p ); if ( *p=='(' || *p=='{' ) p++; p = process_bibtexline( p, &s1, &s2 ); newstr_findreplace( &s2, "\\ ", " " ); bibtex_cleantoken( &s2, NULL ); if ( s1.data ) { list_add( &find, s1.data ); if ( s2.data ) list_add( &replace, s2.data ); else list_add( &replace, "" ); } newstr_free( &s1 ); newstr_free( &s2 ); }
/*
 * name_process()
 *
 * Add a single name to info at the given level.
 *
 * If name_nomangle() reports that the name must be kept verbatim, the
 * unmodified inname is added under the tag name_nomangle() produced.
 * Otherwise every "." in inname is expanded to ". " (in place), the
 * name is converted with name_comma() when it contains a ',' or with
 * name_nocomma() when it does not, and a non-empty result is added
 * under tag.
 */
static void
name_process( fields *info, char *tag, int level, newstr *inname,
		list *asis, list *corps )
{
	newstr newtag, outname;

	newstr_init( &newtag );
	newstr_init( &outname );

	if ( name_nomangle( tag, inname->data, &newtag, asis, corps ) ) {
		fields_add( info, newtag.data, inname->data, level );
		goto done;
	}

	newstr_findreplace( inname, ".", ". " );

	if ( strchr( inname->data, ',' ) != NULL )
		name_comma( inname->data, &outname );
	else
		name_nocomma( inname->data, &outname );

	if ( outname.len!=0 )
		fields_add( info, tag, outname.data, level );

done:
	newstr_free( &newtag );
	newstr_free( &outname );
}
/*
 * name_nomangle()
 *
 * return 1 on a nomangle with a newtag value
 * return 0 on a name to mangle
 *
 * The four flags come from name_determine_flags().  When any of them
 * is set, newtag starts as a copy of tag and is then adjusted:
 *
 *   corp_tag_flag set                  -> unchanged
 *   corp_list_flag, asis_tag_flag set  -> ":ASIS" replaced by ":CORP"
 *   corp_list_flag only                -> ":CORP" appended
 *   asis_tag_flag set (no corp flags)  -> unchanged
 *   asis_list_flag only                -> ":ASIS" appended
 */
static int
name_nomangle( char *tag, char *data, newstr *newtag, list *asis, list *corps )
{
	int corp_tag_flag, corp_list_flag;
	int asis_tag_flag, asis_list_flag;

	name_determine_flags( &corp_tag_flag, &corp_list_flag,
			&asis_tag_flag, &asis_list_flag,
			tag, data, asis, corps );

	if ( !corp_tag_flag && !corp_list_flag &&
	     !asis_tag_flag && !asis_list_flag )
		return 0;

	newstr_strcpy( newtag, tag );

	/* a tag already marked corporate needs nothing more */
	if ( corp_tag_flag ) return 1;

	if ( corp_list_flag ) {
		if ( asis_tag_flag )
			newstr_findreplace( newtag, ":ASIS", ":CORP" );
		else
			newstr_strcat( newtag, ":CORP" );
	} else if ( !asis_tag_flag && asis_list_flag ) {
		newstr_strcat( newtag, ":ASIS" );
	}

	return 1;
}
/*
 * process_pages()
 *
 * Split a page range in s into "PAGESTART" and "PAGEEND" fields added
 * to info at the given level.  All spaces are first removed from s
 * (in place).  The start page runs up to whitespace, '-', a line
 * break, or a byte 0xE2 (the lead byte of the UTF-8 en dash and em
 * dash); the separator (whitespace/'-' and then a UTF-8 em dash
 * and/or en dash) is skipped and the end page is read the same way,
 * except that a 0xE2 byte does not end it.  Empty parts are not added.
 *
 * Returns BIBL_OK on success and BIBL_ERR_MEMERR if fields_add()
 * fails.  Returns 1 when s is empty after space removal.
 *
 * NOTE(review): the literal 1 for empty input is kept for
 * compatibility; it is not one of the BIBL_* names used on the other
 * paths -- confirm what callers expect.
 */
static int
process_pages( fields *info, newstr *s, int level )
{
	int fstatus, status = BIBL_OK;
	newstr page;
	char *p;

	newstr_findreplace( s, " ", "" );
	if ( s->len==0 ) return 1;

	newstr_init( &page );

	/* Compare the byte as unsigned char: the original test against
	 * -30 only matched 0xE2 where plain char is signed. */
	p = skip_ws( s->data );
	while ( *p && !is_ws(*p) && *p!='-' && *p!='\r' && *p!='\n' &&
	        (unsigned char) *p != 0xE2 )
		newstr_addchar( &page, *p++ );

	if ( page.len>0 ) {
		fstatus = fields_add( info, "PAGESTART", page.data, level );
		if ( fstatus!=FIELDS_OK ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
	}

	/* skip the range separator */
	while ( *p && ( is_ws(*p) || *p=='-' ) ) p++;
	if ( *p && is_utf8_emdash( p ) ) p+=3;
	if ( *p && is_utf8_endash( p ) ) p+=3;

	newstr_empty( &page );
	while ( *p && !is_ws(*p) && *p!='-' && *p!='\r' && *p!='\n' )
		newstr_addchar( &page, *p++ );

	if ( page.len>0 ) {
		fstatus = fields_add( info, "PAGEEND", page.data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}

out:
	newstr_free( &page );
	return status;
}
/*
 * output_names()
 *
 * Write a <name> element for every non-empty field of f at the given
 * level whose tag names one of the roles in the table below.  Roles
 * are handled in table order; within a role, fields are handled in
 * field order.
 *
 * The markers ":ASIS", ":CORP" and ":CONF" are stripped from a copy
 * of the tag before it is compared (case-insensitively) with the
 * role's internal name.  A field marked ":ASIS" is written as a plain
 * <namePart>; ":CORP" and ":CONF" add type="corporate" and
 * type="conference" to <name>; anything else goes through
 * output_name().  Each name is followed by a <role>/<roleTerm> pair,
 * with authority="marcrelator" when the table entry has
 * MARC_AUTHORITY set, and the field is then marked as used.
 */
static void
output_names( fields *f, FILE *outptr, int level )
{
	convert names[] = {
		{ "author",                              "AUTHOR",          0, MARC_AUTHORITY },
		{ "editor",                              "EDITOR",          0, MARC_AUTHORITY },
		{ "annotator",                           "ANNOTATOR",       0, MARC_AUTHORITY },
		{ "artist",                              "ARTIST",          0, MARC_AUTHORITY },
		{ "author",                              "2ND_AUTHOR",      0, MARC_AUTHORITY },
		{ "author",                              "3RD_AUTHOR",      0, MARC_AUTHORITY },
		{ "author",                              "SUB_AUTHOR",      0, MARC_AUTHORITY },
		{ "author",                              "COMMITTEE",       0, MARC_AUTHORITY },
		{ "author",                              "COURT",           0, MARC_AUTHORITY },
		{ "author",                              "LEGISLATIVEBODY", 0, MARC_AUTHORITY },
		{ "author of afterword, colophon, etc.", "AFTERAUTHOR",     0, MARC_AUTHORITY },
		{ "author of introduction, etc.",        "INTROAUTHOR",     0, MARC_AUTHORITY },
		{ "cartographer",                        "CARTOGRAPHER",    0, MARC_AUTHORITY },
		{ "collaborator",                        "COLLABORATOR",    0, MARC_AUTHORITY },
		{ "commentator",                         "COMMENTATOR",     0, MARC_AUTHORITY },
		{ "compiler",                            "COMPILER",        0, MARC_AUTHORITY },
		{ "degree grantor",                      "DEGREEGRANTOR",   0, MARC_AUTHORITY },
		{ "director",                            "DIRECTOR",        0, MARC_AUTHORITY },
		{ "event",                               "EVENT",           0, NO_AUTHORITY   },
		{ "inventor",                            "INVENTOR",        0, MARC_AUTHORITY },
		{ "organizer of meeting",                "ORGANIZER",       0, MARC_AUTHORITY },
		{ "patent holder",                       "ASSIGNEE",        0, MARC_AUTHORITY },
		{ "performer",                           "PERFORMER",       0, MARC_AUTHORITY },
		{ "producer",                            "PRODUCER",        0, MARC_AUTHORITY },
		{ "recipient",                           "RECIPIENT",       0, MARC_AUTHORITY },
		{ "redactor",                            "REDACTOR",        0, MARC_AUTHORITY },
		{ "reporter",                            "REPORTER",        0, MARC_AUTHORITY },
		{ "sponsor",                             "SPONSOR",         0, MARC_AUTHORITY },
		{ "translator",                          "TRANSLATOR",      0, MARC_AUTHORITY },
		{ "writer",                              "WRITER",          0, MARC_AUTHORITY },
	};
	int ntypes = sizeof( names ) / sizeof( convert );
	int is_asis, is_corp, is_conf;
	int n, i, nfields;
	newstr role;

	newstr_init( &role );
	nfields = fields_num( f );

	for ( n=0; n<ntypes; ++n ) {
		for ( i=0; i<nfields; ++i ) {

			if ( fields_level( f, i )!=level ) continue;
			if ( f->data[i].len==0 ) continue;

			/* strip all three markers from a copy of the tag,
			 * remembering which ones were present */
			newstr_strcpy( &role, f->tag[i].data );
			is_asis = ( newstr_findreplace( &role, ":ASIS", "" ) != 0 );
			is_corp = ( newstr_findreplace( &role, ":CORP", "" ) != 0 );
			is_conf = ( newstr_findreplace( &role, ":CONF", "" ) != 0 );

			if ( strcasecmp( role.data, names[n].internal ) ) continue;

			if ( !is_asis && !is_corp && !is_conf ) {
				/* ordinary personal name */
				output_name(outptr, f->data[i].data, level);
			} else {
				/* verbatim name: only <name>'s attributes differ */
				if ( is_asis )
					output_tag( outptr, lvl2indent(level), "name", NULL,
						TAG_OPEN, TAG_NEWLINE, NULL );
				else if ( is_corp )
					output_tag( outptr, lvl2indent(level), "name", NULL,
						TAG_OPEN, TAG_NEWLINE,
						"type", "corporate", NULL );
				else
					output_tag( outptr, lvl2indent(level), "name", NULL,
						TAG_OPEN, TAG_NEWLINE,
						"type", "conference", NULL );
				output_fil( outptr, lvl2indent(incr_level(level,1)),
					"namePart", f, i, TAG_OPENCLOSE, TAG_NEWLINE, NULL );
			}

			output_tag( outptr, lvl2indent(incr_level(level,1)), "role", NULL,
				TAG_OPEN, TAG_NEWLINE, NULL );
			if ( names[n].code & MARC_AUTHORITY )
				output_tag( outptr, lvl2indent(incr_level(level,2)),
					"roleTerm", names[n].mods,
					TAG_OPENCLOSE, TAG_NEWLINE,
					"authority", "marcrelator", "type", "text", NULL );
			else
				output_tag( outptr, lvl2indent(incr_level(level,2)),
					"roleTerm", names[n].mods,
					TAG_OPENCLOSE, TAG_NEWLINE,
					"type", "text", NULL );
			output_tag( outptr, lvl2indent(incr_level(level,1)), "role", NULL,
				TAG_CLOSE, TAG_NEWLINE, NULL );

			output_tag( outptr, lvl2indent(level), "name", NULL,
				TAG_CLOSE, TAG_NEWLINE, NULL );

			fields_setused( f, i );
		}
	}

	newstr_free( &role );
}
/*
 * bibtex_cleantoken()
 *
 * Strip LaTeX markup from s in place: remove text-style commands
 * (\textit, \textbf, \emph, \url, ...), unescape "\%" and "\$",
 * delete all braces, collapse runs of spaces into a single space, and
 * finally turn "\textdollar" and "\textunderscore" into '$' and '_'.
 *
 * Does nothing when p is non-NULL and p->latexin is 0; a NULL p always
 * cleans.
 */
static void
bibtex_cleantoken( newstr *s, param *p )
{
	if ( p && p->latexin==0 ) return;

	/* 'textcomp' annotations */
	newstr_findreplace( s, "\\textit", "" );
	newstr_findreplace( s, "\\textbf", "" );
	newstr_findreplace( s, "\\textsl", "" );
	newstr_findreplace( s, "\\textsc", "" );
	newstr_findreplace( s, "\\textsf", "" );
	newstr_findreplace( s, "\\texttt", "" );
	newstr_findreplace( s, "\\textsubscript", "" );
	newstr_findreplace( s, "\\textsuperscript", "" );
	newstr_findreplace( s, "\\emph", "" );
	newstr_findreplace( s, "\\url", "" );

	/* Other text annotations */
	newstr_findreplace( s, "\\it ", "" );
	newstr_findreplace( s, "\\em ", "" );

	newstr_findreplace( s, "\\%", "%" );
	newstr_findreplace( s, "\\$", "$" );
	newstr_findreplace( s, "{", "" );
	newstr_findreplace( s, "}", "" );

	/* Collapse runs of spaces: replace TWO spaces with ONE until no
	 * pair is left.  The search string must be longer than the
	 * replacement or the loop can never make progress. */
	while ( newstr_findreplace( s, "  ", " " ) ) {}

	/* 'textcomp' annotations that we don't want to substitute on output*/
	newstr_findreplace( s, "\\textdollar", "$" );
	newstr_findreplace( s, "\\textunderscore", "_" );
}
/*
 * bibtex_cleantoken()
 *
 * Strip LaTeX markup from s in place: remove text-style commands
 * (\textit, \textbf, \emph, \url, \mbox, ...), unescape "\%" and
 * "\$", collapse runs of spaces into a single space, and turn
 * "\textdollar" and "\textunderscore" into '$' and '_'.  The result is
 * then handed to bibtex_process_bracket() and bibtex_process_tilde().
 */
static void
bibtex_cleantoken( newstr *s )
{
	/* 'textcomp' annotations */
	newstr_findreplace( s, "\\textit", "" );
	newstr_findreplace( s, "\\textbf", "" );
	newstr_findreplace( s, "\\textsl", "" );
	newstr_findreplace( s, "\\textsc", "" );
	newstr_findreplace( s, "\\textsf", "" );
	newstr_findreplace( s, "\\texttt", "" );
	newstr_findreplace( s, "\\textsubscript", "" );
	newstr_findreplace( s, "\\textsuperscript", "" );
	newstr_findreplace( s, "\\emph", "" );
	newstr_findreplace( s, "\\url", "" );
	newstr_findreplace( s, "\\mbox", "" );

	/* Other text annotations */
	newstr_findreplace( s, "\\it ", "" );
	newstr_findreplace( s, "\\em ", "" );

	newstr_findreplace( s, "\\%", "%" );
	newstr_findreplace( s, "\\$", "$" );

	/* Collapse runs of spaces: replace TWO spaces with ONE until no
	 * pair is left.  The search string must be longer than the
	 * replacement or the loop can never make progress. */
	while ( newstr_findreplace( s, "  ", " " ) ) {}

	/* 'textcomp' annotations that we don't want to substitute on output*/
	newstr_findreplace( s, "\\textdollar", "$" );
	newstr_findreplace( s, "\\textunderscore", "_" );

	bibtex_process_bracket( s );
	bibtex_process_tilde( s );
}