Пример #1
0
/*
 * extract_name_and_info()
 *
 * Copies intag into outtag, removes any ":ASIS" and ":CORP" markers from
 * the copy, and reports what kind of name tag was seen.
 *
 * Returns NAME_CORP if ":CORP" was removed, otherwise NAME_ASIS if
 * ":ASIS" was removed, otherwise NAME.
 */
static int
extract_name_and_info( newstr *outtag, newstr *intag )
{
	int had_asis, had_corp;

	newstr_newstrcpy( outtag, intag );

	/* both markers are always stripped, whichever one decides the code */
	had_asis = ( newstr_findreplace( outtag, ":ASIS", "" ) != 0 );
	had_corp = ( newstr_findreplace( outtag, ":CORP", "" ) != 0 );

	/* ":CORP" takes precedence when both markers were present */
	if ( had_corp ) return NAME_CORP;
	if ( had_asis ) return NAME_ASIS;
	return NAME;
}
Пример #2
0
/* process_string()
 *
 * Record a BibTeX string definition such as:
 *
 * '@STRING{TL = {Tetrahedron Lett.}}'
 *
 * p should point to just after '@STRING'
 *
 * The two strings parsed by process_bibtexline() are stored in the
 * file-level 'find' and 'replace' lists at matching positions.  In BibTeX,
 * if a string is defined several times, the last one is kept, so an
 * already-known entry in 'find' has its 'replace' entry overwritten.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR if process_bibtexline() returns NULL
 * or a list update returns NULL.
 */
static int
process_string( char *p )
{
	newstr name, value, *slot;
	int status = BIBL_OK, pos;

	newstrs_init( &name, &value, NULL );

	/* advance to the opening '{' or '(' and step over it */
	for ( ; *p && *p!='{' && *p!='('; p++ )
		;
	if ( *p=='{' || *p=='(' ) p++;

	p = process_bibtexline( skip_ws( p ), &name, &value, 0, NULL );
	if ( p==NULL ) {
		status = BIBL_ERR_MEMERR;
		goto out;
	}

	/* turn escaped spaces into plain spaces */
	if ( value.data )
		newstr_findreplace( &value, "\\ ", " " );

	/* nothing to record without a first string */
	if ( !name.data ) goto out;

	pos = list_find( &find, name.data );
	if ( pos==-1 ) {
		/* first definition: append to both lists */
		slot = list_add( &find, &name );
		if ( slot==NULL ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
		if ( value.data ) slot = list_add( &replace, &value );
		else slot = list_addc( &replace, "" );
	} else {
		/* redefinition: overwrite the existing replacement */
		if ( value.data ) slot = list_set( &replace, pos, &value );
		else slot = list_setc( &replace, pos, "" );
	}
	if ( slot==NULL ) status = BIBL_ERR_MEMERR;

out:
	newstrs_free( &name, &value, NULL );
	return status;
}
Пример #3
0
/* medin_medlinedate()
 *
 *            <MedlineDate>2003 Jan-Feb</MedlineDate>
 *
 * Reads up to three whitespace-delimited tokens from p and adds each
 * non-empty one to info, in order, as "PARTYEAR", "PARTMONTH" and
 * "PARTDAY".  Any '-' in the month token is replaced with '/' before it
 * is stored.
 *
 * Returns BIBL_OK on success, BIBL_ERR_MEMERR if the scratch string
 * reports a memory error or fields_add() fails.  The scratch string is
 * released on every exit path.
 */
static int
medin_medlinedate( fields *info, char *p, int level )
{
	int fstatus, status = BIBL_OK;
	newstr tmp;

	newstr_init( &tmp );

	/* first token: year */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) { status = BIBL_ERR_MEMERR; goto out; }
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTYEAR", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}

	/* second token: month or month range, stored with '/' instead of '-' */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) { status = BIBL_ERR_MEMERR; goto out; }
	if ( tmp.len > 0 ) {
		newstr_findreplace( &tmp, "-", "/" );
		fstatus = fields_add( info, "PARTMONTH", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}

	/* third token: day */
	p = newstr_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
	if ( newstr_memerr( &tmp ) ) { status = BIBL_ERR_MEMERR; goto out; }
	if ( tmp.len > 0 ) {
		fstatus = fields_add( info, "PARTDAY", tmp.data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}

out:
	/* single exit so tmp is never leaked on an error return */
	newstr_free( &tmp );
	return status;
}
Пример #4
0
/*
 * process_names( info, tag, data, level, asis, corps );
 *
 * An author list arrives with names separated by " and ".  Each
 * separator is rewritten in place to a single '|' character and the
 * resulting string is passed to name_add().
 */
static void
process_names( fields *info, char *tag, newstr *data, int level, list *asis,
	list *corps )
{
	char *joined;

	newstr_findreplace( data, " and ", "|" );

	/* read the buffer pointer only after the in-place rewrite */
	joined = data->data;
	name_add( info, tag, joined, level, asis, corps );
}
Пример #5
0
/*
 * process_pages()
 *
 * Extracts a start and an end page from s and adds them to info as
 * "PAGESTART" and "PAGEEND".
 *
 * All spaces are first removed from s (in place).  The leading run of
 * decimal digits becomes PAGESTART; after skipping any non-digit
 * characters, the next run of digits becomes PAGEEND.  A field is only
 * added when its digit run is non-empty.  Nothing is added when s holds
 * no character data.
 */
static void
process_pages( fields *info, newstr *s, int level )
{
	char *p, *q;
	newstr sp, ep;

	newstr_findreplace( s, " ", "" );

	/* nothing to scan if the string has no buffer */
	if ( s->data==NULL ) return;

	newstr_init( &sp );
	newstr_init( &ep );

	/* <ctype.h> functions require a value representable as unsigned
	 * char; a plain (possibly negative) char is undefined behavior */
	p = q = s->data;
	while ( isdigit( (unsigned char) *q ) ) q++;
	newstr_segcpy( &sp, p, q );
	if ( sp.len>0 )
		fields_add( info, "PAGESTART", sp.data, level );

	/* skip the separator, whatever non-digit characters it is made of */
	p = q;
	while ( *p && !isdigit( (unsigned char) *p ) ) p++;
	q = p;
	while ( isdigit( (unsigned char) *q ) ) q++;
	newstr_segcpy( &ep, p, q );
	if ( ep.len>0 )
		fields_add( info, "PAGEEND", ep.data, level );

	newstr_free( &sp );
	newstr_free( &ep );
}
Пример #6
0
/*
 * test_findreplace()
 *
 * Repeatedly loads "0123456789" into s and applies newstr_findreplace(),
 * once deleting "234" and once expanding it to "223344".  After each
 * round the string is handed to test_consistency() together with the
 * expected result length (10-3 = 7, then 10-3+6 = 13).
 *
 * Returns the sum of the values returned by test_consistency().
 */
int
test_findreplace( newstr *s )
{
	char digits[] = "0123456789";
	int rounds = 1000, failures = 0, k;

	/* shrink: "234" -> "" */
	k = 0;
	while ( k < rounds ) {
		newstr_strcpy( s, digits );
		newstr_findreplace( s, "234", "" );
		k++;
	}
	failures += test_consistency( s, 7, __FUNCTION__ );

	/* grow: "234" -> "223344" */
	k = 0;
	while ( k < rounds ) {
		newstr_strcpy( s, digits );
		newstr_findreplace( s, "234", "223344" );
		k++;
	}
	failures += test_consistency( s, 13, __FUNCTION__ );

	return failures;
}
Пример #7
0
/*
 * bibtexin_cleanref()
 *
 * Runs bibtex_cleandata() over the data of every field in bibin and then
 * replaces line breaks ("\n" and "\r") in that data: with a space when
 * the first tag test below succeeds, with nothing when the second group
 * of tests succeeds, and not at all otherwise.
 *
 * NOTE(review): every test is a negated strsearch() result.  The return
 * convention of strsearch() is not visible here -- confirm the polarity
 * of these conditions is what was intended.
 */
static void
bibtexin_cleanref( fields *bibin, param *p )
{
	newstr *tag, *value;
	char *filler;
	int i;

	for ( i=0; i<bibin->nfields; i++ ) {
		tag   = bibin->tag + i;
		value = bibin->data + i;

		bibtex_cleandata( value, bibin, p );

		/* choose what a line break turns into, if anything */
		if ( !strsearch( tag->data, "AUTHORS" ) ) {
			filler = " ";
		} else if ( !strsearch( tag->data, "ABSTRACT" ) ||
		            !strsearch( tag->data, "SUMMARY" ) ||
		            !strsearch( tag->data, "NOTE" ) ) {
			filler = "";
		} else {
			continue;
		}

		newstr_findreplace( value, "\n", filler );
		newstr_findreplace( value, "\r", filler );
	}
}
Пример #8
0
/*
 * test_findreplace()
 *
 * Exercises newstr_findreplace() on "0123456789": first deleting "234"
 * (expecting "0156789", length 7), then expanding "234" to "223344"
 * (expecting "0122334456789", length 13).  Each case is repeated many
 * times on the same newstr before being checked with string_mismatch().
 *
 * Returns the number of cases that string_mismatch() flagged.
 */
static int
test_findreplace( newstr *s )
{
	char digits[] = "0123456789";
	int rounds = 1000, failures = 0, k;

	/* shrink: "234" -> "" */
	k = 0;
	while ( k < rounds ) {
		newstr_strcpy( s, digits );
		newstr_findreplace( s, "234", "" );
		k++;
	}
	if ( string_mismatch( s, 7, "0156789" ) )
		failures += 1;

	/* grow: "234" -> "223344" */
	k = 0;
	while ( k < rounds ) {
		newstr_strcpy( s, digits );
		newstr_findreplace( s, "234", "223344" );
		k++;
	}
	if ( string_mismatch( s, 13, "0122334456789" ) )
		failures += 1;

	return failures;
}
Пример #9
0
/*
 * bibtex_usestrings()
 *
 * Looks for s in the file-level 'find' list, comparing without regard to
 * case.  On the first match, the matching 'find' text inside s is
 * replaced by the entry at the same index of the 'replace' list.
 *
 * Returns 1 if an entry matched, 0 otherwise.
 *
 * NOTE(review): the match is case-insensitive but the substitution goes
 * through newstr_findreplace(); if that search is case-sensitive, a
 * differently-cased s would match yet stay unchanged -- confirm.
 */
static int
bibtex_usestrings( newstr *s )
{
	int k = 0;

	while ( k < find.n ) {
		if ( strcasecmp( s->data, find.str[k].data ) == 0 ) {
			newstr_findreplace( s, find.str[k].data,
					replace.str[k].data );
			return 1;
		}
		k++;
	}

	return 0;
}
Пример #10
0
/* copacin_addname()
 *
 * copac names appear to always start with last name first, but don't
 * always seem to have a comma after the name; when the first word
 * contains no comma, the whitespace character ending it is overwritten
 * with one.
 *
 * editors seem to be stuck in as authors with the tag "[Editor]" in it;
 * that marker is removed from the name and the name is added under
 * "EDITOR" instead of the supplied tag.
 */
static void
copacin_addname( fields *info, char *tag, newstr *name, int level, list *asis,
	list *corps )
{
	char editor_tag[] = "EDITOR";
	char *role = tag, *c;
	int ncommas = 0;

	if ( strstr( name->data, "[Editor]" ) != NULL ) {
		role = editor_tag;
		newstr_findreplace( name, "[Editor]", "" );
	}

	/* walk the first whitespace-delimited word, counting its commas */
	for ( c = skip_ws( name->data ); *c && !is_ws( *c ); c++ ) {
		if ( *c==',' ) ncommas++;
	}

	/* c now sits on the character that ended the first word */
	if ( ncommas==0 && is_ws( *c ) ) *c = ',';

	name_add( info, role, name->data, level, asis, corps );
}
Пример #11
0
/*
 * bibtex_addstring()
 *
 * Parses one string definition starting at p (leading whitespace and one
 * opening '(' or '{' are skipped) with process_bibtexline(), cleans the
 * second parsed string, and appends the pair to the file-level 'find' and
 * 'replace' lists.  An empty replacement is stored when the second string
 * has no data.  Nothing is appended if the first string has no data.
 */
static void
bibtex_addstring( char *p )
{
	newstr name, value;

	newstr_init( &name );
	newstr_init( &value );

	p = skip_ws( p );
	if ( *p=='(' || *p=='{' )
		p++;
	p = process_bibtexline( p, &name, &value );

	/* turn escaped spaces into plain spaces, then strip markup */
	newstr_findreplace( &value, "\\ ", " " );
	bibtex_cleantoken( &value, NULL );

	if ( name.data ) {
		list_add( &find, name.data );
		list_add( &replace, value.data ? value.data : "" );
	}

	newstr_free( &name );
	newstr_free( &value );
}
Пример #12
0
/*
 * name_process()
 *
 * Adds one name to info.
 *
 * If name_nomangle() says the name must be kept verbatim, inname is added
 * unchanged under the tag that name_nomangle() produced.  Otherwise every
 * "." in inname is expanded to ". " (in place), the name is rewritten by
 * name_comma() or name_nocomma() depending on whether it contains a ',',
 * and a non-empty result is added under the original tag.
 */
static void
name_process( fields *info, char *tag, int level, newstr *inname, list *asis,
	list *corps )
{
	newstr newtag, outname;

	newstr_init( &newtag );
	newstr_init( &outname );

	if ( !name_nomangle( tag, inname->data, &newtag, asis, corps ) ) {
		newstr_findreplace( inname, ".", ". " );

		if ( strchr( inname->data, ',' ) != NULL ) {
			name_comma( inname->data, &outname );
		} else {
			name_nocomma( inname->data, &outname );
		}

		if ( outname.len!=0 )
			fields_add( info, tag, outname.data, level );
	} else {
		/* keep the name exactly as given */
		fields_add( info, newtag.data, inname->data, level );
	}

	newstr_free( &newtag );
	newstr_free( &outname );
}
Пример #13
0
/*
 * name_nomangle()
 *
 * return 1 on a nomangle with a newtag value
 * return 0 on a name to mangle
 *
 * The four flags come from name_determine_flags().  When any is set,
 * newtag starts as a copy of tag and is then adjusted:
 *
 *   corp tag flag set               -> unchanged
 *   corp list flag, no asis tag     -> ":CORP" appended
 *   corp list flag and asis tag     -> ":ASIS" replaced by ":CORP"
 *   asis tag flag only              -> unchanged
 *   asis list flag only             -> ":ASIS" appended
 */
static int
name_nomangle( char *tag, char *data, newstr *newtag, list *asis, list *corps )
{
	int corp_tag_flag, corp_list_flag;
	int asis_tag_flag, asis_list_flag;

	name_determine_flags( &corp_tag_flag, &corp_list_flag,
		&asis_tag_flag, &asis_list_flag, tag, data, asis, corps );

	if ( !corp_tag_flag && !corp_list_flag &&
	     !asis_tag_flag && !asis_list_flag )
		return 0;

	newstr_strcpy( newtag, tag );

	if ( !corp_tag_flag ) {
		if ( corp_list_flag ) {
			if ( asis_tag_flag )
				newstr_findreplace( newtag, ":ASIS", ":CORP" );
			else
				newstr_strcat( newtag, ":CORP" );
		} else if ( !asis_tag_flag && asis_list_flag ) {
			newstr_strcat( newtag, ":ASIS" );
		}
	}

	return 1;
}
Пример #14
0
/*
 * process_pages()
 *
 * Splits a page range held in s into a start and an end page and adds
 * them to info as "PAGESTART" and "PAGEEND".
 *
 * All spaces are first removed from s (in place).  The start page is
 * everything up to the first whitespace, '-', line break, or byte 0xE2
 * (the lead byte of the UTF-8 en/em dash sequences tested below).  The
 * separator -- hyphens/whitespace, then an optional em dash and en
 * dash -- is skipped, and the end page is read the same way.  A field is
 * only added when its text is non-empty.
 *
 * Returns BIBL_OK on success or BIBL_ERR_MEMERR if fields_add() fails.
 *
 * NOTE(review): an empty s returns the literal 1 rather than a BIBL_*
 * constant; kept as-is for callers -- confirm the intended value.
 */
static int
process_pages( fields *info, newstr *s, int level )
{
	int fstatus, status = BIBL_OK;
	newstr page;
	char *p;

	newstr_findreplace( s, " ", "" );
	if ( s->len==0 ) return 1;

	newstr_init( &page );
	p = skip_ws( s->data );
	/* compare the lead byte as unsigned char: testing a plain char
	 * against -30 only matches 0xE2 where char is signed */
	while ( *p && !is_ws(*p) && *p!='-' && *p!='\r' && *p!='\n' &&
	        (unsigned char)*p!=0xE2 )
		newstr_addchar( &page, *p++ );
	if ( page.len>0 ) {
		fstatus = fields_add( info, "PAGESTART", page.data, level );
		if ( fstatus!=FIELDS_OK ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
	}

	/* skip the range separator */
	while ( *p && (is_ws(*p) || *p=='-' ) ) p++;
	if ( *p && is_utf8_emdash( p ) ) p+=3;
	if ( *p && is_utf8_endash( p ) ) p+=3;

	newstr_empty( &page );
	while ( *p && !is_ws(*p) && *p!='-' && *p!='\r' && *p!='\n' )
		newstr_addchar( &page, *p++ );
	if ( page.len>0 ) {
		fstatus = fields_add( info, "PAGEEND", page.data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}

out:
	newstr_free( &page );
	return status;
}
Пример #15
0
/*
 * output_names()
 *
 * Writes a <name> element for every non-empty field of f at the given
 * level whose tag, after removing the ":ASIS", ":CORP" and ":CONF"
 * markers, matches (case-insensitively) one of the internal tags in the
 * table below.  Output is grouped by table entry, in table order.
 *
 * Names marked ":ASIS" are written as a single namePart with no type,
 * ":CORP" as type "corporate", ":CONF" as type "conference"; all others
 * go through output_name().  Each name is followed by a <role> element
 * carrying the table's role term, with the "marcrelator" authority
 * attribute when the entry has MARC_AUTHORITY set.  Every field written
 * is marked as used.
 */
static void
output_names( fields *f, FILE *outptr, int level )
{
	convert   names[] = {
	  { "author",                              "AUTHOR",          0, MARC_AUTHORITY },
	  { "editor",                              "EDITOR",          0, MARC_AUTHORITY },
	  { "annotator",                           "ANNOTATOR",       0, MARC_AUTHORITY },
	  { "artist",                              "ARTIST",          0, MARC_AUTHORITY },
	  { "author",                              "2ND_AUTHOR",      0, MARC_AUTHORITY },
	  { "author",                              "3RD_AUTHOR",      0, MARC_AUTHORITY },
	  { "author",                              "SUB_AUTHOR",      0, MARC_AUTHORITY },
	  { "author",                              "COMMITTEE",       0, MARC_AUTHORITY },
	  { "author",                              "COURT",           0, MARC_AUTHORITY },
	  { "author",                              "LEGISLATIVEBODY", 0, MARC_AUTHORITY },
	  { "author of afterword, colophon, etc.", "AFTERAUTHOR",     0, MARC_AUTHORITY },
	  { "author of introduction, etc.",        "INTROAUTHOR",     0, MARC_AUTHORITY },
	  { "cartographer",                        "CARTOGRAPHER",    0, MARC_AUTHORITY },
	  { "collaborator",                        "COLLABORATOR",    0, MARC_AUTHORITY },
	  { "commentator",                         "COMMENTATOR",     0, MARC_AUTHORITY },
	  { "compiler",                            "COMPILER",        0, MARC_AUTHORITY },
	  { "degree grantor",                      "DEGREEGRANTOR",   0, MARC_AUTHORITY },
	  { "director",                            "DIRECTOR",        0, MARC_AUTHORITY },
	  { "event",                               "EVENT",           0, NO_AUTHORITY   },
	  { "inventor",                            "INVENTOR",        0, MARC_AUTHORITY },
	  { "organizer of meeting",                "ORGANIZER",       0, MARC_AUTHORITY },
	  { "patent holder",                       "ASSIGNEE",        0, MARC_AUTHORITY },
	  { "performer",                           "PERFORMER",       0, MARC_AUTHORITY },
	  { "producer",                            "PRODUCER",        0, MARC_AUTHORITY },
	  { "recipient",                           "RECIPIENT",       0, MARC_AUTHORITY },
	  { "redactor",                            "REDACTOR",        0, MARC_AUTHORITY },
	  { "reporter",                            "REPORTER",        0, MARC_AUTHORITY },
	  { "sponsor",                             "SPONSOR",         0, MARC_AUTHORITY },
	  { "translator",                          "TRANSLATOR",      0, MARC_AUTHORITY },
	  { "writer",                              "WRITER",          0, MARC_AUTHORITY },
	};
	int ntypes = sizeof( names ) / sizeof( names[0] );
	int kind, idx, total;
	int is_asis, is_corp, is_conf;
	newstr role;

	newstr_init( &role );
	total = fields_num( f );

	for ( kind=0; kind<ntypes; ++kind ) {
		for ( idx=0; idx<total; ++idx ) {

			/* only non-empty fields at the requested level */
			if ( fields_level( f, idx )!=level || f->data[idx].len==0 )
				continue;

			/* strip the markers from a copy of the tag, noting which were present */
			newstr_strcpy( &role, f->tag[idx].data );
			is_asis = newstr_findreplace( &role, ":ASIS", "" ) ? 1 : 0;
			is_corp = newstr_findreplace( &role, ":CORP", "" ) ? 1 : 0;
			is_conf = newstr_findreplace( &role, ":CONF", "" ) ? 1 : 0;

			if ( strcasecmp( role.data, names[kind].internal ) != 0 )
				continue;

			if ( !is_asis && !is_corp && !is_conf ) {
				output_name(outptr, f->data[idx].data, level);
			} else {
				/* ":ASIS" wins over ":CORP", which wins over ":CONF" */
				if ( is_asis )
					output_tag( outptr, lvl2indent(level), "name", NULL, TAG_OPEN, TAG_NEWLINE, NULL );
				else if ( is_corp )
					output_tag( outptr, lvl2indent(level), "name", NULL, TAG_OPEN, TAG_NEWLINE, "type", "corporate", NULL );
				else
					output_tag( outptr, lvl2indent(level), "name", NULL, TAG_OPEN, TAG_NEWLINE, "type", "conference", NULL );
				output_fil( outptr, lvl2indent(incr_level(level,1)), "namePart", f, idx, TAG_OPENCLOSE, TAG_NEWLINE, NULL );
			}

			/* role element, then close the name */
			output_tag( outptr, lvl2indent(incr_level(level,1)), "role", NULL, TAG_OPEN, TAG_NEWLINE, NULL );
			if ( names[kind].code & MARC_AUTHORITY )
				output_tag( outptr, lvl2indent(incr_level(level,2)), "roleTerm", names[kind].mods, TAG_OPENCLOSE, TAG_NEWLINE, "authority", "marcrelator", "type", "text", NULL );
			else
				output_tag( outptr, lvl2indent(incr_level(level,2)), "roleTerm", names[kind].mods, TAG_OPENCLOSE, TAG_NEWLINE, "type", "text", NULL );
			output_tag( outptr, lvl2indent(incr_level(level,1)), "role", NULL, TAG_CLOSE, TAG_NEWLINE, NULL );
			output_tag( outptr, lvl2indent(level), "name", NULL, TAG_CLOSE, TAG_NEWLINE, NULL );

			fields_setused( f, idx );
		}
	}

	newstr_free( &role );
}
Пример #16
0
/*
 * bibtex_cleantoken()
 *
 * Strips LaTeX markup from s in place.  Does nothing when p is non-NULL
 * and p->latexin is 0; a NULL p always cleans.
 *
 * The steps run in a fixed order: formatting commands are deleted,
 * "\%" and "\$" are unescaped, all braces are removed, runs of spaces
 * are collapsed to one, and finally "\textdollar" / "\textunderscore"
 * are replaced by their plain characters.
 */
static void
bibtex_cleantoken( newstr *s, param *p )
{
	/* commands deleted outright, in the order they are applied */
	static char *strip[] = {
		/* 'textcomp' annotations */
		"\\textit", "\\textbf", "\\textsl", "\\textsc", "\\textsf",
		"\\texttt", "\\textsubscript", "\\textsuperscript",
		"\\emph", "\\url",
		/* Other text annotations */
		"\\it ", "\\em ",
	};
	int nstrip = sizeof( strip ) / sizeof( strip[0] );
	int k;

	if ( p && p->latexin==0 ) return;

	for ( k=0; k<nstrip; ++k )
		newstr_findreplace( s, strip[k], "" );

	newstr_findreplace( s, "\\%", "%" );
	newstr_findreplace( s, "\\$", "$" );
	newstr_findreplace( s, "{", "" );
	newstr_findreplace( s, "}", "" );

	/* keep collapsing double spaces until a pass changes nothing */
	for ( ;; ) {
		if ( !newstr_findreplace( s, "  ", " " ) ) break;
	}

	/* 'textcomp' annotations that we don't want to substitute on output*/
	newstr_findreplace( s, "\\textdollar", "$" );
	newstr_findreplace( s, "\\textunderscore", "_" );
}
Пример #17
0
/*
 * bibtex_cleantoken()
 *
 * Strips LaTeX markup from s in place.
 *
 * The steps run in a fixed order: formatting commands are deleted,
 * "\%" and "\$" are unescaped, runs of spaces are collapsed to one,
 * "\textdollar" / "\textunderscore" are replaced by their plain
 * characters, and the string is then passed to bibtex_process_bracket()
 * and bibtex_process_tilde().
 */
static void
bibtex_cleantoken( newstr *s )
{
	/* commands deleted outright, in the order they are applied */
	static char *strip[] = {
		/* 'textcomp' annotations */
		"\\textit", "\\textbf", "\\textsl", "\\textsc", "\\textsf",
		"\\texttt", "\\textsubscript", "\\textsuperscript",
		"\\emph", "\\url", "\\mbox",
		/* Other text annotations */
		"\\it ", "\\em ",
	};
	int nstrip = sizeof( strip ) / sizeof( strip[0] );
	int k;

	for ( k=0; k<nstrip; ++k )
		newstr_findreplace( s, strip[k], "" );

	newstr_findreplace( s, "\\%", "%" );
	newstr_findreplace( s, "\\$", "$" );

	/* keep collapsing double spaces until a pass changes nothing */
	for ( ;; ) {
		if ( !newstr_findreplace( s, "  ", " " ) ) break;
	}

	/* 'textcomp' annotations that we don't want to substitute on output*/
	newstr_findreplace( s, "\\textdollar", "$" );
	newstr_findreplace( s, "\\textunderscore", "_" );

	bibtex_process_bracket( s );
	bibtex_process_tilde( s );
}