/*
 * process_keywords()
 *
 * Break a keywords="" value apart at each semicolon and store every
 * non-empty piece in info under the "KEYWORD" tag at the given level.
 *
 * Commas are also frequently used as separators, but splitting on them
 * would break entries like:
 *     keywords="Microscopy, Confocal"
 *
 * A NULL or zero-length d is accepted and adds nothing.
 *
 * Returns BIBL_OK or BIBL_ERR_MEMERR.
 */
static int
process_keywords( fields *info, newstr *d, int level )
{
	int ret = BIBL_OK;
	newstr kw;
	char *cursor;

	if ( !d || d->len==0 ) return BIBL_OK;

	newstr_init( &kw );

	for ( cursor = d->data; *cursor; ) {
		/* copy the next piece into kw; cursor moves to wherever
		 * newstr_cpytodelim() reports it stopped */
		cursor = newstr_cpytodelim( &kw, skip_ws( cursor ), ";", 1 );
		newstr_trimendingws( &kw );

		if ( newstr_memerr( &kw ) ) {
			ret = BIBL_ERR_MEMERR;
			break;
		}

		/* nothing between two delimiters -- do not store it */
		if ( kw.len == 0 ) continue;

		/* any fields_add() failure is reported as a memory error */
		if ( fields_add( info, "KEYWORD", kw.data, level ) != FIELDS_OK ) {
			ret = BIBL_ERR_MEMERR;
			break;
		}
	}

	newstr_free( &kw );
	return ret;
}
/*
 * bibtex_item()
 *
 * Append one item starting at p to s and return a pointer to the
 * character that ended the scan.
 *
 * Scanning stops at the terminating NUL, or at an unquoted, unbraced
 * ',', '=', '}' or ')'.  The stopping character is not copied and is
 * not consumed: the returned pointer addresses it.
 *
 * Double quotes toggle the "inside quotes" state unless they are
 * preceded by a backslash; '{' and '}' raise and lower the brace depth.
 * Quotes and braces are copied into s along with the text.  Whitespace
 * (as reported by is_ws()) is dropped while s is still empty, and
 * trailing whitespace is trimmed from s before returning.
 *
 * The backslash look-behind is limited to characters at or after the
 * entry value of p, so a quote in the very first position never causes
 * a read before the start of the caller's buffer.
 */
static char *
bibtex_item( char *p, newstr *s )
{
	char *start = p;
	int nquotes = 0;
	int nbrackets = 0;

	while ( *p ) {
		/* separators only count outside quotes and braces */
		if ( !nquotes && !nbrackets ) {
			if ( *p==',' || *p=='=' || *p=='}' || *p==')' ) break;
		}

		if ( *p=='\"' && ( p==start || *(p-1)!='\\' ) ) {
			nquotes = ( nquotes==0 );
			newstr_addchar( s, *p );
		} else if ( *p=='{' ) {
			nbrackets++;
			newstr_addchar( s, *p );
		} else if ( *p=='}' ) {
			nbrackets--;
			newstr_addchar( s, *p );
		} else {
			/* skip whitespace until something has been stored */
			if ( s->len!=0 || !is_ws( *p ) )
				newstr_addchar( s, *p );
		}
		p++;
	}

	newstr_trimendingws( s );
	return p;
}
/*
 * bibtex_split()
 *
 * Cut the string s into pieces at every '#' that sits outside double
 * quotes and outside braces, adding each non-empty piece to tokens.
 *
 * Quote and brace characters are kept in the pieces.  Characters for
 * which is_ws() is true are dropped unless they fall inside quotes or
 * braces.  Once all pieces are added, trailing whitespace is trimmed
 * from every entry of tokens.
 *
 * Every '"' toggles the quote state (no escape handling), and the
 * results of list_add() are not checked.
 */
static void
bibtex_split( list *tokens, newstr *s )
{
	int in_quotes = 0, depth = 0;
	int pos, total = s->len;
	newstr piece;
	char c;

	newstr_init( &piece );

	for ( pos = 0; pos < total; ++pos ) {
		c = s->data[pos];
		switch ( c ) {
		case '\"':
			in_quotes = !in_quotes;
			newstr_addchar( &piece, '\"' );
			break;
		case '{':
			depth++;
			newstr_addchar( &piece, '{' );
			break;
		case '}':
			depth--;
			newstr_addchar( &piece, '}' );
			break;
		case '#':
			if ( !in_quotes && !depth ) {
				/* separator: flush what has been gathered */
				if ( piece.len ) list_add( tokens, piece.data );
				newstr_empty( &piece );
			} else {
				/* protected by quotes/braces: ordinary text */
				newstr_addchar( &piece, c );
			}
			break;
		default:
			if ( !is_ws( c ) || in_quotes || depth )
				newstr_addchar( &piece, c );
			break;
		}
	}

	/* whatever follows the last separator */
	if ( piece.len ) list_add( tokens, piece.data );

	for ( pos = 0; pos < tokens->n; ++pos )
		newstr_trimendingws( &(tokens->str[pos]) );

	newstr_free( &piece );
}
/*
 * test_trimws()
 *
 * Exercise newstr_trimstartingws(), newstr_trimendingws() and
 * newstr_stripws() on an empty string, an all-blank string and a
 * padded multi-word string, comparing s against the expected text
 * with string_mismatch() after every step.
 *
 * s is used as scratch space and is overwritten.
 *
 * Returns the number of checks that reported a mismatch.
 */
static int
test_trimws( newstr *s )
{
	char padded[]   = " ksjadfk lajskfjds askdjflkj ";
	char blank[]    = " ";
	char no_lead[]  = "ksjadfk lajskfjds askdjflkj ";
	char no_trail[] = " ksjadfk lajskfjds askdjflkj";
	char bare[]     = "ksjadfk lajskfjds askdjflkj";
	char squeezed[] = "ksjadfklajskfjdsaskdjflkj";
	int nfail = 0;

	/* trimming an empty string leaves it empty */
	newstr_empty( s );
	newstr_trimstartingws( s );
	nfail += ( string_mismatch( s, 0, "" ) != 0 );
	newstr_trimendingws( s );
	nfail += ( string_mismatch( s, 0, "" ) != 0 );

	/* an all-blank string trims down to nothing from either side */
	newstr_strcpy( s, blank );
	newstr_trimstartingws( s );
	nfail += ( string_mismatch( s, 0, "" ) != 0 );
	newstr_strcpy( s, blank );
	newstr_trimendingws( s );
	nfail += ( string_mismatch( s, 0, "" ) != 0 );

	/* padded text: front first, then back */
	newstr_strcpy( s, padded );
	newstr_trimstartingws( s );
	nfail += ( string_mismatch( s, strlen( no_lead ), no_lead ) != 0 );
	newstr_trimendingws( s );
	nfail += ( string_mismatch( s, strlen( bare ), bare ) != 0 );

	/* padded text: back first, then front */
	newstr_strcpy( s, padded );
	newstr_trimendingws( s );
	nfail += ( string_mismatch( s, strlen( no_trail ), no_trail ) != 0 );
	newstr_trimstartingws( s );
	nfail += ( string_mismatch( s, strlen( bare ), bare ) != 0 );

	/* newstr_stripws(): empty, nothing to strip, interior blanks */
	newstr_empty( s );
	newstr_stripws( s );
	nfail += ( string_mismatch( s, 0, "" ) != 0 );
	newstr_strcpy( s, "0123456789" );
	newstr_stripws( s );
	nfail += ( string_mismatch( s, 10, "0123456789" ) != 0 );
	newstr_strcpy( s, padded );
	newstr_stripws( s );
	nfail += ( string_mismatch( s, strlen( squeezed ), squeezed ) != 0 );

	return nfail;
}
/*
 * process_endline2()
 *
 * Append the rest of the current line to data, skipping leading
 * spaces/tabs and trimming trailing whitespace from data afterwards.
 *
 * tag is accepted but not touched here.
 *
 * Returns a pointer just past the run of '\r'/'\n' characters that
 * ends the line (or to the terminating NUL).
 */
static char *
process_endline2( newstr *tag, newstr *data, char *p )
{
	/* leading blanks */
	for ( ; *p==' ' || *p=='\t'; p++ )
		;

	/* line contents */
	for ( ; *p!='\0' && *p!='\r' && *p!='\n'; p++ )
		newstr_addchar( data, *p );
	newstr_trimendingws( data );

	/* line terminator(s) */
	for ( ; *p=='\r' || *p=='\n'; p++ )
		;

	return p;
}
/*
 * process_line()
 *
 * Split one tagged line at p into its tag and its value.
 *
 * Up to six leading characters are treated as the tag prefix; the
 * first two of them are appended to tag and the rest are skipped.
 * Spaces/tabs after the prefix are skipped, the remainder of the line
 * is appended to data, and trailing whitespace is trimmed from data.
 *
 * The prefix scan stops at '\r' or '\n' as well as at NUL, so a line
 * shorter than six characters yields empty data instead of consuming
 * its own terminator and picking up the following line's text.
 *
 * Returns a pointer just past the run of '\r'/'\n' characters that
 * ends the line (or to the terminating NUL).
 */
static char*
process_line( newstr *tag, newstr *data, char *p )
{
	enum { TAG_CHARS = 2, PREFIX_CHARS = 6 };
	int i = 0;

	while ( i<PREFIX_CHARS && *p && *p!='\r' && *p!='\n' ) {
		if ( i<TAG_CHARS ) newstr_addchar( tag, *p );
		p++;
		i++;
	}

	while ( *p==' ' || *p=='\t' ) p++;

	while ( *p && *p!='\r' && *p!='\n' )
		newstr_addchar( data, *p++ );
	newstr_trimendingws( data );

	while ( *p=='\n' || *p=='\r' ) p++;

	return p;
}
/*
 * copacin_addtag2()
 *
 * Split one tagged line at p into its tag and its value.
 *
 * Up to three leading characters are appended to tag.  Spaces/tabs
 * after them are skipped, the remainder of the line is appended to
 * data, and trailing whitespace is trimmed from data.
 *
 * The tag scan stops at '\r' or '\n' as well as at NUL, so a line
 * shorter than three characters cannot pull its own terminator into
 * tag and then take the following line's text as its value.
 *
 * Returns a pointer just past the run of '\r'/'\n' characters that
 * ends the line (or to the terminating NUL).
 */
static char*
copacin_addtag2( char *p, newstr *tag, newstr *data )
{
	enum { TAG_CHARS = 3 };
	int i = 0;

	while ( i<TAG_CHARS && *p && *p!='\r' && *p!='\n' ) {
		newstr_addchar( tag, *p++ );
		i++;
	}

	while ( *p==' ' || *p=='\t' ) p++;

	while ( *p && *p!='\r' && *p!='\n' ) {
		newstr_addchar( data, *p );
		p++;
	}
	newstr_trimendingws( data );

	while ( *p=='\n' || *p=='\r' ) p++;

	return p;
}
/*
 * process_isiline()
 *
 * Split one tagged line at p into its tag and its value.
 *
 * At most two leading characters are appended to tag; the scan ends
 * early at NUL, '\r' or '\n'.  Spaces/tabs that follow are skipped,
 * the remainder of the line is appended to data, and trailing
 * whitespace is trimmed from data.
 *
 * Returns a pointer just past the run of '\r'/'\n' characters that
 * ends the line (or to the terminating NUL).
 */
static char *
process_isiline( newstr *tag, newstr *data, char *p )
{
	int taken;

	/* tag characters */
	for ( taken = 0; taken < 2; taken++ ) {
		if ( *p=='\0' || *p=='\r' || *p=='\n' ) break;
		newstr_addchar( tag, *p );
		p++;
	}

	/* blanks separating tag and value */
	for ( ; *p==' ' || *p=='\t'; p++ )
		;

	/* value */
	for ( ; *p!='\0' && *p!='\r' && *p!='\n'; p++ )
		newstr_addchar( data, *p );
	newstr_trimendingws( data );

	/* line terminator(s) */
	for ( ; *p=='\r' || *p=='\n'; p++ )
		;

	return p;
}
/*
 * bibtex_split()
 *
 * Cut the string s into pieces at whitespace (as reported by is_ws())
 * that lies outside braces, adding each non-empty piece to tokens.
 *
 * '{' and '}' change the brace depth unless the preceding character is
 * a backslash; either way the character itself is kept in the piece.
 * Whitespace inside braces is kept.  After all pieces have been added,
 * leading and trailing whitespace is trimmed from every entry of
 * tokens; this step is skipped if adding a piece failed.
 *
 * Returns BIBL_OK, or BIBL_ERR_MEMERR if list_add() returns NULL.
 */
static int
bibtex_split( list *tokens, newstr *s )
{
	int pos, total = s->len, depth = 0, ret = BIBL_OK;
	int unescaped;
	newstr piece, *entry;
	char c;

	newstr_init( &piece );

	for ( pos = 0; pos < total; ++pos ) {
		c = s->data[pos];
		unescaped = ( pos==0 || s->data[pos-1]!='\\' );

		if ( c=='{' && unescaped ) {
			depth++;
			newstr_addchar( &piece, '{' );
		} else if ( c=='}' && unescaped ) {
			depth--;
			newstr_addchar( &piece, '}' );
		} else if ( !is_ws( c ) || depth ) {
			newstr_addchar( &piece, c );
		} else {
			/* whitespace outside braces ends the current piece */
			if ( piece.len && !list_add( tokens, &piece ) ) {
				ret = BIBL_ERR_MEMERR;
				goto out;
			}
			newstr_empty( &piece );
		}
	}

	/* whatever follows the last separator */
	if ( piece.len && !list_add( tokens, &piece ) ) {
		ret = BIBL_ERR_MEMERR;
		goto out;
	}

	for ( pos = 0; pos < tokens->n; ++pos ) {
		entry = list_get( tokens, pos );
		newstr_trimstartingws( entry );
		newstr_trimendingws( entry );
	}

out:
	newstr_free( &piece );
	return ret;
}
/*
 * process_sente()
 *
 * sentelink = {file://localhost/full/path/to/file.pdf,Sente,PDF}
 *
 * Copy the text that precedes the first comma (leading whitespace
 * skipped, trailing whitespace trimmed) and, if it is non-empty, store
 * it in info under the "FILEATTACH" tag at the given level.
 *
 * A NULL or zero-length d is accepted and adds nothing; this keeps
 * d->data from being handed to skip_ws() when there is no value to
 * read.
 *
 * Returns BIBL_OK or BIBL_ERR_MEMERR.
 */
static int
process_sente( fields *info, newstr *d, int level )
{
	int fstatus, status = BIBL_OK;
	newstr link;

	if ( !d || d->len==0 ) return BIBL_OK;

	newstr_init( &link );
	newstr_cpytodelim( &link, skip_ws( d->data ), ",", 0 );
	newstr_trimendingws( &link );
	if ( newstr_memerr( &link ) ) status = BIBL_ERR_MEMERR;

	if ( status==BIBL_OK && link.len ) {
		/* any fields_add() failure is reported as a memory error */
		fstatus = fields_add( info, "FILEATTACH", link.data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}

	newstr_free( &link );
	return status;
}
/*
 * process_file()
 *
 * file={Description:/full/path/to/file.pdf:PDF}
 *
 * Store the attached file named by d in info under the "FILEATTACH"
 * tag at the given level.
 *
 * When the value holds more than one colon, the text after the first
 * colon and before the last colon is used, with surrounding whitespace
 * trimmed; an empty result is not stored.  Taking everything up to the
 * last colon keeps DOS-style paths such as ":C:/....pdf:PDF" intact.
 * Otherwise the value is not in the expected form and is stored as is.
 *
 * Returns BIBL_OK or BIBL_ERR_MEMERR.
 */
static int
process_file( fields *info, newstr *d, int level )
{
	char *raw = d->data;
	int ret = BIBL_OK;
	int begin, end, k;
	newstr path;

	if ( count_colons( raw ) <= 1 ) {
		/* not formatted as expected: copy the value directly */
		if ( fields_add( info, "FILEATTACH", raw, level ) != FIELDS_OK )
			ret = BIBL_ERR_MEMERR;
		return ret;
	}

	/* keep what lies strictly between the first and last colon */
	begin = first_colon( raw ) + 1;
	end = last_colon( raw );

	newstr_init( &path );
	for ( k = begin; k < end; ++k )
		newstr_addchar( &path, raw[k] );
	newstr_trimstartingws( &path );
	newstr_trimendingws( &path );

	if ( newstr_memerr( &path ) ) {
		ret = BIBL_ERR_MEMERR;
	} else if ( path.len ) {
		if ( fields_add( info, "FILEATTACH", path.data, level ) != FIELDS_OK )
			ret = BIBL_ERR_MEMERR;
	}

	newstr_free( &path );
	return ret;
}