/* medin_assembleref()
 *
 * Recursively walk the xml tree rooted at node. Children of a
 * <PubmedArticle> are handed to medin_pubmedarticle(), children of a
 * <MedlineCitation> to medin_medlinecitation(); children of any other
 * element are searched recursively, and siblings are visited afterwards.
 *
 * Once the subtree and siblings have been handled, if info holds any
 * fields the reference is assumed to be a journal article and the
 * matching RESOURCE/ISSUANCE/GENRE fields are added.
 *
 * Returns the first non-BIBL_OK status from a callee, BIBL_ERR_MEMERR if
 * a field cannot be added, BIBL_OK otherwise.
 */
static int
medin_assembleref( xml *node, fields *info )
{
	/* assume everything is a journal article */
	static const struct {
		char *tag;
		char *value;
		int level;
	} defaults[] = {
		{ "RESOURCE", "text",             0 },
		{ "ISSUANCE", "continuing",       1 },
		{ "GENRE",    "periodical",       1 },
		{ "GENRE",    "academic journal", 1 },
	};
	int ndefaults = sizeof( defaults ) / sizeof( defaults[0] );
	int status = BIBL_OK, fstatus, i;
	xml *child = node->down;

	if ( child ) {
		if ( xml_tagexact( node, "PubmedArticle" ) )
			status = medin_pubmedarticle( child, info );
		else if ( xml_tagexact( node, "MedlineCitation" ) )
			status = medin_medlinecitation( child, info );
		else
			status = medin_assembleref( child, info );
	}
	if ( status!=BIBL_OK ) return status;

	if ( node->next ) {
		status = medin_assembleref( node->next, info );
		if ( status!=BIBL_OK ) return status;
	}

	if ( !fields_num( info ) ) return status;

	for ( i=0; i<ndefaults; ++i ) {
		fstatus = fields_add( info, defaults[i].tag,
				defaults[i].value, defaults[i].level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}

	return BIBL_OK;
}
/* medin_author()
 *
 * Assemble one personal name into name as '|'-separated parts, family
 * name first, from the sibling list found under an <Author> element:
 *
 * <AuthorList CompleteYN="Y">
 *    <Author>
 *        <LastName>Barondeau</LastName>
 *        <ForeName>David P</ForeName>
 *        ( or <FirstName>David P</FirstName> )
 *        <Initials>DP</Initials>
 *    </Author>
 *    <Author>
 *        <CollectiveName>Organization</CollectiveName>
 *    </Author>
 * </AuthorList>
 *
 * <LastName> is placed in front of whatever has been collected so far.
 * <ForeName>/<FirstName> is split on blanks and each word appended as
 * its own part. <Initials> are used (one part per letter) only when no
 * '|' is present in name yet, i.e. no given name has been appended after
 * a family name.
 *
 * name is left untouched if none of these elements are present (for
 * example for a <CollectiveName>-only author).
 *
 * Always returns BIBL_OK; the caller is expected to check
 * newstr_memerr( name ) for allocation failures.
 */
static int
medin_author( xml *node, newstr *name )
{
	char *p;

	for ( ; node; node = node->next ) {

		if ( xml_tagexact( node, "LastName" ) ) {
			if ( name->len ) {
				/* given names arrived first: put "Last|" in front */
				newstr_prepend( name, "|" );
				newstr_prepend( name, xml_data( node ) );
			}
			else newstr_strcat( name, xml_data( node ) );
		}

		else if ( xml_tagexact( node, "ForeName" ) ||
		          xml_tagexact( node, "FirstName" ) ) {
			p = xml_data( node );
			while ( p && *p ) {
				while ( *p==' ' ) p++;
				/* only trailing blanks were left: do not emit
				 * a dangling '|' separator */
				if ( !*p ) break;
				if ( name->len ) newstr_addchar( name, '|' );
				while ( *p && *p!=' ' )
					newstr_addchar( name, *p++ );
			}
		}

		/* the data pointer is checked before strchr() in case the
		 * string has never been written to and has no buffer yet */
		else if ( xml_tagexact( node, "Initials" ) &&
		          ( !name->data || !strchr( name->data, '|' ) ) ) {
			p = xml_data( node );
			while ( p && *p ) {
				if ( !is_ws( *p ) ) {
					if ( name->len )
						newstr_addchar( name, '|' );
					newstr_addchar( name, *p );
				}
				/* always advance, so whitespace inside the
				 * initials cannot stall the loop */
				p++;
			}
		}
	}

	return BIBL_OK;
}
/* medin_pubmedarticle()
 *
 * Walk the sibling list starting at node (the children of a
 * <PubmedArticle>). For every element that has children, hand those
 * children to medin_medlinecitation() for <MedlineCitation> or to
 * medin_pubmeddata() for <PubmedData>; other elements are skipped.
 *
 * Stops at, and returns, the first status that is not BIBL_OK.
 */
static int
medin_pubmedarticle( xml *node, fields *info )
{
	int status = BIBL_OK;

	for ( ;; ) {
		if ( node->down ) {
			if ( xml_tagexact( node, "MedlineCitation" ) )
				status = medin_medlinecitation( node->down, info );
			else if ( xml_tagexact( node, "PubmedData" ) )
				status = medin_pubmeddata( node->down, info );
			if ( status!=BIBL_OK ) break;
		}
		if ( !node->next ) break;
		node = node->next;
	}

	return status;
}
/* wordin_people()
 *
 * Descend through <b:Author> and <b:NameList> wrapper elements until
 * <b:Person> entries are reached. The children of each <b:Person> are
 * passed to wordin_person() together with type, and the walk then
 * continues with the next sibling.
 *
 * Elements with any other tag end the walk without touching info.
 *
 * Returns the first non-BIBL_OK status encountered, BIBL_OK otherwise.
 */
static int
wordin_people( xml *node, fields *info, char *type )
{
	int is_wrapper, ret = BIBL_OK;

	is_wrapper = xml_tagexact( node, "b:Author" ) ||
	             xml_tagexact( node, "b:NameList" );

	/* wrappers only matter for what they contain */
	if ( is_wrapper && node->down )
		return wordin_people( node->down, info, type );

	if ( !xml_tagexact( node, "b:Person" ) )
		return BIBL_OK;

	if ( node->down ) {
		ret = wordin_person( node->down, info, type );
		if ( ret!=BIBL_OK ) return ret;
	}

	if ( node->next )
		ret = wordin_people( node->next, info, type );

	return ret;
}
/* medin_authorlist()
 *
 * node is an <AuthorList> element. For each <Author> child that has
 * children of its own, build a personal name with medin_author(); if
 * that yields nothing, fall back to medin_corpauthor() for a
 * <CollectiveName>. Non-empty names are added to info at level 0 under
 * "AUTHOR" (person) or "AUTHOR:CORP" (collective).
 *
 * The scratch string is released on every exit path, including errors.
 *
 * Returns BIBL_ERR_MEMERR if building the name or adding the field
 * fails, BIBL_OK otherwise.
 */
static int
medin_authorlist( xml *node, fields *info )
{
	int fstatus, status, ret = BIBL_OK;
	newstr name;
	char *tag;

	newstr_init( &name );

	for ( node = node->down; node; node = node->next ) {

		if ( !xml_tagexact( node, "Author" ) || !node->down )
			continue;

		status = medin_author( node->down, &name );
		tag = "AUTHOR";
		if ( !name.len ) {
			/* no personal name parts: try a collective name */
			status = medin_corpauthor( node->down, &name );
			tag = "AUTHOR:CORP";
		}
		if ( newstr_memerr( &name ) || status!=BIBL_OK ) {
			ret = BIBL_ERR_MEMERR;
			goto out;
		}

		if ( name.len ) {
			fstatus = fields_add( info, tag, name.data, 0 );
			if ( fstatus!=FIELDS_OK ) {
				ret = BIBL_ERR_MEMERR;
				goto out;
			}
		}

		/* reuse the buffer for the next author */
		newstr_empty( &name );
	}

out:
	newstr_free( &name );
	return ret;
}
/* medin_doconvert()
 *
 * Try to convert the data of node using the first matching rule in the
 * table c[0..nc-1]. A rule with a NULL attribute name (a) matches on the
 * tag alone; otherwise the tag and the attribute/value pair (a, aval)
 * are both tested via xml_tag_attrib(). On a match the node's data is
 * added to info under the rule's output tag and level.
 *
 * *found is set to 1 if a rule matched and to 0 otherwise (including
 * when the node carries no data).
 *
 * Returns BIBL_ERR_MEMERR if the field could not be added, BIBL_OK
 * otherwise.
 */
static int
medin_doconvert( xml *node, fields *info, xml_convert *c, int nc, int *found )
{
	xml_convert *rule;
	int k, matched;
	char *data;

	*found = 0;

	if ( !xml_hasdata( node ) ) return BIBL_OK;

	data = xml_data( node );

	for ( k=0; k<nc; ++k ) {
		rule = &( c[k] );

		if ( rule->a==NULL )
			matched = xml_tagexact( node, rule->in );
		else
			matched = xml_tag_attrib( node, rule->in, rule->a, rule->aval );

		if ( !matched ) continue;

		/* only the first matching rule is applied */
		*found = 1;
		if ( fields_add( info, rule->out, data, rule->level )!=FIELDS_OK )
			return BIBL_ERR_MEMERR;
		break;
	}

	return BIBL_OK;
}
/* xml_getencodingr()
 *
 * Recursively search node, its children and its following siblings for
 * an "xml" element carrying an "encoding" attribute and translate the
 * attribute into a character set code. "UTF-8" (case-insensitive) maps
 * to CHARSET_UNICODE; anything else is looked up with get_charset(). A
 * warning is printed to stderr for an unrecognized encoding name.
 *
 * A recognized result from the children overrides the node's own, and a
 * recognized result from the siblings overrides both.
 *
 * Returns the character set code, or CHARSET_UNKNOWN if none was found.
 */
static int
xml_getencodingr( xml *node )
{
	int charset = CHARSET_UNKNOWN, sub;
	newstr *enc;

	if ( xml_tagexact( node, "xml" ) ) {
		enc = xml_getattrib( node, "encoding" );
		if ( enc && enc->data ) {
			if ( strcasecmp( enc->data, "UTF-8" )==0 )
				charset = CHARSET_UNICODE;
			else
				charset = get_charset( enc->data );
			if ( charset==CHARSET_UNKNOWN )
				fprintf( stderr, "Warning: did not recognize "
					"encoding '%s'\n", enc->data );
		}
	}

	if ( node->down ) {
		sub = xml_getencodingr( node->down );
		if ( sub!=CHARSET_UNKNOWN ) charset = sub;
	}

	if ( node->next ) {
		sub = xml_getencodingr( node->next );
		if ( sub!=CHARSET_UNKNOWN ) charset = sub;
	}

	return charset;
}
/* medin_corpauthor()
 *
 * Scan the sibling list starting at node for the first
 * <CollectiveName> element and copy its data into name (replacing any
 * previous content). name is left unchanged if no such element exists.
 *
 * Always returns BIBL_OK; allocation failures are not checked here.
 */
static int
medin_corpauthor( xml *node, newstr *name )
{
	while ( !xml_tagexact( node, "CollectiveName" ) ) {
		if ( !node->next ) return BIBL_OK;
		node = node->next;
	}

	newstr_strcpy( name, xml_data( node ) );

	return BIBL_OK;
}
/* medin_meshheadinglist()
 *
 * Walk the sibling list starting at node (the children of a
 * <MeshHeadingList>) and pass the children of every <MeshHeading>
 * element to medin_meshheading().
 *
 * Stops at, and returns, the first status that is not BIBL_OK.
 */
static int
medin_meshheadinglist( xml *node, fields *info )
{
	int status = BIBL_OK;

	for ( ;; ) {
		if ( xml_tagexact( node, "MeshHeading" ) && node->down ) {
			status = medin_meshheading( node->down, info );
			if ( status!=BIBL_OK ) break;
		}
		if ( !node->next ) break;
		node = node->next;
	}

	return status;
}
/* wordin_assembleref()
 *
 * Locate the <b:Source> element and hand its children to
 * wordin_reference(). An element with an empty tag that has children is
 * descended into; any other element is ignored.
 *
 * Returns the status of the callee, or BIBL_OK when nothing was
 * processed.
 */
static int
wordin_assembleref( xml *node, fields *info )
{
	if ( xml_tagexact( node, "b:Source" ) ) {
		if ( !node->down ) return BIBL_OK;
		return wordin_reference( node->down, info );
	}

	/* untagged wrapper node: look one level deeper */
	if ( node->tag->len==0 && node->down )
		return wordin_assembleref( node->down, info );

	return BIBL_OK;
}
/* wordin_person_last()
 *
 * Scan the sibling list starting at node for the first
 * <b:Last>xxxx</b:Last> element. If its value is non-empty, copy it
 * into name, replacing any previous content.
 *
 * Only the first <b:Last> is considered; later ones are ignored even
 * if the first one is empty.
 *
 * Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise.
 */
static int
wordin_person_last( xml *node, newstr *name )
{
	for ( ; node; node = node->next ) {
		if ( !xml_tagexact( node, "b:Last" ) ) continue;

		if ( node->value->len ) {
			newstr_strcpy( name, node->value->data );
			if ( newstr_memerr( name ) ) return BIBL_ERR_MEMERR;
		}
		break;
	}

	return BIBL_OK;
}
/* medin_medlinecitation()
 *
 * Walk the sibling list starting at node (the children of a
 * <MedlineCitation>):
 *
 *   <PMID> with data        -> added to info as "PMID", level 0
 *   <Article>               -> children passed to medin_article()
 *   <MedlineJournalInfo>    -> children passed to medin_journal2()
 *   <MeshHeadingList>       -> children passed to medin_meshheadinglist()
 *
 * Returns BIBL_ERR_MEMERR if the PMID cannot be added, the first
 * non-BIBL_OK status from a callee, or BIBL_OK.
 */
static int
medin_medlinecitation( xml *node, fields *info )
{
	int status = BIBL_OK;

	for ( ;; ) {

		if ( xml_tagexact( node, "PMID" ) && node->value->data ) {
			if ( fields_add( info, "PMID", node->value->data, 0 )!=FIELDS_OK )
				return BIBL_ERR_MEMERR;
		}

		if ( node->down ) {
			if ( xml_tagexact( node, "Article" ) )
				status = medin_article( node->down, info );
			else if ( xml_tagexact( node, "MedlineJournalInfo" ) )
				status = medin_journal2( node->down, info );
			else if ( xml_tagexact( node, "MeshHeadingList" ) )
				status = medin_meshheadinglist( node->down, info );
			if ( status!=BIBL_OK ) break;
		}

		if ( !node->next ) break;
		node = node->next;
	}

	return status;
}
/* wordin_person_first()
 *
 * Scan the sibling list starting at node and append the value of every
 * non-empty <b:First>xxxx</b:First> element to name. A '|' separator is
 * inserted before the value whenever name already holds something.
 *
 * Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise.
 */
static int
wordin_person_first( xml *node, newstr *name )
{
	while ( node ) {
		if ( xml_tagexact( node, "b:First" ) && node->value->len ) {
			if ( name->len ) newstr_addchar( name, '|' );
			newstr_strcat( name, node->value->data );
			if ( newstr_memerr( name ) ) return BIBL_ERR_MEMERR;
		}
		node = node->next;
	}

	return BIBL_OK;
}
/* medin_journal1()
 *
 * Recursively convert a <Journal> subtree (and the siblings reachable
 * from node) into level-1 fields:
 *
 * <Journal>
 *    <ISSN>0027-8424</ISSN>
 *    <JournalIssue PrintYN="Y">
 *       <Volume>100</Volume>
 *       <Issue>21</Issue>
 *       <PubDate>
 *          <Year>2003</Year>
 *          <Month>Oct</Month>
 *          <Day>14</Day>
 *       </PubDate>
 *    </JournalIssue>
 * </Journal>
 *
 * or....
 *
 * <Journal>
 *    <ISSN IssnType="Print">0735-0414</ISSN>
 *    <JournalIssue CitedMedium="Print">
 *        <Volume>38</Volume>
 *        <Issue>1</Issue>
 *        <PubDate>
 *            <MedlineDate>2003 Jan-Feb</MedlineDate>
 *        </PubDate>
 *    </JournalIssue>
 *    <Title>Alcohol and alcoholism (Oxford, Oxfordshire)  </Title>
 *    <ISOAbbreviation>Alcohol Alcohol.</ISOAbbreviation>
 * </Journal>
 *
 * Elements listed in the conversion table are copied directly; a
 * <MedlineDate> is passed to medin_medlinedate() and a <Language> to
 * medin_language(), both at level 1.
 *
 * Returns the first non-BIBL_OK status encountered, BIBL_OK otherwise.
 */
static int
medin_journal1( xml *node, fields *info )
{
	xml_convert conversions[] = {
		{ "Title",           NULL, NULL, "TITLE",      1 },
		{ "ISOAbbreviation", NULL, NULL, "SHORTTITLE", 1 },
		{ "ISSN",            NULL, NULL, "ISSN",       1 },
		{ "Volume",          NULL, NULL, "VOLUME",     1 },
		{ "Issue",           NULL, NULL, "ISSUE",      1 },
		{ "Year",            NULL, NULL, "PARTYEAR",   1 },
		{ "Month",           NULL, NULL, "PARTMONTH",  1 },
		{ "Day",             NULL, NULL, "PARTDAY",    1 },
	};
	int nconversions = sizeof( conversions ) / sizeof( conversions[0] );
	int status, matched;

	if ( xml_hasdata( node ) ) {

		status = medin_doconvert( node, info, conversions,
				nconversions, &matched );
		if ( status!=BIBL_OK ) return status;

		/* not a plain copy: check the elements needing parsing */
		if ( !matched && xml_tagexact( node, "MedlineDate" ) ) {
			status = medin_medlinedate( info, xml_data( node ), 1 );
			if ( status!=BIBL_OK ) return status;
		}
		if ( !matched && xml_tagexact( node, "Language" ) ) {
			status = medin_language( node, info, 1 );
			if ( status!=BIBL_OK ) return status;
		}
	}

	if ( node->down ) {
		status = medin_journal1( node->down, info );
		if ( status!=BIBL_OK ) return status;
	}

	if ( node->next ) {
		status = medin_journal1( node->next, info );
		if ( status!=BIBL_OK ) return status;
	}

	return BIBL_OK;
}
/* medin_medlinepgn()
 *
 * Split the data of one <MedlinePgn> element at the first '-' into a
 * start page and an end page and add them to info as level-1
 * "PAGESTART" and "PAGEEND".
 *
 * If the end page is shorter than the start page it is treated as
 * abbreviated: its characters replace the tail of the start page, so
 * "12111-6" gives PAGESTART "12111" and PAGEEND "12116".
 * (Assumes newstr_cpytodelim() returns a pointer just past the
 * delimiter it stopped at -- confirm against its definition.)
 *
 * Both scratch strings are released on every exit path.
 *
 * Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise.
 */
static int
medin_medlinepgn( xml *node, fields *info )
{
	int i, fstatus, ret = BIBL_OK;
	newstr sp, ep;
	char *p, *pp;

	newstrs_init( &sp, &ep, NULL );

	p = newstr_cpytodelim( &sp, xml_data( node ), "-", 1 );
	if ( newstr_memerr( &sp ) ) {
		ret = BIBL_ERR_MEMERR;
		goto out;
	}
	if ( sp.len ) {
		fstatus = fields_add( info, "PAGESTART", sp.data, 1 );
		if ( fstatus!=FIELDS_OK ) {
			ret = BIBL_ERR_MEMERR;
			goto out;
		}
	}

	p = newstr_cpytodelim( &ep, p, "", 0 );
	if ( newstr_memerr( &ep ) ) {
		ret = BIBL_ERR_MEMERR;
		goto out;
	}
	if ( ep.len ) {
		if ( sp.len > ep.len ) {
			/* expand abbreviated end page in place in sp */
			for ( i=sp.len-ep.len; i<sp.len; ++i )
				sp.data[i] = ep.data[i-sp.len+ep.len];
			pp = sp.data;
		} else  pp = ep.data;
		fstatus = fields_add( info, "PAGEEND", pp, 1 );
		if ( fstatus!=FIELDS_OK ) {
			ret = BIBL_ERR_MEMERR;
			goto out;
		}
	}

out:
	newstrs_free( &sp, &ep, NULL );
	return ret;
}

/* medin_pagination()
 *
 * Recursively search node, its children and its following siblings
 * for page information:
 *
 * <Pagination>
 *    <MedlinePgn>12111-6</MedlinePgn>
 * </Pagination>
 *
 * Every <MedlinePgn> element that has a value is converted by
 * medin_medlinepgn().
 *
 * Returns the first non-BIBL_OK status encountered, BIBL_OK otherwise.
 */
static int
medin_pagination( xml *node, fields *info )
{
	int status;

	if ( xml_tagexact( node, "MedlinePgn" ) && node->value ) {
		status = medin_medlinepgn( node, info );
		if ( status!=BIBL_OK ) return status;
	}

	if ( node->down ) {
		status = medin_pagination( node->down, info );
		if ( status!=BIBL_OK ) return status;
	}

	if ( node->next ) {
		status = medin_pagination( node->next, info );
		if ( status!=BIBL_OK ) return status;
	}

	return BIBL_OK;
}
/* medin_article()
 *
 * Process the sibling list starting at node (the children of an
 * <Article>), dispatching on the element tag:
 *
 *   <Journal>      -> medin_journal1() on the element itself
 *   <ArticleTitle> -> medin_articletitle()
 *   <Pagination>   -> medin_pagination() on its children
 *   <Abstract>     -> medin_abstract() on its children
 *   <AuthorList>   -> medin_authorlist()
 *   <Language>     -> medin_language() at level 0
 *   <Affiliation>  -> data added to info as "ADDRESS", level 0
 *
 * Returns the first non-BIBL_OK status from this element or from any
 * following sibling (BIBL_ERR_MEMERR if the address cannot be added),
 * BIBL_OK otherwise.
 */
static int
medin_article( xml *node, fields *info )
{
	int fstatus, status = BIBL_OK;

	if ( xml_tagexact( node, "Journal" ) )
		status = medin_journal1( node, info );
	else if ( xml_tagexact( node, "ArticleTitle" ) )
		status = medin_articletitle( node, info );
	else if ( xml_tagexact( node, "Pagination" ) && node->down )
		status = medin_pagination( node->down, info );
	else if ( xml_tagexact( node, "Abstract" ) && node->down )
		status = medin_abstract( node->down, info );
	else if ( xml_tagexact( node, "AuthorList" ) )
		status = medin_authorlist( node, info );
	else if ( xml_tagexact( node, "Language" ) )
		status = medin_language( node, info, 0 );
	else if ( xml_tagexact( node, "Affiliation" ) ) {
		fstatus = fields_add( info, "ADDRESS", xml_data( node ), 0 );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}
	if ( status!=BIBL_OK ) return status;

	if ( node->next ) status = medin_article( node->next, info );

	/* hand back the siblings' result so their errors reach the caller */
	return status;
}
/* xml_tag_attrib()
 *
 * Test whether node has tag s and, if so, whether it carries the given
 * attribute/value pair.
 *
 * Returns 0 if the tag does not match; otherwise returns the result of
 * xml_hasattrib( node, attrib, value ).
 */
int
xml_tag_attrib( xml *node, char *s, char *attrib, char *value )
{
	return xml_tagexact( node, s ) ? xml_hasattrib( node, attrib, value ) : 0;
}
/* wordin_reference()
 *
 * Convert the sibling list starting at node (the children of a
 * <b:Source>) into fields of info. Only elements for which
 * xml_hasdata() is true are considered:
 *
 *   b:Tag, b:City, b:Publisher, b:Title, b:JournalName, b:Volume and
 *   b:Comments are copied to the field/level given in the table below;
 *   b:SourceType is recognized but deliberately ignored;
 *   b:Pages is handled by wordin_pages();
 *   b:Author / b:Editor with children are handled by wordin_people().
 *
 * Processing stops at the first failure, and that failure is reported
 * to the caller no matter which sibling produced it.
 *
 * Returns BIBL_ERR_MEMERR if a field cannot be added, the first
 * non-BIBL_OK status from a callee, or BIBL_OK.
 */
static int
wordin_reference( xml *node, fields *info )
{
	static const struct {
		char *tag;
		char *field;
		int level;
	} simple[] = {
		{ "b:Tag",         "REFNUM",    0 },
		{ "b:City",        "ADDRESS",   0 },
		{ "b:Publisher",   "PUBLISHER", 0 },
		{ "b:Title",       "TITLE",     0 },
		{ "b:JournalName", "TITLE",     1 },
		{ "b:Volume",      "VOLUME",    1 },
		{ "b:Comments",    "NOTES",     0 },
	};
	int nsimple = sizeof( simple ) / sizeof( simple[0] );
	int i, status, ret;

	for ( ; node; node = node->next ) {

		if ( !xml_hasdata( node ) ) continue;

		/* plain one-to-one copies */
		for ( i=0; i<nsimple; ++i )
			if ( xml_tagexact( node, simple[i].tag ) ) break;

		ret = BIBL_OK;

		if ( i<nsimple ) {
			status = fields_add( info, simple[i].field,
					xml_data( node ), simple[i].level );
			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
		}
		else if ( xml_tagexact( node, "b:SourceType" ) ) {
			/* intentionally not converted */
		}
		else if ( xml_tagexact( node, "b:Pages" ) ) {
			ret = wordin_pages( node, info );
		}
		else if ( xml_tagexact( node, "b:Author" ) && node->down ) {
			ret = wordin_people( node->down, info, "AUTHOR" );
		}
		else if ( xml_tagexact( node, "b:Editor" ) && node->down ) {
			ret = wordin_people( node->down, info, "EDITOR" );
		}

		if ( ret!=BIBL_OK ) return ret;
	}

	return BIBL_OK;
}
/* xml_tagwithdata()
 *
 * Test whether node both carries data and has the given tag.
 *
 * Returns 0 if xml_hasdata( node ) is false; otherwise returns the
 * result of xml_tagexact( node, tag ).
 */
int
xml_tagwithdata( xml *node, char *tag )
{
	return xml_hasdata( node ) ? xml_tagexact( node, tag ) : 0;
}