/*
 * Give back an entry previously obtained from the cache.
 *
 * bdb   backend private data
 * e     the entry being returned; e->e_private may point at its EntryInfo
 * rw    not consulted by this function
 * lock  the entry_db lock held on the entry; always released here
 *
 * If the entry's EntryInfo is flagged CACHE_ENTRY_NOT_CACHED and its
 * entryinfo lock can be taken without blocking, the entry is detached
 * from the EntryInfo and handed to bdb_entry_return() once the entry_db
 * lock has been dropped.
 */
void
bdb_cache_return_entry_rw( struct bdb_info *bdb, Entry *e,
	int rw, DB_LOCK *lock )
{
	EntryInfo *info = e->e_private;
	int discard = 0;

	if ( info != NULL
		&& ( info->bei_state & CACHE_ENTRY_NOT_CACHED )
		&& ( bdb_cache_entryinfo_trylock( info ) == 0 ))
	{
		/* Test the flag again now that the entryinfo lock is held. */
		if ( info->bei_state & CACHE_ENTRY_NOT_CACHED ) {
			/* Discarding the entry is only safe when nobody else is
			 * using it, which would normally call for an entry_db
			 * writelock.  The flag, however, is set solely by the
			 * thread that loaded the entry, and only when no other
			 * thread found the info in the meantime; every other
			 * thread clears it.  Seeing it still set therefore means
			 * this thread is the only user.
			 */
			info->bei_e = NULL;
			info->bei_state ^= CACHE_ENTRY_NOT_CACHED;
			discard = 1;
		}
		bdb_cache_entryinfo_unlock( info );
	}

	bdb_cache_entry_db_unlock( bdb, lock );

	if ( !discard )
		return;

	/* Break the back-pointer before the entry is handed back. */
	e->e_private = NULL;
	bdb_entry_return( e );
}
int bdb_cache_find_id( Operation *op, DB_TXN *tid, ID id, EntryInfo **eip, int flag, DB_LOCK *lock ) { struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; Entry *ep = NULL; int rc = 0, load = 0; EntryInfo ei = { 0 }; ei.bei_id = id; #ifdef SLAP_ZONE_ALLOC slap_zh_rlock(bdb->bi_cache.c_zctx); #endif /* If we weren't given any info, see if we have it already cached */ if ( !*eip ) { again: ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock ); *eip = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree, (caddr_t) &ei, bdb_id_cmp ); if ( *eip ) { /* If the lock attempt fails, the info is in use */ if ( bdb_cache_entryinfo_trylock( *eip )) { int del = (*eip)->bei_state & CACHE_ENTRY_DELETED; ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock ); /* If this node is being deleted, treat * as if the delete has already finished */ if ( del ) { return DB_NOTFOUND; } /* otherwise, wait for the info to free up */ ldap_pvt_thread_yield(); goto again; } /* If this info isn't hooked up to its parent yet, * unlock and wait for it to be fully initialized */ if ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED ) { bdb_cache_entryinfo_unlock( *eip ); ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock ); ldap_pvt_thread_yield(); goto again; } flag |= ID_LOCKED; } ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock ); } /* See if the ID exists in the database; add it to the cache if so */ if ( !*eip ) { #ifndef BDB_HIER rc = bdb_id2entry( op->o_bd, tid, id, &ep ); if ( rc == 0 ) { rc = bdb_cache_find_ndn( op, tid, &ep->e_nname, eip ); if ( *eip ) flag |= ID_LOCKED; if ( rc ) { ep->e_private = NULL; #ifdef SLAP_ZONE_ALLOC bdb_entry_return( bdb, ep, (*eip)->bei_zseq ); #else bdb_entry_return( ep ); #endif ep = NULL; } } #else rc = hdb_cache_find_parent(op, tid, id, eip ); if ( rc == 0 ) flag |= ID_LOCKED; #endif } /* Ok, we found the info, do we have the entry? 
*/ if ( rc == 0 ) { if ( !( flag & ID_LOCKED )) { bdb_cache_entryinfo_lock( *eip ); flag |= ID_LOCKED; } if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) { rc = DB_NOTFOUND; } else { (*eip)->bei_finders++; (*eip)->bei_state |= CACHE_ENTRY_REFERENCED; if ( flag & ID_NOENTRY ) { bdb_cache_entryinfo_unlock( *eip ); return 0; } /* Make sure only one thread tries to load the entry */ load1: #ifdef SLAP_ZONE_ALLOC if ((*eip)->bei_e && !slap_zn_validate( bdb->bi_cache.c_zctx, (*eip)->bei_e, (*eip)->bei_zseq)) { (*eip)->bei_e = NULL; (*eip)->bei_zseq = 0; } #endif if ( !(*eip)->bei_e && !((*eip)->bei_state & CACHE_ENTRY_LOADING)) { load = 1; (*eip)->bei_state |= CACHE_ENTRY_LOADING; flag |= ID_CHKPURGE; } if ( !load ) { /* Clear the uncached state if we are not * loading it, i.e it is already cached or * another thread is currently loading it. */ if ( (*eip)->bei_state & CACHE_ENTRY_NOT_CACHED ) { (*eip)->bei_state ^= CACHE_ENTRY_NOT_CACHED; flag |= ID_CHKPURGE; } } if ( flag & ID_LOCKED ) { bdb_cache_entryinfo_unlock( *eip ); flag ^= ID_LOCKED; } rc = bdb_cache_entry_db_lock( bdb, tid, *eip, load, 0, lock ); if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) { rc = DB_NOTFOUND; bdb_cache_entry_db_unlock( bdb, lock ); bdb_cache_entryinfo_lock( *eip ); (*eip)->bei_finders--; bdb_cache_entryinfo_unlock( *eip ); } else if ( rc == 0 ) { if ( load ) { if ( !ep) { rc = bdb_id2entry( op->o_bd, tid, id, &ep ); } if ( rc == 0 ) { ep->e_private = *eip; #ifdef BDB_HIER while ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED ) ldap_pvt_thread_yield(); bdb_fix_dn( ep, 0 ); #endif bdb_cache_entryinfo_lock( *eip ); (*eip)->bei_e = ep; #ifdef SLAP_ZONE_ALLOC (*eip)->bei_zseq = *((ber_len_t *)ep - 2); #endif ep = NULL; if ( flag & ID_NOCACHE ) { /* Set the cached state only if no other thread * found the info while we were loading the entry. 
*/ if ( (*eip)->bei_finders == 1 ) { (*eip)->bei_state |= CACHE_ENTRY_NOT_CACHED; flag ^= ID_CHKPURGE; } } bdb_cache_entryinfo_unlock( *eip ); bdb_cache_lru_link( bdb, *eip ); } if ( rc == 0 ) { /* If we succeeded, downgrade back to a readlock. */ rc = bdb_cache_entry_db_relock( bdb, tid, *eip, 0, 0, lock ); } else { /* Otherwise, release the lock. */ bdb_cache_entry_db_unlock( bdb, lock ); } } else if ( !(*eip)->bei_e ) { /* Some other thread is trying to load the entry, * wait for it to finish. */ bdb_cache_entry_db_unlock( bdb, lock ); bdb_cache_entryinfo_lock( *eip ); flag |= ID_LOCKED; goto load1; #ifdef BDB_HIER } else { /* Check for subtree renames */ rc = bdb_fix_dn( (*eip)->bei_e, 1 ); if ( rc ) { bdb_cache_entry_db_relock( bdb, tid, *eip, 1, 0, lock ); /* check again in case other modifier did it already */ if ( bdb_fix_dn( (*eip)->bei_e, 1 ) ) rc = bdb_fix_dn( (*eip)->bei_e, 2 ); bdb_cache_entry_db_relock( bdb, tid, *eip, 0, 0, lock ); } #endif } bdb_cache_entryinfo_lock( *eip ); (*eip)->bei_finders--; if ( load ) (*eip)->bei_state ^= CACHE_ENTRY_LOADING; bdb_cache_entryinfo_unlock( *eip ); } } } if ( flag & ID_LOCKED ) { bdb_cache_entryinfo_unlock( *eip ); } if ( ep ) { ep->e_private = NULL; #ifdef SLAP_ZONE_ALLOC bdb_entry_return( bdb, ep, (*eip)->bei_zseq ); #else bdb_entry_return( ep ); #endif } if ( rc == 0 ) { int purge = 0; if (( flag & ID_CHKPURGE ) || bdb->bi_cache.c_eimax ) { ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex ); if ( flag & ID_CHKPURGE ) { bdb->bi_cache.c_cursize++; if ( !bdb->bi_cache.c_purging && bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) { purge = 1; bdb->bi_cache.c_purging = 1; } } else if ( !bdb->bi_cache.c_purging && bdb->bi_cache.c_eimax && bdb->bi_cache.c_leaves > bdb->bi_cache.c_eimax ) { purge = 1; bdb->bi_cache.c_purging = 1; } ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex ); } if ( purge ) bdb_cache_lru_purge( bdb ); } #ifdef SLAP_ZONE_ALLOC if (rc == 0 && (*eip)->bei_e) { 
slap_zn_rlock(bdb->bi_cache.c_zctx, (*eip)->bei_e); } slap_zh_runlock(bdb->bi_cache.c_zctx); #endif return rc; }
/* This is best-effort only. If all entries in the cache are
 * busy, they will all be kept. This is unlikely to happen
 * unless the cache is very much smaller than the working set.
 *
 * Walks the LRU list starting at c_lruhead, releasing cached entries
 * (up to efree of them) and freeing leaf EntryInfo nodes (up to eifree
 * of them).  Nodes that cannot be locked without blocking, that were
 * recently referenced, or that are in a transitional state are skipped.
 * On exit c_lruhead is advanced to where the walk stopped and
 * c_purging is cleared.
 *
 * NOTE(review): c_purging is cleared here without holding
 * c_count_mutex, while bdb_cache_find_id sets it under that mutex --
 * confirm this is intentional.
 */
static void
bdb_cache_lru_purge( struct bdb_info *bdb )
{
	DB_LOCK lock, *lockp;
	EntryInfo *elru, *elnext = NULL;
	int islocked;
	ID eicount, ecount;
	ID count, efree, eifree = 0;
#ifdef LDAP_DEBUG
	int iter;
#endif

	/* Wait for the mutex; we're the only one trying to purge. */
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex );

	/* Number of entries to release: the overshoot plus c_minfree */
	if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) {
		efree = bdb->bi_cache.c_cursize - bdb->bi_cache.c_maxsize;
		efree += bdb->bi_cache.c_minfree;
	} else {
		efree = 0;
	}

	/* maximum number of EntryInfo leaves to cache. In slapcat
	 * we always free all leaf nodes.
	 */
	if ( slapMode & SLAP_TOOL_READONLY ) {
		eifree = bdb->bi_cache.c_leaves;
	} else if ( bdb->bi_cache.c_eimax &&
		bdb->bi_cache.c_leaves > bdb->bi_cache.c_eimax ) {
		eifree = bdb->bi_cache.c_minfree * 10;
		if ( eifree >= bdb->bi_cache.c_leaves )
			eifree /= 2;
	}

	/* Nothing to do: neither entries nor leaves are over their limit */
	if ( !efree && !eifree ) {
		ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
		bdb->bi_cache.c_purging = 0;
		return;
	}

	/* Only hand a DB_LOCK to the lock calls when the cache has a txn */
	if ( bdb->bi_cache.c_txn ) {
		lockp = &lock;
	} else {
		lockp = NULL;
	}

	count = 0;		/* entries released so far */
	eicount = 0;	/* leaf EntryInfos freed so far */
	ecount = 0;		/* entries seen while walking */
#ifdef LDAP_DEBUG
	iter = 0;
#endif
	/* Look for an unused entry to remove */
	for ( elru = bdb->bi_cache.c_lruhead; elru; elru = elnext ) {
		/* Fetch the successor first; elru may be freed below */
		elnext = elru->bei_lrunext;

		/* Skip nodes that are busy rather than blocking on them */
		if ( bdb_cache_entryinfo_trylock( elru ))
			goto bottom;

		/* This flag implements the clock replacement behavior */
		if ( elru->bei_state & ( CACHE_ENTRY_REFERENCED )) {
			elru->bei_state &= ~CACHE_ENTRY_REFERENCED;
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		/* If this node is in the process of linking into the cache,
		 * or this node is being deleted, skip it.  The same goes for
		 * nodes that are loading, flagged CACHE_ENTRY_ONELEVEL, or
		 * that still have finders.
		 */
		if (( elru->bei_state & ( CACHE_ENTRY_NOT_LINKED |
			CACHE_ENTRY_DELETED | CACHE_ENTRY_LOADING |
			CACHE_ENTRY_ONELEVEL )) ||
			elru->bei_finders > 0 ) {
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		/* The parent must be lockable too, again without blocking */
		if ( bdb_cache_entryinfo_trylock( elru->bei_parent )) {
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		/* entryinfo is locked */
		islocked = 1;

		/* If we can successfully writelock it, then
		 * the object is idle.
		 */
		if ( bdb_cache_entry_db_lock( bdb,
			bdb->bi_cache.c_txn, elru, 1, 1, lockp ) == 0 ) {

			/* Free entry for this node if it's present */
			if ( elru->bei_e ) {
				ecount++;

				/* the cache may have gone over the limit while we
				 * weren't looking, so double check.
				 */
				if ( !efree && ecount > bdb->bi_cache.c_maxsize )
					efree = bdb->bi_cache.c_minfree;

				if ( count < efree ) {
					elru->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
					bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
#else
					bdb_entry_return( elru->bei_e );
#endif
					elru->bei_e = NULL;
					count++;
				} else {
					/* Keep this node cached, skip to next */
					bdb_cache_entry_db_unlock( bdb, lockp );
					goto next;
				}
			}
			bdb_cache_entry_db_unlock( bdb, lockp );

			/*
			 * If it is a leaf node, and we're over the limit, free it.
			 */
			if ( elru->bei_kids ) {
				/* Drop from list, we ignore it... */
				LRU_DEL( &bdb->bi_cache, elru );
			} else if ( eicount < eifree ) {
				/* Too many leaf nodes, free this one */
				bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 );
				bdb_cache_delete_cleanup( &bdb->bi_cache, elru );
				/* elru must not be unlocked at "next" after this */
				islocked = 0;
				eicount++;
			}	/* Leave on list until we need to free it */
		}

next:
		/* Only reached after both entryinfo locks were taken above */
		if ( islocked ) {
			bdb_cache_entryinfo_unlock( elru );
			bdb_cache_entryinfo_unlock( elru->bei_parent );
		}

		/* Both quotas met: stop early */
		if ( count >= efree && eicount >= eifree )
			break;
bottom:
		/* Stop once the walk has come back around to the head */
		if ( elnext == bdb->bi_cache.c_lruhead )
			break;
#ifdef LDAP_DEBUG
		iter++;
#endif
	}

	if ( count || ecount > bdb->bi_cache.c_cursize ) {
		ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex );
		/* HACK: we seem to be losing track, fix up now */
		if ( ecount > bdb->bi_cache.c_cursize )
			bdb->bi_cache.c_cursize = ecount;
		bdb->bi_cache.c_cursize -= count;
		ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex );
	}
	/* The next purge resumes from where this one stopped */
	bdb->bi_cache.c_lruhead = elnext;
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
	bdb->bi_cache.c_purging = 0;
}
/* Walk up the tree from a child node, looking for an ID that's already
 * been linked into the cache.
 *
 * op   current operation; op->o_bd->be_private supplies the bdb_info
 * txn  transaction passed to hdb_dn2id_parent()
 * id   ID of the node to start from
 * res  receives the EntryInfo for id
 *
 * Each pass asks hdb_dn2id_parent() for the parent of the current ID,
 * creates an EntryInfo for the current ID, inserts it into the ID tree
 * and then looks for the parent in the cache.  When the parent (or the
 * DN tree root, for parent ID 0) is found, the chain built so far is
 * linked beneath it and the loop ends.
 *
 * Returns 0 on success, otherwise the nonzero result of
 * hdb_dn2id_parent().  On success *res is left with its entryinfo lock
 * held (taken by bdb_cache_entryinfo_lock, or by the trylock in the
 * duplicate case -- assuming trylock returns 0 when it acquires the
 * lock, as its use here implies).
 *
 * NOTE(review): the result of bdb_cache_entryinfo_new() is used
 * unchecked; confirm it cannot return NULL.
 * NOTE(review): when hdb_dn2id_parent() fails after the first pass,
 * nodes created by earlier passes remain in the ID tree flagged
 * CACHE_ENTRY_NOT_LINKED -- confirm how that case is cleaned up.
 */
int
hdb_cache_find_parent(
	Operation *op,
	DB_TXN *txn,
	ID id,
	EntryInfo **res )
{
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
	/* ei/eip: stack scratch for the current node and its parent's ID;
	 * ei2: previous node or found parent; ein: newest node;
	 * eir: the node for the requested ID, returned via *res
	 */
	EntryInfo ei, eip, *ei2 = NULL, *ein = NULL, *eir = NULL;
	int rc, add;

	ei.bei_id = id;
	ei.bei_kids = NULL;
	ei.bei_ckids = 0;

	for (;;) {
		rc = hdb_dn2id_parent( op, txn, &ei, &eip.bei_id );
		if ( rc ) break;

		/* Save the previous node, if any */
		ei2 = ein;

		/* Create a new node for the current ID */
		ein = bdb_cache_entryinfo_new( &bdb->bi_cache );
		ein->bei_id = ei.bei_id;
		ein->bei_kids = ei.bei_kids;
		ein->bei_nrdn = ei.bei_nrdn;
		ein->bei_rdn = ei.bei_rdn;
		ein->bei_ckids = ei.bei_ckids;
#ifdef SLAP_ZONE_ALLOC
		ein->bei_bdb = bdb;
#endif
		ei.bei_ckids = 0;
		/* add: this pass contributed a new node to the cache */
		add = 1;

		/* This node is not fully connected yet */
		ein->bei_state |= CACHE_ENTRY_NOT_LINKED;

		/* If this is the first time, save this node
		 * to be returned later.
		 */
		if ( eir == NULL ) {
			eir = ein;
			ein->bei_finders++;
		}

again:
		/* Insert this node into the ID tree */
		ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
		if ( avl_insert( &bdb->bi_cache.c_idtree, (caddr_t)ein,
			bdb_id_cmp, bdb_id_dup_err ) ) {
			/* Insert refused.  The existing node is read back from
			 * ein->bei_lrunext (presumably stored there by
			 * bdb_id_dup_err -- verify against its definition).
			 */
			EntryInfo *eix = ein->bei_lrunext;

			/* Existing node is busy: drop the tree lock and retry */
			if ( bdb_cache_entryinfo_trylock( eix )) {
				ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
				ldap_pvt_thread_yield();
				goto again;
			}
			ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );

			/* Someone else created this node just before us.
			 * Free our new copy and use the existing one.
			 */
			bdb_cache_entryinfo_free( &bdb->bi_cache, ein );

			/* if it was the node we were looking for, just return it */
			if ( eir == ein ) {
				*res = eix;
				rc = 0;
				break;
			}

			/* Step back to the previous node and treat the existing
			 * node as its parent.
			 */
			ein = ei2;
			ei2 = eix;
			add = 0;

			/* otherwise, link up what we have and return */
			goto gotparent;
		}

		/* If there was a previous node, link it to this one */
		if ( ei2 ) ei2->bei_parent = ein;

		/* Look for this node's parent */
par2:
		if ( eip.bei_id ) {
			ei2 = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
				(caddr_t) &eip, bdb_id_cmp );
		} else {
			/* Parent ID 0: the parent is the root of the DN tree */
			ei2 = &bdb->bi_cache.c_dntree;
		}
		/* Parent is busy: release the tree lock, yield, then retry */
		if ( ei2 && bdb_cache_entryinfo_trylock( ei2 )) {
			ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
			ldap_pvt_thread_yield();
			ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
			goto par2;
		}
		/* Update the cache counters while the tree lock is held */
		if ( add )
			bdb->bi_cache.c_eiused++;
		if ( ei2 && ( ei2->bei_kids || !ei2->bei_id ))
			bdb->bi_cache.c_leaves++;
		ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );

gotparent:
		/* Got the parent, link in and we're done. */
		if ( ei2 ) {
			bdb_cache_entryinfo_lock( eir );
			ein->bei_parent = ei2;

			/* Count the child only if it was not already present */
			if ( avl_insert( &ei2->bei_kids, (caddr_t)ein, bdb_rdn_cmp,
				avl_dup_error) == 0 )
				ei2->bei_ckids++;

			/* Reset all the state info */
			for (ein = eir; ein != ei2; ein=ein->bei_parent)
				ein->bei_state &= ~CACHE_ENTRY_NOT_LINKED;

			bdb_cache_entryinfo_unlock( ei2 );
			/* Undo the finders++ done when eir was first saved */
			eir->bei_finders--;

			*res = eir;
			break;
		}
		/* Parent not cached yet: move up one level.  The scratch node
		 * now describes the parent, with the node just created as its
		 * only child so far.
		 */
		ei.bei_kids = NULL;
		ei.bei_id = eip.bei_id;
		ei.bei_ckids = 1;
		avl_insert( &ei.bei_kids, (caddr_t)ein, bdb_rdn_cmp,
			avl_dup_error );
	}
	return rc;
}