/* Store 'value' under the string 'key' in the dictionary.
 *
 * If the key is already present, the existing node is kept: the old value
 * is handed to the dictionary's freeValueFunc (when one is set) and the
 * node is pointed at the new value. Otherwise the key is duplicated
 * through the global string cache and a new node is inserted.
 *
 * Returns:
 *   TA_BAD_PARAM  when dict, key or value is NULL.
 *   TA_ALLOC_ERR  when the key cannot be duplicated or the insert fails
 *                 (the duplicated key is released in the latter case).
 *   TA_SUCCESS    otherwise.
 */
TA_RetCode TA_DictAddPair_S( TA_Dict *dict, TA_String *key, void *value )
{
   TA_PrivDictInfo *info = (TA_PrivDictInfo *)dict;
   TA_Libc   *lib;
   dict_t    *tree;
   dnode_t   *existing;
   TA_String *keyCopy;

   if( !info || !key || !value )
      return TA_BAD_PARAM;

   lib  = info->libHandle;
   tree = &info->d;

   /* Replace in place when the key is already known. */
   existing = dict_lookup( lib, tree, TA_StringToChar(key) );
   if( existing != NULL )
   {
      if( info->freeValueFunc != NULL )
         (*info->freeValueFunc)( lib, dnode_get( existing ) );

      dnode_put( existing, value );
      return TA_SUCCESS;
   }

   /* New key: the dictionary keeps its own copy of the key string. */
   keyCopy = TA_StringDup( TA_GetGlobalStringCache( lib ), key );
   if( keyCopy == NULL )
      return TA_ALLOC_ERR;

   if( dict_alloc_insert( lib, tree, TA_StringToChar(keyCopy), value ) )
      return TA_SUCCESS;

   /* Insert failed: give the duplicated key back. */
   TA_StringFree( TA_GetGlobalStringCache( lib ), keyCopy );
   return TA_ALLOC_ERR;
}
/* Store 'value' under the integer 'key' in the dictionary.
 *
 * If the key is already present, the existing node is kept: the old value
 * is handed to the dictionary's freeValueFunc (when one is set) and the
 * node is pointed at the new value. Otherwise a new node is inserted.
 * The integer key is passed to the dictionary cast to a pointer.
 *
 * Returns:
 *   TA_BAD_PARAM  when dict or value is NULL.
 *   TA_ALLOC_ERR  when the insert fails.
 *   TA_SUCCESS    otherwise.
 */
TA_RetCode TA_DictAddPair_I( TA_Dict *dict, int key, void *value )
{
   TA_PrivDictInfo *info = (TA_PrivDictInfo *)dict;
   TA_Libc *lib;
   dict_t  *tree;
   dnode_t *existing;

   if( !info || !value )
      return TA_BAD_PARAM;

   lib  = info->libHandle;
   tree = &info->d;

   /* Replace in place when the key is already known. */
   existing = dict_lookup( lib, tree, (void *)key );
   if( existing != NULL )
   {
      if( info->freeValueFunc != NULL )
         (*info->freeValueFunc)( lib, dnode_get( existing ) );

      dnode_put( existing, value );
      return TA_SUCCESS;
   }

   /* New key: allocate and insert a fresh node. */
   if( dict_alloc_insert( lib, tree, (void *)key, value ) )
      return TA_SUCCESS;

   return TA_ALLOC_ERR;
}
int RP_mapping::process_string(char *str, int ln) { char *arg1, *arg2; for ( arg1 = arg2 = str; !isspace( *arg2) && *arg2; arg2++ ) ; if ( !*arg2 ) return 0; *arg2++ = 0x0; if ( !*arg2) return 0; for ( ; *arg2 && isspace(*arg2); arg2++ ) ; if ( !*arg2 ) return 0; // next - just trim all spaces at end char *ptr = arg2 + 1; for ( ; !isspace(*ptr) && *ptr; ptr++ ) ; *ptr = 0x0; /* O`k, now arg1 contains address or name and arg2 contains name of struct */ // first check for struct presence tid_t tid = get_struc_id(arg2); if ( BADADDR == tid ) { if ( verbose ) msg("Cannot find struct '%s'\n", arg2); return 0; } // next try to resolve first arg as name ea_t ea = get_name_ea(BADADDR, arg1); if ( ea != BADADDR ) { /* first check for already presenting */ dnode_t *again = is_presented(ea); if ( NULL != again ) { if ( verbose ) msg("Warning, address %X already in dictionary, overwrite\n", ea); dnode_put(again, (void *)tid); } else { /* place to dict new mapping name -> tid_t */ dict_alloc_insert(mapping, (const void *)ea, (void *)tid); } #ifdef RP_DEBUG msg("Mapping %X (%s) -> %s (%X) \n", ea, arg1, arg2, tid); #endif return 1; } /* well may be we shold add yet one underscore at begin of name ? */ { int len = strlen(arg1); char *dup = (char *)qalloc(len + 2); *dup = '_'; strcpy(dup + 1, arg1); ea = get_name_ea(BADADDR, dup); if ( BADADDR != ea ) { dnode_t *again = is_presented(ea); if ( NULL != again ) { if ( verbose ) msg("Warning, address %X already in dictionary, overwrite\n", ea); dnode_put(again, (void *)tid); } else { dict_alloc_insert(mapping, (const void *)ea, (void *)tid); } #ifdef RP_DEBUG msg("Mapping %X (%s) -> %s (%X) \n", ea, dup, arg2, tid); #endif qfree(dup); return 1; } qfree(dup); } /* well - may be we has address ? */ if ( *arg1 == '0' && ( arg1[1] == 'x' || arg1[1] == 'X' ) ) { arg1 += 2; if ( ! *arg1 ) { if ( verbose ) msg("Line %d. 
Bad address '%s'\n", ln, arg1 - 2); return 0; } } char *notused; ea = strtol(arg1, ¬used, 0x10); if ( !ea ) { if ( verbose ) msg("Line %d. Bad address '%s'\n", ln, arg1); return 0; } // O`k it seems that we really has some address. Lets check it if ( NULL == getseg(ea) ) { if ( verbose ) msg("Line %d. Address %X is not in your file\n", ln, ea); return 0; } dnode_t *again = is_presented(ea); if ( NULL != again ) { if ( verbose ) msg("Warning, address %X already in dictionary, overwrite\n", ea); dnode_put(again, (void *)tid); } else { dict_alloc_insert(mapping, (const void *)ea, (void *)tid); } #ifdef RP_DEBUG msg("Mapping %X -> %s (%X) \n", ea, arg2, tid); #endif return 1; }
int main(int argc, char **argv) { char *merylCount = 0L; char *fastaName = 0L; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-m") == 0) { merylCount = argv[++arg]; } else if (strcmp(argv[arg], "-f") == 0) { fastaName = argv[++arg]; } else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); } arg++; } if ((merylCount == 0L) || (fastaName == 0L)) { fprintf(stderr, "usage: %s -m <meryl-name-prefix> -f <fasta-file>\n", argv[0]); exit(1); } // Open the count files // merylStreamReader *MSR = new merylStreamReader(merylCount); fprintf(stderr, "Mers are "uint32FMT" bases.\n", MSR->merSize()); fprintf(stderr, "There are "uint64FMT" unique (copy = 1) mers.\n", MSR->numberOfUniqueMers()); fprintf(stderr, "There are "uint64FMT" distinct mers.\n", MSR->numberOfDistinctMers()); fprintf(stderr, "There are "uint64FMT" mers total.\n", MSR->numberOfTotalMers()); // Guess how many mers we can fit into 512MB, then report how many chunks we need to do. uint32 merSize = MSR->merSize(); uint64 memoryLimit = 700 * 1024 * 1024; uint64 perMer = sizeof(kMerLite) + sizeof(dnode_t); uint64 mersPerBatch = memoryLimit / perMer; uint32 numBatches = MSR->numberOfDistinctMers() / mersPerBatch; uint32 batch = 0; dnode_t *nodes = new dnode_t [mersPerBatch]; kMerLite *mers = new kMerLite [mersPerBatch]; if (MSR->numberOfDistinctMers() % mersPerBatch) numBatches++; fprintf(stderr, "perMer: "uint64FMT" bytes ("uint64FMT" for kMerLite, "uint64FMT" for dnode_t.\n", perMer, (uint64)sizeof(kMerLite), (uint64)sizeof(dnode_t)); fprintf(stderr, "We can fit "uint64FMT" mers into "uint64FMT"MB.\n", mersPerBatch, memoryLimit >> 20); fprintf(stderr, "So we need "uint32FMT" batches to verify the count.\n", numBatches); while (MSR->validMer()) { uint64 mersRemain = mersPerBatch; dict_t *merDict = dict_create(mersPerBatch, kMerLiteSort); batch++; // STEP 1: Insert mersPerBatch into the merDict // fprintf(stderr, "STEP 1 BATCH "uint32FMTW(2)": Insert into merDict\n", batch); while (MSR->nextMer() && 
mersRemain) { mersRemain--; mers[mersRemain] = MSR->theFMer(); // initialize the node with the value, then insert the node // into the tree using the key int32 val = (int32)MSR->theCount(); dnode_init(&nodes[mersRemain], (void *)val); dict_insert(merDict, &nodes[mersRemain], &mers[mersRemain]); } // STEP 2: Stream the original file, decrementing the count // fprintf(stderr, "STEP 2 BATCH "uint32FMTW(2)": Stream fasta\n", batch); seqStream *CS = new seqStream(fastaName, true); merStream *MS = new merStream(new kMerBuilder(merSize), CS); kMerLite mer; dnode_t *nod; while (MS->nextMer()) { mer = MS->theFMer(); nod = dict_lookup(merDict, &mer); if (nod != 0L) { int32 val = (int32)dnode_get(nod); val--; dnode_put(nod, (void *)val); } else { // Unless the whole meryl file fit into our merDict, we cannot warn if // we don't find mers. // if (numBatches == 1) { char str[1024]; fprintf(stderr, "Didn't find node for mer '%s'\n", mer.merToString(merSize, str)); } } } delete MS; delete CS; // STEP 3: Check every node in the tree to make sure that the counts // are exactly zero. // fprintf(stderr, "STEP 3 BATCH "uint32FMTW(2)": Check\n", batch); nod = dict_first(merDict); while (nod) { int32 val = (int32)dnode_get(nod); kMerLite const *nodmer = (kMerLite const *)dnode_getkey(nod); if (val != 0) { char str[1024]; fprintf(stderr, "Got count "int32FMT" for mer '%s'\n", val, nodmer->merToString(merSize, str)); } nod = dict_next(merDict, nod); } // STEP 4: Destroy the dictionary. // fprintf(stderr, "STEP 4 BATCH "uint32FMTW(2)": Destroy\n", batch); while ((nod = dict_first(merDict))) dict_delete(merDict, nod); dict_destroy(merDict); } }