/**
 * Bigram similarity (BI-SIM) of two words.
 *
 * Both words are lower-cased and then prefixed with an upper-cased copy of
 * their own first character, so the first letter also takes part in a bigram.
 * A dynamic-programming table f[i][j] holds the best score for the first i
 * characters of w1 against the first j characters of w2; aligning position i
 * with position j adds id() of the two characters of each bigram.
 * (id() is defined elsewhere - presumably 1 for equal characters, 0 otherwise.)
 *
 * @param w1 first word
 * @param w2 second word
 * @return f[m][n] / (2 * max(m, n)); 0 when exactly one word is empty and
 *         1 when both are empty (two empty words are identical).
 */
num_ty bi_sim(const string_impl& w1, const string_impl& w2) {
  const uint m = w1.length(), n = w2.length();

  // Empty input: the original evaluated x[0] on an empty string and, when
  // both words were empty, divided 0 by 0. Answer these cases directly.
  if (m == 0 || n == 0)
    return static_cast<num_ty>(m == n ? 1.0 : 0.0);

  string_impl x = w1, y = w2;
  lower_case(&x);
  lower_case(&y);

  // Prefix each word with its upper-cased first character so that index i in
  // the padded word is the character that follows index i-1 of the bigram.
  char_impl x1 = x[0], y1 = y[0];
  upper_case(&x1);
  upper_case(&y1);
  x = x1 + x;
  y = y1 + y;

  // (m+1) x (n+1) table; row 0 and column 0 stay at zero.
  vector<vector<num_ty>> f(m + 1,
                           vector<num_ty>(n + 1, static_cast<num_ty>(0.0)));

  for (uint i = 1; i <= m; ++i)
    for (uint j = 1; j <= n; ++j)
      f[i][j] = max(max(f[i-1][j], f[i][j-1]),
                    f[i-1][j-1] + id(x[i-1], y[j-1]) + id(x[i], y[j]));

  return f[m][n] / (2 * max(m, n));
}
/*
 * Vigenere-style encipher / decipher.
 *
 * src       : text to process; it is copied, never modified.
 * key       : key string; it is upper-cased IN PLACE by upper_case().
 * is_encode : non-zero to encipher, zero to decipher.
 *
 * The copy of src is upper-cased and every character that is not an
 * upper-case letter is removed before the key is applied.
 *
 * Returns a newly allocated string the caller must free(), or NULL when the
 * copy cannot be allocated.  With an empty key the stripped, upper-cased text
 * is returned unshifted (the original divided by zero in that case).
 *
 * NOTE(review): key characters outside 'A'..'Z' are not filtered, so a key
 * containing digits or punctuation still yields out-of-alphabet output.
 */
char* encipher(const char *src, char *key, int is_encode)
{
    int i, klen, slen;
    char *dest;

    dest = strdup(src);
    if (dest == NULL)
        return NULL;

    upper_case(dest);
    upper_case(key);

    /* strip out non-letters; the cast keeps isupper() defined for bytes > 127 */
    for (i = 0, slen = 0; dest[slen] != '\0'; slen++)
        if (isupper((unsigned char)dest[slen]))
            dest[i++] = dest[slen];

    dest[slen = i] = '\0'; /* null pad it, make it safe to use */

    klen = (int)strlen(key);
    if (klen == 0)
        return dest; /* nothing to shift by; avoids i % 0 */

    /* every remaining character passed isupper() above, so no re-check here */
    for (i = 0; i < slen; i++) {
        dest[i] = 'A' + (is_encode
                ? dest[i] - 'A' + key[i % klen] - 'A'
                : dest[i] - key[i % klen] + 26) % 26;
    }

    return dest;
}
/* 2010-10-22 : new routine */
/*
 * Compare two strings through upper_case(): each argument is passed to
 * upper_case() together with a local MAXSTRLEN-byte buffer, and the two
 * buffers are then compared with strcmp().
 *
 * Returns the strcmp() result for the two buffers.
 *
 * NOTE(review): presumably upper_case(dst, src) writes an upper-cased copy of
 * src into dst; nothing here limits the copy to MAXSTRLEN bytes - confirm.
 */
int upper_case_compare( char *str1 , char* str2 )
{
    char folded_first[ MAXSTRLEN ] ;
    char folded_second[ MAXSTRLEN ] ;
    int order ;

    upper_case( folded_first , str1 ) ;
    upper_case( folded_second , str2 ) ;

    order = strcmp( folded_first , folded_second ) ;
    return order ;
}
// Deliver a "tell" that arrived from another mud to a local user.
//
// cname     - display name of the sender (may be 0)
// from, mud - sender id and the name of the mud the message came from
// to        - id of the local recipient
// msg       - message text; newlines are stripped before delivery
// wiz_level - sender level; a level of 3 or more bypasses env/no_tell
//
// Returns the status line for the sender: a "not online" text when the
// recipient cannot be found or has env/invisible set, a refusal when the
// recipient blocks the sender, otherwise a delivery failure or confirmation.
string remote_tell(string cname, string from, string mud, string to,
                   string msg, int wiz_level)
{
        object ob;
        string fromid;
        string no_tell;
        string can_tell;
        string tell_out;

        ob = MESSAGE_D->find_user(to);
        if (! ob)
                return "这个人现在不在线上。";

        // An invisible user is reported exactly like an absent one.
        if (ob->query("env/invisible"))
                return "这个人现在不在线上。";

        // The block list is matched against the lower-cased "name@mud" form.
        fromid = lower_case(from + "@" + mud);
        no_tell = ob->query("env/no_tell");
        if ((! intp(wiz_level) || wiz_level < 3) &&
            (no_tell == "all" || no_tell == "ALL" || is_sub(fromid, no_tell)))
        {
                // env/can_tell overrides env/no_tell for the listed senders.
                can_tell = ob->query("env/can_tell");
                if (! is_sub(fromid, can_tell))
                        return "这个人不想听你罗嗦啦。";
        }

        // From here on the display form "Name@MUD" is used.
        fromid = capitalize(from) + "@" + upper_case(mud);
        msg = replace_string(msg, "\n", "");

        if (! cname)
        {
                cname = "未知";
                tell_out = sprintf(HIG "%s 告诉你:%s\n" NOR, fromid, msg);
        } else
                tell_out = sprintf(HIG "%s(%s)告诉你:%s\n" NOR, cname, fromid, msg);

        to = capitalize(to);

        if (! notice_user(cname, fromid, ob, tell_out))
                return sprintf(HIG "你的话没有送到%s(%s@%s)的耳边。\n" NOR,
                               ob->name(1), to, upper_case(INTERMUD_MUD_NAME));

        return sprintf(HIG "你告诉%s(%s@%s):%s" NOR,
                       ob->name(1), to, upper_case(INTERMUD_MUD_NAME), msg);
}
// Build the "index" help page from the list of skill names and hand it to
// do_write().  A name without a '.' is printed as an upper-cased root entry;
// for any other name one " " is emitted per dotted prefix and only the last
// path component is printed.
void do_index( string *skills ) {
   string page;
   int idx;

   page = "SKILLS\nINDEX\nThere are currently aproximately half a quintrillion different skills available to players in the game. This file is an index of the entire tree. Enjoy.\n\n";

   for( idx = 0; idx < sizeof(skills); idx++ ) {
      string name = skills[idx];
      int sep = member( name, '.' );

      if( sep != -1 ) {
         // strip one leading component per pass, indenting as we go
         while( sep != -1 ) {
            page += " ";
            name = name[sep+1..];
            sep = member( name, '.' );
         }
         page += " ~CDEF- ~CBRT"+name+"\n";
      }
      else {
         // no dot means root skill
         page += " ~CBRT+ ~CTIT"+upper_case(name)+"\n";
      }
   }

   page += "\n~CLABSee also: ~CREFskills, stats, spells";
   do_write( "index", page );
}
// Build a file-dialog filter string from OpenImageIO's "extension_list"
// attribute.  The attribute is split on ';' into entries, each entry on its
// first ':' into a format name and a ','-separated extension list.  Every
// entry becomes "<FORMAT> Files (*.ext1;*.ext2);;" and the result ends with
// "All Files (*.*)".
QString compute_oiio_files_filter()
{
    string extension_list;
    OIIO::getattribute("extension_list", extension_list);

    vector<string> entries;
    split(extension_list, ";", entries);

    stringstream filter;

    for (const_each<vector<string> > entry = entries; entry; ++entry)
    {
        const string::size_type colon = entry->find_first_of(':');
        const string format_name = entry->substr(0, colon);
        const string extension_csv = entry->substr(colon + 1);

        vector<string> extensions;
        split(extension_csv, ",", extensions);

        filter << upper_case(format_name) << " Files (";

        // Patterns inside one group are separated by ';'.
        bool first_pattern = true;
        for (const_each<vector<string> > ext = extensions; ext; ++ext)
        {
            if (!first_pattern)
                filter << ";";
            first_pattern = false;

            filter << "*." << *ext;
        }

        filter << ");;";
    }

    filter << "All Files (*.*)";

    return QString::fromStdString(filter.str());
}
// Upper-cases `text` and rewrites the escape tokens "~[0X2B]".."~[0X2F]" to
// "~[0X02]".."~[0X06]".
//
// Every token and its replacement are 7 characters long and a replacement
// can never form a new token, so one left-to-right pass per token yields the
// same result as the original "rescan from the start until nothing changes"
// loop, without the quadratic rescanning and the duplicated find() calls.
string from6to8(string text)
{
    static const char* const remap[][2] =
    {
        { "~[0X2B]", "~[0X02]" },
        { "~[0X2C]", "~[0X03]" },
        { "~[0X2D]", "~[0X04]" },
        { "~[0X2E]", "~[0X05]" },
        { "~[0X2F]", "~[0X06]" }
    };
    static const string::size_type token_len = 7;

    text = upper_case(text);

    for( size_t k = 0; k < sizeof(remap) / sizeof(remap[0]); ++k )
    {
        // Resume the search just past each replaced token.
        for( string::size_type pos = text.find(remap[k][0]);
             pos != string::npos;
             pos = text.find(remap[k][0], pos + token_len) )
        {
            text.replace(pos, token_len, remap[k][1]);
        }
    }

    return text;
}
// Unit test: upper_case() applied in place to a mixed-case std::string must
// leave it equal to "LOWER UPPER".
static bool test_upper_case_non_empty() {
	const char* const expected = "LOWER UPPER";

	emit_test("Test upper_case() on a non-empty std::string.");

	std::string subject("lower UPPER");
	upper_case(subject);

	emit_output_expected_header();
	emit_retval("%s", expected);
	emit_output_actual_header();
	emit_retval("%s", subject.c_str());

	if(strcmp(subject.c_str(), expected) != MATCH) {
		FAIL;
	}
	PASS;
}
/*
 * Echo loop for one connection: read text lines from connfd until
 * Rio_readlineb() returns 0, add each line's length to the file-level
 * counter bytecnt, log it with the given prefix, pass the buffer to
 * upper_case() and write the same number of bytes back to the peer.
 */
void echo(int connfd, char *prefix)
{
    char line[MAXLINE];
    rio_t reader;
    size_t got;

    Rio_readinitb(&reader, connfd);

    for (;;) {
        got = Rio_readlineb(&reader, line, MAXLINE);
        if (got == 0)
            break;

        bytecnt += got;
        printf("%sreceived %d bytes (%d total)\n", prefix, (int) got, bytecnt);

        upper_case(line);
        Rio_writen(connfd, line, got);
    }
}
// Handle an incoming inter-mud channel ("gchannel") packet.
//
// The packet is dropped when the caller fails ACCESS_CHECK, when it carries
// no NAME, or when NAME is this mud.  A packet from a mud that is unknown to
// DNS_MASTER, or without a USRNAME, is answered with a ping only.  When the
// source address differs from the address registered for that mud, the
// message is logged and a warning is sent back to the source address.
// Otherwise "channel_id" is set and the message is relayed via CHANNEL_D.
void incoming_request(mapping info)
{
        mapping minfo;
        string tmsg;

#ifdef DEBUG
        CHANNEL_D->do_channel(this_object(), DEBUG,
                sprintf("gchannel message get from %s:%s.",
                        info["HOSTADDRESS"], info["PORTUDP"]));
#endif

        if (!ACCESS_CHECK(previous_object()))
                return;

        if (!info["NAME"])
                return;

        // Ignore our own broadcasts.
        if (info["NAME"] == Mud_name())
                return;

        minfo = DNS_MASTER->query_mud_info(info["NAME"]);
        if (!minfo || !strlen(info["USRNAME"]) ||
            !DNS_MASTER->dns_mudp(info["NAME"]))
        {
                // We don't accept the message. But ping them anyway.
                PING_Q->send_ping_q(info["HOSTADDRESS"], info["PORTUDP"]);
                return;
        }

        if (info["HOSTADDRESS"] != minfo["HOSTADDRESS"])
        {
                // Faked: the sender address does not match the registered one.
                if (info["EMOTE"])
                        tmsg = info["USRNAME"]+"@"+upper_case(info["NAME"])+" "+info["MSG"];
                else
                        tmsg = info["USRNAME"]+"@"+info["NAME"]+": "+info["MSG"];

                dns_log("dns_fake", sprintf("Gchannel: %s %s\n%s",
                        ctime(time()), info["HOSTADDRESS"], tmsg));

                DNS_MASTER->send_udp(info["HOSTADDRESS"], info["PORTUDP"],
                        "@@@"+DNS_WARNING+
                        "||NAME:"+Mud_name()+
                        "||MSG: Fake gchannel msg: "+tmsg+
                        "||FAKEHOST:"+info["HOSTADDRESS"]+
                        "@@@\n");
                return;
        }

        // Remember who is speaking, with the display name when one was sent.
        if (undefinedp(info["CNAME"]))
                set("channel_id", sprintf("%s@%s", info["USRNAME"], info["NAME"]));
        else
                set("channel_id", sprintf("%s(%s@%s)", info["CNAME"],
                        info["USRNAME"], info["NAME"]));

        CHANNEL_D->do_channel(this_object(), lower_case(info["CHANNEL"]),
                info["MSG"], info["EMOTE"], 1);
}
/*
 * Identify every entry of the zip archive `fn`: each entry name is passed to
 * upper_case() and then handed to romident() with the entry's CRC32 and
 * uncompressed size.
 *
 * Returns 0 on success, 1 when the archive cannot be opened.
 */
int ident_zip(char *fn)
{
    ZIP* archive;
    struct zipent* entry;

    printf("Zip file to ident = '%s'\n", fn);

    archive = openzip(fn);
    if (archive == 0) {
        printf("Error, cannot open zip file '%s' !\n", fn);
        return 1;
    }

    for (entry = readzip(archive); entry != 0; entry = readzip(archive)) {
        upper_case(entry->name);
        romident(entry->name, entry->crc32, entry->uncompressed_size);
    }

    closezip(archive);
    return 0;
}
// Return the file-dialog filter string for all image formats reported by
// OpenImageIO's "extension_list" attribute.  The string is built on the first
// call and kept in a function-local static; a function-local mutex is held
// for the whole call.  Each format becomes "<FORMAT> Files (*.a *.b);;" and
// the string ends with "All Files (*.*)".
QString get_oiio_image_files_filter()
{
    static QString filter;
    static QMutex mutex;

    QMutexLocker locker(&mutex);

    // Already built by an earlier call.
    if (!filter.isEmpty())
        return filter;

    string extension_list;
    OIIO::getattribute("extension_list", extension_list);

    vector<string> entries;
    split(extension_list, ";", entries);

    stringstream text;

    for (const_each<vector<string> > entry = entries; entry; ++entry)
    {
        const string::size_type colon = entry->find_first_of(':');
        const string format_name = entry->substr(0, colon);
        const string extension_csv = entry->substr(colon + 1);

        vector<string> extensions;
        split(extension_csv, ",", extensions);

        text << upper_case(format_name) << " Files (";

        // Patterns inside one group are separated by a single space.
        bool first_pattern = true;
        for (const_each<vector<string> > ext = extensions; ext; ++ext)
        {
            if (!first_pattern)
                text << " ";
            first_pattern = false;

            text << "*." << *ext;
        }

        text << ");;";
    }

    text << "All Files (*.*)";

    filter = QString::fromStdString(text.str());
    return filter;
}
// Return the current time formatted as "YYYY.MM.DD-HH.MM".
//
// When the driver reports a non-zero GMT offset, the time is used as is.
// When it reports zero, an hour offset is taken from
// TIME_D->GetOffset(query_tz()) plus EXTRA_TIME_OFFSET and applied first.
// rawtz is assigned in both branches but is not part of the returned string.
string timestamp(){
    string rawtz;
    int *t, gmtoff, offset;

    gmtoff = localtime(time())[LT_GMTOFF];

    if (gmtoff != 0){
        rawtz = upper_case(localtime(time())[LT_ZONE]);
        offset = 0;
    }
    else {
        // the driver set the gmtoffset to zero, so calculate an offset:
        // if the timezone.cfg file exists, use it else default to GMT
        rawtz = query_tz();
        offset = TIME_D->GetOffset(rawtz);
        offset += EXTRA_TIME_OFFSET;
    }

    // offset is in hours
    t = localtime(time()+(offset*3600));

    return sprintf("%04d.%02d.%02d-%02d.%02d",
            t[LT_YEAR], (t[LT_MON])+1, t[LT_MDAY], t[LT_HOUR], t[LT_MIN]);
}
/*
 * Apply one "key = value" setting to *sttngs.
 *
 * key    : setting name; compared after str_strip() and upper_case().
 * val    : setting value; processed after str_strip(), not upper-cased.
 * sttngs : settings structure that is updated in place.
 *
 * Returns rv, which starts at 0 and is written by the _atotime / _atof /
 * _atodate / _ra converters through their second argument.  An unrecognised
 * key changes nothing and returns 0.
 */
int settings_set_variable(const char* key, const char* val, settings_t* sttngs) {
    /* working copies; both are released with sfree() before returning */
    char* _key = str_strip(key);
    upper_case(_key);
    char* _val = str_strip(val);
    int rv = 0;
    if (strcmp(_key, "START_OBSERVATION")==0) {
        /* the parsed time is ADDED to start (see "DAY" below) */
        sttngs->start += _atotime(_val, &rv);
        /* an rv of -1 from _atotime is deliberately not reported */
        if (rv==-1) rv=0;
    } else if (strcmp(_key, "END_OBSERVATION")==0) {
        sttngs->stop += _atotime(_val, &rv);
        if (rv==-1) rv=0;
    } else if (strcmp(_key, "MAG")==0) {
        sttngs->mag_min = _atof(_val, &rv);
    } else if (strcmp(_key, "UM")==0) {
        sttngs->um_min = _atof(_val, &rv);
    } else if (strcmp(_key, "RA_RATE_MIN")==0) {
        sttngs->ra_rate_min = _atof(_val, &rv);
    } else if (strcmp(_key, "RA_RATE_MAX")==0) {
        sttngs->ra_rate_max = _atof(_val, &rv);
    } else if (strcmp(_key, "DECL_RATE_MIN")==0) {
        sttngs->decl_rate_min = _atof(_val, &rv);
    } else if (strcmp(_key, "DECL_RATE_MAX")==0) {
        sttngs->decl_rate_max = _atof(_val, &rv);
    } else if (strcmp(_key, "DAY")==0) {
        /* the date is added to both ends of the observation window;
           the value is parsed twice, rv reflects the second call */
        sttngs->start += _atodate(_val, &rv);
        sttngs->stop += _atodate(_val, &rv);
    } else if (strcmp(_key, "EFEM_DIR")==0) {
        /* NOTE(review): any previous sttngs->dir is overwritten without
           being freed - confirm ownership */
        sttngs->dir = strdup(_val);
    } else if (strcmp(_key, "BLACK_LIST")==0) {
        /* Split _val on blanks/tabs/newlines and append every non-empty
           token to sttngs->black_list. */
        int i=0;
        char* str = _val;       /* start of the token being collected */
        char* tmp;
        int len = strlen(_val);
        int flag=1;             /* 1 while the next separator ends a token */
        /* pre-increment: scanning starts at index 1 */
        while (++i<len) {
            if (_val[i]==' ' || _val[i]=='\t' || _val[i]=='\n') {
                if (flag) {
                    /* terminate the token in place and take a stripped copy */
                    _val[i]='\0';
                    tmp = str_strip(str);
                    if (strlen(tmp)) {
                        /* the address of tmp is passed; presumably vec_add
                           stores the pointer value - TODO confirm */
                        vec_add((void **)&sttngs->black_list, (void *)&tmp);
                        str = &(_val[i])+1;
                        flag=0;
                    } else {
                        free(tmp);
                    }
                }
            } else {
                flag=1;
            }
        }
        /* last token, which is not followed by a separator */
        tmp = str_strip(str);
        if (strlen(tmp)) {
            vec_add((void **)&sttngs->black_list, (void *)&tmp);
        } else {
            free(tmp);
        }
    } else if (strcmp(_key, "RA_POSITION_MIN")==0) {
        sttngs->ra_position_min = _ra(_val, &rv);
    } else if (strcmp(_key, "RA_POSITION_MAX")==0) {
        sttngs->ra_position_max = _ra(_val, &rv);
    } else if (strcmp(_key, "REPORT_TYPE")==0) {
        /* only the exact value "HTML" is recognised; anything else is ignored */
        if (strcmp(_val, "HTML")==0) {
            sttngs->report_type = report_type_html;
        }
    } else if (strcmp(_key, "REPORT_HTML_FONT_SIZE")==0) {
        sttngs->report_html_font_size = strdup(_val);
    } else if (strcmp(_key, "USE_ORB_FILE")==0) {
        /* every value except the exact string "NO" enables the orb file */
        sttngs->use_orb_file = (strcmp(_val, "NO")!=0);
    } else if (strcmp(_key, "ORB_FILE")==0) {
        sttngs->orb_file = strdup(_val);
    }
    sfree(_val);
    sfree(_key);
    return rv;
}
/*
 * Line server: accepts one TCP client and, for every "LINE\n" request, reads
 * the next line of the given file (wrapping to the start at end of file),
 * passes it to upper_case() and sends the fixed-size line buffer back.
 *
 * Usage: <prog> <filename> <port no>
 *
 * Returns 0 when the client closes the connection, 1 when the client sends
 * anything other than "LINE\n".  Socket and file errors terminate through
 * error() / exit() as before.
 */
int main(int argc, char *argv[])
{
    int sockfd, newsockfd, portno;
    socklen_t clilen;              /* accept() requires socklen_t, not int */
    char buffer[256];
    struct sockaddr_in serv_addr, cli_addr;
    int n;
    int status = 0;
    FILE *fp;
    char line[256];
    char *line_cap;

    printf("Inside Server Code\n");
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <filename> <port no>\n", argv[0]);
        exit(1);
    }

    /* Socket to Send Capitalized Words to Client */
    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0)
        error("ERROR opening socket");

    /* Populating Server Information */
    memset((char *) &serv_addr, 0, sizeof(serv_addr));
    portno = atoi(argv[2]);
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_addr.s_addr = INADDR_ANY;
    serv_addr.sin_port = htons(portno);

    /* Server Binds and Listens */
    if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0)
        error("ERROR on binding");
    listen(sockfd, 5);

    /* Accepting Connections */
    clilen = sizeof(cli_addr);
    newsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);
    if (newsockfd < 0)
        error("ERROR on accept");

    /* Open the File for reading */
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        perror(argv[1]);
        exit(EXIT_FAILURE);
    }

    while (1) {
        /* Read at most size-1 bytes so the buffer always stays terminated. */
        memset(buffer, 0, sizeof(buffer));
        n = read(newsockfd, buffer, sizeof(buffer) - 1);
        if (n < 0)
            error("ERROR reading from socket");
        if (n == 0)
            break;                 /* client closed the connection */

        printf("Here is the message: %s", buffer);

        if (strcmp(buffer, "LINE\n") != 0) {
            status = 1;            /* unknown request: same exit code as before */
            break;
        }

        /* Zero the frame first: the whole buffer is sent, so this keeps stale
           bytes off the wire and yields "" for an empty file. */
        memset(line, 0, sizeof(line));
        if (fgets(line, sizeof(line), fp) == NULL) {
            /* end of file: wrap around and read the first line again */
            printf("eof");
            fseek(fp, 0L, SEEK_SET);
            if (fgets(line, sizeof(line), fp) == NULL)
                line[0] = '\0';
        }

        printf("Server Read = %s", line);
        line_cap = upper_case(line);
        printf("Server Processed = %s", line_cap);

        if (feof(fp))
            fseek(fp, 0L, SEEK_SET);

        /* Send that Capitalized Line to the Client.
           NOTE(review): `line` is sent, not `line_cap`; this is only the
           capitalised text if upper_case() works in place - confirm. */
        n = write(newsockfd, line, sizeof(line));
        if (n < 0)
            error("ERROR writing to socket");
    }

    /* Reached once the client disconnects or sends an unknown request. */
    fclose(fp);
    close(newsockfd);
    close(sockfd);
    return status;
}
//--------- Begin of function Font::text_width ----// // // Given the textPtr string, then calculate the width (unit:pixel). // // char* textPtr = the textPtr to be put on screen // [int] textPtrLen = the length of the textPtr to be put // (default : -1, until NULL character) // [int] maxDispWidth = the maximum width can be displayed // the textPtr width cannot > maxDispWidth // [cap] set all letter to Cap letter (default : 0) // // Note : static var text_line_count is used to pass value to text_height() // // Return : <int> the screen width of the textPtr display using this font // int Font::text_width(const char* textPtr, int textPtrLen, int maxDispWidth, int cap) { int charWidth, x=0, lenCount, maxLen=0, wordWidth=0; short textChar; if( !init_flag ) return x; textPtr = translate.process(textPtr); if( textPtrLen < 0 ) textPtrLen = strlen(textPtr); text_line_count=1; //-------------------------------------// for( lenCount=1 ; *textPtr && lenCount<=textPtrLen ; textPtr++, lenCount++, x+=inter_char_space ) { textChar = *((unsigned char*)textPtr); // ###### begin Ban 18/1 ######// if( cap==1 ) { textChar = upper_case(textChar); } // ###### end Ban 18/1 ######// //-- if the line exceed the given max width, advance to next line --// if( maxDispWidth && x > maxDispWidth ) { maxLen = maxDispWidth; x = wordWidth; // the last word of the prev line wraps to next line text_line_count++; } //--- if the textPtr has more than 1 line, get the longest line ---// if( textChar == '\n' ) { if( x>maxLen ) maxLen=x; x=0; wordWidth=0; text_line_count++; continue; // next character } //-------- control char: FIRST_NATION_COLOR_CODE_IN_TEXT -----------// else if( textChar >= FIRST_NATION_COLOR_CODE_IN_TEXT && textChar <= LAST_NATION_COLOR_CODE_IN_TEXT ) // display nation color bar in text { x += NATION_COLOR_BAR_WIDTH; wordWidth = 0; } //--- add the width of the character to the total line width ---// else if( textChar == ' ' ) { x += space_width; wordWidth = 0; } else if( textChar >= 
first_char && textChar <= last_char ) { charWidth = font_info_array[textChar-first_char].width; x += charWidth; wordWidth += charWidth; } else { x += space_width; wordWidth += space_width; } if( maxDispWidth && wordWidth > maxDispWidth ) { x -= wordWidth - maxDispWidth; wordWidth = maxDispWidth; } } //-------------------------------------------// if( maxDispWidth && x > maxDispWidth ) text_line_count++; if( textPtr[-1] == '\n' ) // if last character is line feed, don't count double text_line_count--; return max(maxLen,x); }
//--------- Start of function Font::put ---------// // // write text with pre-inited fonts // // int x,y = location of the font // char* text = the text to be put on screen // [int] clearBack = clear background with back_color or not // (default : 0) // [int] x2 = display font up to the right border x2 and also // clear the area between the last character and right border // [int] cap = set all letter to Cap letter (default : 0) // // Return : <int> lastX, the x coordination of the last pixel of last font // int Font::put(int x,int y,const char* textPtr, char clearBack, int x2, int cap ) { err_when( x<0 || y<0 ); if( !init_flag ) return x; //-------- process translation ---------// short textChar; textPtr = translate.process(textPtr); //-------------------------------------// int textPtrLen = strlen(textPtr); if( x2 < 0 ) // default x2 = x+max_font_width*textPtrLen; x2 = min( x2, VGA_WIDTH-1 ); int y2 = y+font_height-1; //-------------------------------------// FontInfo* fontInfo; for( int lenCount=1 ; *textPtr && lenCount<=textPtrLen ; textPtr++, lenCount++ ) { textChar = *((unsigned char*)textPtr); // textChar is <unsiged char> // ###### begin Ban 18/1 ######// if( cap==1 ) { textChar = upper_case(textChar); } // ###### end Ban 18/1 ######// //--------------- space character ------------------// if( textChar == ' ' ) { if( x+space_width > x2 ) break; x += space_width; } //####### patch begin Gilbert 28/2 ########// // --------- control char: FIRST_NATION_COLOR_CODE_IN_TEXT -----------// else if( textChar >= FIRST_NATION_COLOR_CODE_IN_TEXT && textChar <= LAST_NATION_COLOR_CODE_IN_TEXT ) // display nation color bar in text { if( x2 >= 0 && x+NATION_COLOR_BAR_WIDTH-1 > x2 ) // exceed right border x2 break; char colorCode = game.color_remap_array[textChar-FIRST_NATION_COLOR_CODE_IN_TEXT].main_color; nation_array.disp_nation_color(x, y+2, colorCode); x += NATION_COLOR_BAR_WIDTH; } //####### end begin Gilbert 28/2 ########// //------------- normal character 
----------------// else if( textChar >= first_char && textChar <= last_char ) { fontInfo = font_info_array+textChar-first_char; if( x+fontInfo->width > x2 ) break; if( fontInfo->width > 0 ) { Vga::active_buf->put_bitmap_trans(x, y+fontInfo->offset_y, font_bitmap_buf + fontInfo->bitmap_offset); x += fontInfo->width; // inter-character space } } else { //------ tab or unknown character -------// if( textChar == '\t' ) // Tab x += space_width*8; // one tab = 8 space chars else x += space_width; } //--------- inter-character space ---------// x+=inter_char_space; } return x-1; }
/**
 * Return the next lexeme from the source stream.
 *
 * Input is read in chunks of _pconf._read_buffer_size bytes into _buffer;
 * _i is the read position inside the chunk and is kept between calls.  Every
 * character is appended to _strbuf and drives the transition table Lexer
 * (columns 0..14 are indexed by character class).  When the new state is one
 * of the accepting states handled below, the collected text is moved to
 * _strbuf_copy, the state is reset to C1 and the lexeme is returned.
 *
 * Words of 1 to 6 characters are upper-cased and compared with the known tag
 * names; a match returns the tag's lexeme instead of WORD.
 *
 * @return the lexeme found, or TERM when the stream is exhausted.
 */
lexeme lexer::get_lexeme(void){
	do{ /* we rotate here until the input stream is exhausted */
		if(_i == _read_chars_num){
			// the current chunk is used up: fetch the next one
			_pconf._source_stream->read(_buffer, static_cast<streamsize>(_pconf._read_buffer_size));
			_read_chars_num = static_cast<size_t>(_pconf._source_stream->gcount());
			if(!_read_chars_num){
				if(_current_state == C2){
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					_i = 0;
					// the stream ended in state C2: return what was collected
					// as a WORD; the next call will then return TERM
					return WORD;
				}
				return TERM; /* end of stream reached */
			}
			//if(_tot_characters_read + _read_chars_num > _pconf._max_content_length) /* we have reached max_content_length limitation */
			//	throw out_of_range(get_module_msg("max content length reached (") + stream_cast<string>(_pconf._max_content_length) + ")");
			_tot_characters_read += _read_chars_num; /* increment read character counter */
			_i = 0;
		}
		for(; _i < _read_chars_num; _i++){
			//_strbuf.push_back(_buffer[_i]);
			_strbuf.append(1, _buffer[_i]);
			// select the transition column from the character just read
			switch(_buffer[_i]){
				case COLON :
					_current_state = Lexer[_current_state][3];
					break;
				//case SEMICOLON :
				//	_current_state = Lexer[_current_state][4];
				//	break;
				case QUOTE :
					_current_state = Lexer[_current_state][5];
					break;
				case ENTER :
					_current_state = Lexer[_current_state][1];
					break;
				case CR_RETURN :
					_current_state = Lexer[_current_state][2];
					break;
				case SPACEBAR :
					_current_state = Lexer[_current_state][6];
					break;
				case EQUALITY :
					_current_state = Lexer[_current_state][10];
					break;
				case LEFT_SQUARE_BRACKET:
					_current_state = Lexer[_current_state][11];
					break;
				case RIGHT_SQUARE_BRACKET:
					_current_state = Lexer[_current_state][12];
					break;
				case SLASH:
					_current_state = Lexer[_current_state][13];
					break;
				case MULT:
					_current_state = Lexer[_current_state][14];
					break;
				default:
					_current_state = Lexer[_current_state][0];
					break;
			}
			// NOTE(review): _was_erased is written below but never read in this function
			bool _was_erased = false;
			/* now, check whether we are in a terminating state */
			if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == YES){
				// push back one symbol: drop it from the collected text.
				// _i does not have to be decreased: the returns below leave
				// the for loop before its _i++ runs, so the same character
				// is read again on the next call
				if(int(_strbuf.length()-1) >= 0){
					_strbuf.erase(_strbuf.length()-1);
					_was_erased = true;
				}
			}
			// terminating state without push-back: the character is consumed,
			// so step over it by hand (the return skips the loop increment)
			if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == NO && _i + 1 <= _read_chars_num){
				_i++;
			}
			switch(_current_state){
				case C3: /* we found WORD */
					// This is not the proper place for this hack
					// but it makes the parser simpler:
					// tag names are recognised here, grouped by word length
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					switch(_strbuf_copy.length()){
						case 1:
							_upper_str = upper_case(_strbuf_copy);
							if(_upper_str == _BOLD) return BOLD;
							if(_upper_str == _ITALIC) return ITALIC;
							if(_upper_str == _UNDERLINE) return UNDERLINE;
							if(_upper_str == _HEADER) return HEADER;
							if(_upper_str == _SMALL) return SMALL;
							if(_upper_str == _OFFTOPIC) return OFFTOPIC;
							if(_upper_str == _Q) return QQUOTE;
							if(_upper_str == _MULT) return MULT;
							break;
						case 2:
							_upper_str = upper_case(_strbuf_copy);
							if(_upper_str == _HR) return HR;
							break;
						case 3:
							_upper_str = upper_case(_strbuf_copy);
							if(_upper_str == _SUP) return SUP;
							if(_upper_str == _SUB) return SUB;
							if(_upper_str == _RED) return RED;
							if(_upper_str == _URL) return URL;
							if(_upper_str == _IMG) return IMG;
							if(_upper_str == _PRE) return PRE;
							// NOTE(review): second test for _RED, cannot be reached with a match
							if(_upper_str == _RED) return RED;
							break;
						case 4:
							_upper_str = upper_case(_strbuf_copy);
							if(_upper_str == _SIZE) return SIZE;
							if(_upper_str == _FACE) return FACE;
							if(_upper_str == _LIST) return LIST;
							if(_upper_str == _CODE) return CODE;
							// NOTE(review): _QUOTE is tested for length 4 and for length 5
							if(_upper_str == _QUOTE) return QQUOTE;
							if(_upper_str == _BLUE) return BLUE;
							break;
						case 5:
							_upper_str = upper_case(_strbuf_copy);
							if(_upper_str == _EMAIL) return EMAIL;
							if(_upper_str == _QUOTE) return QQUOTE;
							if(_upper_str == _COLOR) return COLOR;
							if(_upper_str == _GREEN) return GREEN;
							if(_upper_str == _WHITE) return WHITE;
							if(_upper_str == _BLACK) return BLACK;
							if(_upper_str == _OLIST) return OLIST;
							// NOTE(review): no break here, control falls through into case 6
						case 6:
							_upper_str = upper_case(_strbuf_copy);
							if(_upper_str == _ORANGE) return ORANGE;
							if(_upper_str == _PURPLE) return PURPLE;
							if(_upper_str == _YELLOW) return YELLOW;
					};
					// not a known tag name: plain word
					return WORD;
				case C6: /* we found COLON */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return COLON;
				case C7: /* we found SEMICOLON */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return SEMICOLON;
				case C8: /* we found QUOTE */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return QUOTE;
				case C11: /* we found SPACE */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return SPACE;
				case C12: /* we found EQUALITY */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return EQUALITY;
				case C13: /* we found LEFT_SQUARE_BRACKET */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return LEFT_SQUARE_BRACKET;
				case C14: /* we found RIGHT_SQUARE_BRACKET */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return RIGHT_SQUARE_BRACKET;
				case C15: /* we found SLASH */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return SLASH;
				case C16: /* we found MULT */
					_strbuf_copy = _strbuf;
					_strbuf.clear();
					_current_state = C1;
					return MULT;
				default :
					// no lexeme completed yet: keep collecting characters
					break;
			}
		}
	}while(_read_chars_num <= _pconf._read_buffer_size);
	return TERM;
}
void align_reads(meta_data& meta_details, vector<reference_index>& refindex) //void align_reads(string& first_read_file, string& second_read_file, string& sam_file, vector<reference_index>& refindex) { unordered_map<string, int> umap; ofstream fp_detail, fp_fastq; ifstream fp_read_first, fp_read_second; ofstream fp_sam;//, fp_sam_first, fp_sam_second; string detail_file = meta_details.output_path + meta_details.index + "_Details.txt"; string sam_output_file = meta_details.output_path + meta_details.index + "_All.sam"; string sam_fread_file = meta_details.output_path + meta_details.index + "_R1.sam"; string sam_sread_file = meta_details.output_path + meta_details.index + "_R2.sam"; string fastq_output_file = meta_details.output_path + meta_details.index + "_All.detail"; fp_detail.open(detail_file.c_str(), ofstream::out); fp_read_first.open(meta_details.first_read.c_str(), ifstream::in); fp_read_second.open(meta_details.second_read.c_str(), ifstream::in); fp_sam.open(sam_output_file.c_str(), ofstream::out | ofstream::app); //fp_sam_first.open(sam_fread_file.c_str(), ofstream::out); //fp_sam_second.open(sam_sread_file.c_str(), ofstream::out); fp_fastq.open(fastq_output_file.c_str(), ofstream::out); string input_first, read_name_first; string readseq_first, refgenome_first; string input_second, read_name_second; string readseq_second, refgenome_second; string quality_first, quality_second; string sam_output_name; string readsequence, readquality; fragment_alignment fragment_alignment_first; fragment_alignment fragment_alignment_second; vector<string> sam_output_first; vector<string> sam_output_second; /* for(int i = 0; i < refindex.size(); i++) { fp_fastq << "> CONSENSUS" << endl; fp_fastq << refindex[i].ref << endl; } */ int map = 0, count = 0, cant_map = 0, name_length; cell **matrix = NULL; //init_matrix(matrix);//does not work matrix = new cell *[2 * FRAGMENT_SIZE + 5]; for(int i = 0; i < 2 * FRAGMENT_SIZE + 5; i++) { matrix[i] = new cell[FRAGMENT_SIZE + 5]; } 
if(DEBUG == 99) fp_detail << "size of matrix = " << sizeof(matrix) << endl; while(getline(fp_read_first, read_name_first) && getline(fp_read_second, read_name_second)) { getline(fp_read_first, readseq_first); getline(fp_read_first, input_first); getline(fp_read_first, quality_first); getline(fp_read_second, readseq_second); getline(fp_read_second, input_second); getline(fp_read_second, quality_second); for(int i = 0; i < read_name_first.length(); i++) { if(read_name_first.at(i) == read_name_second.at(i)) name_length = i; else break; } sam_output_name = meta_details.index + "_" + read_name_first.substr(0, name_length); /* if(read_name_first.compare(read_name_second) != 0) { exit(0); continue; } */ //cout << "Input = " << readseq_first << endl; if(count >= MAXREAD && MAXREAD != 0) break; count += 1; if(count <= MINREAD) continue; if(count % 100000 == 0) { cout << endl << "Total " << count << " Reads Aligned for " << meta_details.index << endl << endl; } fp_detail << "####################################################################################################################" << endl; fp_detail << count << ") " << sam_output_name << endl << endl; fp_detail << "####################################################################################################################" << endl; if(DEBUG == 99) { fp_detail << "First ) " << read_name_first << endl; fp_detail << "Read String = " << readseq_first << endl;//.substr(0, 80) << endl; fp_detail << "Quality = " << input_first << ": " << quality_first << endl << endl; fp_detail << "Second) " << read_name_second << endl; fp_detail << "Read String = " << readseq_second << endl;//.substr(0, 80) << endl; fp_detail << "Quality = " << input_second << ": " << quality_second << endl << endl; //referenceless_alignment(readseq_first, readseq_second, quality_first, // quality_second, matrix, fp_detail); } upper_case(readseq_first); upper_case(readseq_second); referenceless_alignment(readseq_first, readseq_second, 
quality_first, quality_second, readsequence, readquality, matrix, fp_detail); /* ////////////////////////////////////////////////////////////////////////////////////////// align_read_to_reference(readseq_first, read_name_first, fragment_alignment_first, quality_first, sam_output_first, refindex, matrix, fp_sam_first, fp_detail); align_read_to_reference(readseq_second, read_name_second, fragment_alignment_second, quality_second, sam_output_second, refindex, matrix, fp_sam_second, fp_detail); merge_both_alignments(refindex, sam_output_name, fragment_alignment_first, fragment_alignment_second, fp_sam, fp_detail, fp_fastq, umap); sam_output_first.clear(); fragment_alignment_first.alignment.clear(); sam_output_second.clear(); fragment_alignment_second.alignment.clear(); ///////////////////////////////////////////////////////////////////////////////////////// */ fragment_alignment final_alignment_info; vector<string> final_result; align_read_to_reference(readsequence, sam_output_name, final_alignment_info, readquality, final_result, refindex, meta_details, matrix, fp_sam, fp_detail); if(umap.find(final_result[5]) == umap.end()) { umap[final_result[5]] = 1; fp_fastq << final_result[0] << endl; fp_fastq << final_result[5] << endl; fp_fastq << final_result[9] << endl; } else { umap[final_result[5]] += 1; } final_result.clear(); final_alignment_info.alignment.clear(); //break; } if(DEBUG == 99) { fp_detail << endl << "Overall Statistics - " << endl; fp_detail << "Total read = " << count << endl; } //remove_matrix(matrix); for(int i = 0; i < 2 * FRAGMENT_SIZE + 5; i++) { delete [] matrix[i]; matrix[i] = NULL; } delete [] matrix; matrix = NULL; fp_detail.close(); fp_read_first.close(); fp_read_second.close(); fp_sam.close(); //fp_sam_first.close(); //fp_sam_second.close(); fp_fastq.close(); ifstream fp_input; fp_input.open(fastq_output_file.c_str(), ifstream::in); ofstream fp_fasta; string fastq_fasta_file = meta_details.output_path + meta_details.index + "_All.fasta"; 
fp_fasta.open(fastq_fasta_file.c_str(), ofstream::out); string read_name, cigar, alignment_string; for(int i = 0; i < refindex.size(); i++) { fp_fasta << ">CONSENSUS" << endl; fp_fasta << refindex[i].ref << endl; } while(getline(fp_input, read_name)) { getline(fp_input, cigar); getline(fp_input, alignment_string); fp_fasta << ">" << read_name; fp_fasta << "|DUPCOUNT=" << umap[cigar]; fp_fasta << "|CIGAR=" << cigar << endl; fp_fasta << alignment_string << endl; } }
/**
 * Ordering callback for two length-delimited byte strings.
 *
 * @param empty  not used in this function
 * @param x_len  length of the first string in bytes
 * @param x_c    first string (not necessarily NUL terminated)
 * @param y_len  length of the second string in bytes
 * @param y_c    second string
 * @return -1, 0 or 1
 *
 * Steps: (1) unless code_page is 1252, bytes below 32 are removed;
 * (2) the text is rearranged around the 0x1e / 0x1f marker bytes;
 * (3) "~" escapes, up to the next "]", are cut out; (4) the strings are
 * compared character by character with c_compare(), a result of -1 / 1
 * decides immediately, -2 / 2 is remembered and only decides when both
 * strings have the same length; (5) otherwise the longer string is greater.
 *
 * NOTE(review): the parameter list has the shape of an SQLite collation
 * callback - confirm how it is registered.
 */
int compare(void* empty,int x_len,const void* x_c,int y_len,const void* y_c)
{
	std::string::size_type t_pozX = 0;
	std::string::size_type t_pozY = 0;
	size_t tmp;
	std::string stringX,stringY;
	std::string x,y;
	bool overwrite;
	char t_result,n_result;
	bool national_char;

	n_result = 0;
	t_result = 0;

	x = std::string((const char*)x_c,x_len);
	y = std::string((const char*)y_c,y_len);

	// skipping of special codes....
	// changed ordering for the codes 1e and 1f
	/*
	if( x.find(0x1d) != x.npos) {
		t_pozX = x.find(0x1d);
		//stringX = x.substr(t_pozX+7);
		x = x.substr(0,t_pozX);
	}
	if( y.find(0x1d) != x.npos) {
		t_pozY = y.find(0x1d);
		//stringX = x.substr(t_pozX+7);
		y = y.substr(0,t_pozY);
	}
	*/

	if( code_page != 1252 ) {
		// NOTE(review): i is also incremented after an erase, so the byte that
		// follows a removed one is not examined (of two adjacent control bytes
		// only the first is removed).  The 0x1e / 0x1f handling below may rely
		// on such bytes surviving - confirm before changing.
		for(size_t i = 0; i<x.size(); ++i) {
			if( ((unsigned char)x[i]) < 32 )
				x.erase(i,1);
		}
		for(size_t i = 0; i<y.size(); ++i) {
			if( ((unsigned char)y[i]) < 32 )
				y.erase(i,1);
		}
	}

	// --- first string: rearrange around the 0x1e / 0x1f markers ---
	// The upper-cased copy is only searched; the pieces are taken from the
	// original x, and without any marker stringX becomes x unchanged.
	overwrite = true;
	stringX = upper_case(x);

	if( stringX.find(0x1e) != stringX.npos) {
		// keep what follows the 0x1e marker
		t_pozX = stringX.find(0x1e);
		stringX = x.substr(t_pozX+1);
		//stringX += x.substr(0,t_pozX);
		overwrite = false;
	}
	if( stringX.find(0x1f) != stringX.npos) {
		// append the part of x in front of the 0x1f position.
		// NOTE(review): the position is found in stringX, which may already
		// be the shortened string from the branch above, but it is applied
		// to x - confirm.
		t_pozX = stringX.find(0x1f);
		//stringX = x.substr(t_pozX+1);
		stringX += x.substr(0,t_pozX);
		overwrite = false;
	}
	if( overwrite )
		stringX = x;

	// --- second string: same treatment ---
	overwrite = true;
	stringY = upper_case(y);

	if( stringY.find(0x1e) != stringY.npos) {
		t_pozY = stringY.find(0x1e);
		stringY = y.substr(t_pozY+1);
		//stringY += y.substr(0,t_pozY);
		overwrite = false;
	}
	if( stringY.find(0x1f) != stringY.npos) {
		t_pozY = stringY.find(0x1f);
		//stringY = y.substr(t_pozY+7);
		stringY += y.substr(0,t_pozY);
		overwrite = false;
	}
	if( overwrite )
		stringY = y;

	/*
	overwrite = true;
	stringY = upper_case(y);
	if( stringY.find("~[0X1E]") != stringY.npos) {
		t_pozY = stringY.find("~[0X1E]");
		stringY = y.substr(t_pozY+7);
		stringY += y.substr(0,t_pozY);
		overwrite = false;
	}
	if( stringY.find("~[0X1F]") != stringY.npos) {
		t_pozY = stringY.find("~[0X1F]");
		stringY = y.substr(t_pozY+7);
		stringY += y.substr(0,t_pozY);
		overwrite = false;
	}
	if( overwrite )
		stringY = y;
	*/

	t_pozY = 0;
	t_pozX=0;

	// --- cut "~" escapes out of the first string ---
	// NOTE(review): when no "]" exists, find() yields npos and npos+1 wraps to
	// 0, so substr(0) is the whole string; for input such as "a~[b" the string
	// then seems to grow on every pass and the loop may never end - confirm
	// that callers always pass well-formed "~[...]" escapes.
	while( stringX.find("~") != stringX.npos ) {
		tmp = stringX.find("~");
		// a "~" that is not directly followed by "[" is dropped first
		if( tmp != stringX.find("[")-1 ) {
			stringX = stringX.substr(0,tmp) + stringX.substr(tmp+1);
		}
		// then everything from that position through the first "]" is cut
		if( tmp == 0 )
			stringX = stringX.substr(stringX.find("]")+1);
		else
			stringX = stringX.substr(0,tmp) + stringX.substr(stringX.find("]")+1);
	}

	// --- same for the second string ---
	while( stringY.find("~") != stringY.npos ) {
		tmp = stringY.find("~");
		if( tmp != stringY.find("[")-1 ) {
			stringY = stringY.substr(0,tmp) + stringY.substr(tmp+1);
		}
		if( tmp == 0 )
			stringY = stringY.substr(stringY.find("]")+1);
		else
			stringY = stringY.substr(0,tmp) + stringY.substr(stringY.find("]")+1);
	}

	national_char = false;

	// --- compare character by character over the common length ---
	while(stringX.length() > t_pozX && stringY.length() > t_pozY)
	{
		//
		// ~[0x1b2b] - from this     ~[0x1e] - sorting starts from this symbol
		// ~[0x1b2c] - up to this    ~[0x1f] - up to this symbol
		//
		t_result = c_compare(stringX[t_pozX],stringY[t_pozY]);

		// -1 / 1 decide at once
		if( t_result == -1 )
			return -1;
		if( t_result == 1 )
			return 1;

		// -2 / 2 are only remembered (presumably a difference that involves a
		// national character); the last one seen wins
		if( t_result == -2 ) {
			national_char = true;
			n_result = -1;
		}
		if( t_result == 2 ) {
			national_char = true;
			n_result = 1;
		}
		t_pozX++;
		t_pozY++;
	}

	// equal length and only remembered differences: these decide
	if( national_char && stringX.length() == stringY.length() ) {
		return n_result;
	}

	// otherwise the longer string is the greater one
	if(stringX.length() > stringY.length())
		return 1;
	if(stringX.length() < stringY.length())
		return -1;
	return 0;
}
void get_current_configuration(struct supported_gdb_version *sp) { FILE *fp; static char buf[512]; char *p; #ifdef __alpha__ target_data.target = ALPHA; #endif #ifdef __i386__ target_data.target = X86; #endif #ifdef __powerpc__ target_data.target = PPC; #endif #ifdef __ia64__ target_data.target = IA64; #endif #ifdef __s390__ target_data.target = S390; #endif #ifdef __s390x__ target_data.target = S390X; #endif #ifdef __powerpc64__ target_data.target = PPC64; #endif #ifdef __x86_64__ target_data.target = X86_64; #endif #ifdef __arm__ target_data.target = ARM; #endif #ifdef __aarch64__ target_data.target = ARM64; #endif #ifdef __mips__ target_data.target = MIPS; #endif set_initial_target(sp); /* * Override target if specified on command line. */ target_data.host = target_data.target; if (target_data.target_as_param) { if ((target_data.target == X86 || target_data.target == X86_64) && (name_to_target((char *)target_data.target_as_param) == ARM)) { /* * Debugging of ARM core files supported on X86, and on * X86_64 when built as a 32-bit executable. */ target_data.target = ARM; } else if ((target_data.target == X86 || target_data.target == X86_64) && (name_to_target((char *)target_data.target_as_param) == MIPS)) { /* * Debugging of MIPS little-endian core files * supported on X86, and on X86_64 when built as a * 32-bit executable. */ target_data.target = MIPS; } else if ((target_data.target == X86_64) && (name_to_target((char *)target_data.target_as_param) == X86)) { /* * Build an X86 crash binary on an X86_64 host. */ target_data.target = X86; } else if ((target_data.target == X86_64) && (name_to_target((char *)target_data.target_as_param) == ARM64)) { /* * Build an ARM64 crash binary on an X86_64 host. */ target_data.target = ARM64; } else if ((target_data.target == X86_64) && (name_to_target((char *)target_data.target_as_param) == PPC64)) { /* * Build a PPC64 little-endian crash binary on an X86_64 host. 
*/ target_data.target = PPC64; } else if ((target_data.target == PPC64) && (name_to_target((char *)target_data.target_as_param) == PPC)) { /* * Build an PPC crash binary on an PPC64 host. */ target_data.target = PPC; } else if (name_to_target((char *)target_data.target_as_param) == target_data.host) { if ((target_data.initial_gdb_target != UNKNOWN) && (target_data.host != target_data.initial_gdb_target)) arch_mismatch(sp); } else { fprintf(stderr, "\ntarget=%s is not supported on the %s host architecture\n\n", target_data.target_as_param, target_to_name(target_data.host)); exit(1); } } /* * Impose implied (sticky) target if an initial build has been * done in the source tree. */ if (target_data.initial_gdb_target && (target_data.target != target_data.initial_gdb_target)) { if ((target_data.initial_gdb_target == ARM) && (target_data.target != ARM)) { if ((target_data.target == X86) || (target_data.target == X86_64)) target_data.target = ARM; else arch_mismatch(sp); } if ((target_data.target == ARM) && (target_data.initial_gdb_target != ARM)) arch_mismatch(sp); if ((target_data.initial_gdb_target == MIPS) && (target_data.target != MIPS)) { if ((target_data.target == X86) || (target_data.target == X86_64)) target_data.target = MIPS; else arch_mismatch(sp); } if ((target_data.initial_gdb_target == X86) && (target_data.target != X86)) { if (target_data.target == X86_64) target_data.target = X86; else arch_mismatch(sp); } if ((target_data.target == X86) && (target_data.initial_gdb_target != X86)) arch_mismatch(sp); if ((target_data.initial_gdb_target == ARM64) && (target_data.target != ARM64)) { if (target_data.target == X86_64) target_data.target = ARM64; else arch_mismatch(sp); } if ((target_data.target == ARM64) && (target_data.initial_gdb_target != ARM64)) arch_mismatch(sp); if ((target_data.initial_gdb_target == PPC64) && (target_data.target != PPC64)) { if (target_data.target == X86_64) target_data.target = PPC64; else arch_mismatch(sp); } if ((target_data.target 
== PPC64) && (target_data.initial_gdb_target != PPC64)) arch_mismatch(sp); if ((target_data.initial_gdb_target == PPC) && (target_data.target != PPC)) { if (target_data.target == PPC64) target_data.target = PPC; else arch_mismatch(sp); } if ((target_data.target == PPC) && (target_data.initial_gdb_target != PPC)) arch_mismatch(sp); } if ((fp = fopen("Makefile", "r")) == NULL) { perror("Makefile"); goto get_release; } while (fgets(buf, 512, fp)) { if (strncmp(buf, "PROGRAM=", strlen("PROGRAM=")) == 0) { p = strstr(buf, "=") + 1; strip_linefeeds(p); upper_case(p, target_data.program); if (target_data.flags & DAEMON) strcat(target_data.program, "D"); continue; } } fclose(fp); get_release: target_data.release[0] = '\0'; if (file_exists(".rh_rpm_package")) { if ((fp = fopen(".rh_rpm_package", "r")) == NULL) { perror(".rh_rpm_package"); } else { if (fgets(buf, 512, fp)) { strip_linefeeds(buf); if (strlen(buf)) { buf[MAXSTRLEN-1] = '\0'; strcpy(target_data.release, buf); } else fprintf(stderr, "WARNING: .rh_rpm_package file is empty!\n"); } else fprintf(stderr, "WARNING: .rh_rpm_package file is empty!\n"); fclose(fp); if (strlen(target_data.release)) return; } } else fprintf(stderr, "WARNING: .rh_rpm_package file does not exist!\n"); if ((fp = fopen("defs.h", "r")) == NULL) { perror("defs.h"); return; } while (fgets(buf, 512, fp)) { if (strncmp(buf, "#define BASELEVEL_REVISION", strlen("#define BASELEVEL_REVISION")) == 0) { p = strstr(buf, "\"") + 1; strip_linefeeds(p); p[strlen(p)-1] = '\0'; strcpy(target_data.release, p); break; } } fclose(fp); }
void align_reads(vector<pair<string, string> >& reference, string& read_file, string& sam_file, vector<reference_index>& refindex) { time_t tstrt, tbgn, tnd; time(&tstrt); ///////////////////////////////////////////////// ifstream fp_nano; string nano_input; string nano_file = "last_but_not_nano.txt"; char *nano = new char[nano_file.length() + 1]; strcpy(nano, nano_file.c_str()); fp_nano.open(nano, ifstream::in); unordered_map<string, int> nano_read; while(getline(fp_nano, nano_input)) { //cout << "" << nano_input << endl; nano_read[nano_input] = 1; //continue; } cout << "Total Size of Nano Read = " << nano_read.size() << endl << endl; fp_nano.close(); delete [] nano; //////////////////////////////////////////////// ifstream fp_read; ofstream fp_sam; ofstream fp_2D; //vector<string> output; char *read = new char[read_file.length() + 1]; strcpy(read, read_file.c_str()); char *sam = new char[sam_file.length() + 1]; strcpy(sam, sam_file.c_str()); //string file_2D_str = logstr.str() + "reverse.fa"; //string file_2D_str = "2D_file_1K"; fp_read.open(read, ifstream::in); fp_sam.open(sam, ofstream::out | ofstream::app); //fp_2D.open(file_2D_str.c_str(), ofstream::out); string input, read_name, ref_name; string readseq, refgenome; string slash = "/"; int map = 0; int count = 0; int cant_map = 0; int invalid_count = 0; fp_csv << "cnt, red_nam, red_len, red_dir, ref_nam, ref_len, ref_pos, score, span, " << "percent, aln_len, spn_rat, aln_tim, tot_tim" << endl; while(getline(fp_read, input)) { int find = input.find(slash); if(find != string::npos) read_name = input.substr(1, find - 1); else read_name = input.substr(1); getline(fp_read, readseq); //getline(fp_read, input); //getline(fp_read, input); //ratio problem with channel_46_read_98_1406145606_2D //if(read_name.find("channel_407_read_0_1405093831_2D") == std::string::npos)//to optimize the output //if(read_name.find("channel_17_read_24_1405524767_2D") == std::string::npos)//small read to optimize 
//if(read_name.find("channel_201_read_10_1405541481_2D") == std::string::npos)//to compare version 1 and 2 //if(read_name.find("channel_64_read_7_1403826200_template") == std::string::npos)//max length reads analysis //if(read_name.find("channel_424_read_1_1403566249_template") == std::string::npos)//found in last but not in nano //if(read_name.find("2D") == std::string::npos)//03-09-2015 // continue;//2D== 1D != //if(read_name.find("channel_237_read_42_1406145606_2D") == std::string::npos) //if(read_name.find("channel_322_read_11_1405524767_template") == std::string::npos) //if(readseq.length() > 100) // continue; //if(read_name.find("channel_171_read_2_1403855963_2D") == std::string::npos)//20 times higher than last //if(read_name.find("channel_82_read_0_1403855963_2D") == std::string::npos)//20 times higher than last //if(read_name.find("channel_221_read_19_1406145606_2D") == std::string::npos)//has maximul length of deletion //if(read_name.find("channel_415_read_6_1406242409_template") == std::string::npos)//has 5 times less length than last // continue; //if(read_name.find("channel_167_read_19_1403811400_2D") == std::string::npos)//analyze output validity // continue; //if(nano_read.find(read_name) == nano_read.end()) // continue; //if(read_name.find("channel_352_read_34_1405541481_template") == std::string::npos)//Why there are multiple results //if(read_name.find("channel_68_read_22_1405541481_template") == std::string::npos)//multiple results, boundary problem //if(read_name.find("channel_261_read_39_1405541481_template") == std::string::npos)//multiple result indexing // continue; //if(read_name.find("channel_302_read_2_1403855963_2D") == std::string::npos)//found in mms not in ssg = align length // continue; //readseq = readseq.substr(readseq.length() / 2, readseq.length() - readseq.length() / 2); if(count >= MAXREAD && MAXREAD != 0) break; count += 1; //if(count < 65433) continue; cout << count << ") " << read_name << endl; if(readseq.length() < 
MINREADLEN || readseq.length() > MAXREADLEN)//03-09-2015 { cout << "Invalid String Found" << endl; invalid_count += 1; count -= 1; time(&tnd); //fp_csv << count << ", " << readseq.length() << ", 0, 0, 0, " << // "0, 0, 0, 0, 0, 0, 0, " << difftime(tnd, tstrt) << endl; continue; } if(count <= MINREAD) continue; //if(count < 318) continue; time(&tbgn); fp_csv << count << ", " << read_name << ", " << readseq.length() << ", "; upper_case(readseq); //reverse_str(readseq); //readseq = reverse_complement(readseq); //fp_2D << input << endl; //fp_2D << readseq << endl; //continue; int match_info, global_match = -1, indpos; int match, max_match = 0, match_index, dir; vector<string> final_result; time_t start, end; clock_t t_start, t_end; //for(int i = 0; i < reference.size(); i++) { //vector<pair<int, pair<int, int> > > kmer_ref; vector<pair<int, vector<pair<int, int> > > > kmer_ref; cout << "Analysis for forward:" << endl; time(&start); t_start = clock(); read_vs_reference(readseq, read_name, FF, refindex, kmer_ref); t_end = clock(); time(&end); cout << "Total time taken for calling forward read_vs_ref = " << difftime(end, start) << endl; //cout << "Total time for Hash_Lookup = " << (float(t_end - t_start) / CLOCKS_PER_SEC) << endl; t_lookup += t_end - t_start; //align(readseq, read_name, FF, refindex, kmer_ref, final_result); //if(final_result.size() == 0) //{ //kmer_ref.clear(); cout << "Data for reverse:" << endl; time(&start); t_start = clock(); string reverse = reverse_complement(readseq); read_vs_reference(reverse, read_name, FR, refindex, kmer_ref); t_end = clock(); time(&end); cout << "Total time taken for calling reverse read_vs_ref = " << difftime(end, start) << endl; //cout << "Total time for Hash_Lookup = " << (float(t_end - t_start) / CLOCKS_PER_SEC) << endl; t_lookup += t_end - t_start; cout << endl <<endl; //uncomment here align(readseq, read_name, FR, refindex, kmer_ref, final_result); //} if(final_result.size() == 0) { cant_map += 1; kmer_ref.clear(); 
time(&tnd); //fp_csv << "0, 0, "; fp_csv << difftime(tnd, tbgn) << ", " << difftime(tnd, tstrt) << endl; //fp_csv << endl; continue; } assert(final_result.size() == 11); cout << "No Assertion Occurred for ReferenceIndex = " << read_name << endl; cout << endl << endl; kmer_ref.clear(); } //cout << "reference_index = " << indpos << ", and match_index = " << match_index << // ", and direction = " << dir << ", with matching = " << max_match << endl; //break; //global_match = read_vs_reference(reference, readseq, read_name, final_result); fp_sam << final_result[0]; for(int k = 1; k < final_result.size(); k++) { fp_sam << "\t" << final_result[k]; //cout << i << ": " << output[k] << endl; } fp_sam << endl; map += 1; final_result.clear(); //if(count >10000) // break; time(&tnd); fp_csv << difftime(tnd, tbgn) << ", " << difftime(tnd, tstrt) << endl; cout << "\nTime taken to process " << count << "th read = " << difftime(tnd, tstrt) << "\n" << endl; //break; } cout << endl; cout << "Overall Statistics - " << endl; cout << "total reference size = " << reference.size() << endl << endl; cout << "Total read = " << count << endl << endl; cout << "Total read mapped = " << map << endl << endl; cout << "Total unmapped read = " << cant_map << endl << endl; cout << "Out of range read (< 100 or > 15000) = " << invalid_count << endl << endl; //cout << "Total MAX_MATCHED (= " << MAX_MATCHED << ") Reads = " << MAX_SCORED << endl << endl; fp_read.close(); fp_sam.close(); fp_2D.close(); delete [] read; delete [] sam; }
void input_reference(vector<pair<string, string> >& reference, string& ref_file, string& sam_file) { ifstream fp_ref; ofstream fp_sam; char *ref = new char[ref_file.length() + 1]; strcpy(ref, ref_file.c_str()); char *sam = new char[sam_file.length() + 1]; strcpy(sam, sam_file.c_str()); fp_ref.open(ref, ifstream::in); fp_sam.open(sam, ofstream::out); string space = " "; string input, output; string ref_name; string sequence; getline(fp_ref, input); while(!fp_ref.eof()) { //getline(fp_ref, input); size_t find = input.find(space); if(find != string::npos) ref_name = input.substr(1, find); else ref_name = input.substr(1); sequence = ""; //cout << ref_name << endl; while(getline(fp_ref, input)) { if(input.length() < 1) break; if(input.at(0) == '>') { break; } sequence += input; } //if(ref_name.find("chr13") == string::npos) // continue;//added on 03-13-15 upper_case(sequence); //cout << sequence << endl; reference.push_back(make_pair(ref_name, sequence)); output = "@SQ\tSN:" + ref_name + "\tLN:"; fp_sam << output << sequence.length() << endl; if(input.length() < 1) break; //added on 03-11-15 } fp_ref.close(); fp_sam.close(); delete [] ref; delete [] sam; }
void align_reads(vector<pair<string, string> >& reference, string& read_file, string& sam_file, vector<reference_index>& refindex) { time_t tstrt, tbgn, tnd; time(&tstrt); /*//Can be used for analyzing the difference between LAST and NanoBLASTer ifstream fp_nano; string nano_input; string nano_file = "last_but_not_nano.txt"; char *nano = new char[nano_file.length() + 1]; strcpy(nano, nano_file.c_str()); fp_nano.open(nano, ifstream::in); unordered_map<string, int> nano_read; while(getline(fp_nano, nano_input)) { //cout << "" << nano_input << endl; nano_read[nano_input] = 1; //continue; } cout << "Total Size of Nano Read = " << nano_read.size() << endl << endl; fp_nano.close(); delete [] nano; */ ifstream fp_read; ofstream fp_sam; char *read = new char[read_file.length() + 1]; strcpy(read, read_file.c_str()); char *sam = new char[sam_file.length() + 1]; strcpy(sam, sam_file.c_str()); fp_read.open(read, ifstream::in); fp_sam.open(sam, ofstream::out | ofstream::app); string input, read_name, ref_name; string readseq, refgenome; string slash = "/"; int map = 0; int count = 0; int cant_map = 0; int invalid_count = 0; fp_csv << "cnt, red_nam, red_len, red_dir, ref_nam, ref_len, ref_pos, score, span, " << "percent, aln_len, spn_rat, aln_tim, tot_tim" << endl; getline(fp_read, input); while(!fp_read.eof()) { //int find = input.find(slash); //if(find != string::npos) // read_name = input.substr(1, find - 1); //else // read_name = input.substr(1); read_name = input.substr(1); readseq = ""; while(getline(fp_read, input)) { if(input.length() == 0) continue; if(input.at(0) == '>') break; readseq += input; } //getline(fp_read, input); //getline(fp_read, input); //ratio problem with channel_46_read_98_1406145606_2D //if(read_name.find("channel_407_read_0_1405093831_2D") == std::string::npos)//to optimize the output //if(read_name.find("channel_17_read_24_1405524767_2D") == std::string::npos)//small read to optimize //if(read_name.find("channel_201_read_10_1405541481_2D") == 
std::string::npos)//to compare version 1 and 2 //if(read_name.find("channel_64_read_7_1403826200_template") == std::string::npos)//max length reads analysis //if(read_name.find("channel_424_read_1_1403566249_template") == std::string::npos)//found in last but not in nano //if(read_name.find("2D") == std::string::npos)//03-09-2015 //if(read_name.find("channel_237_read_42_1406145606_2D") == std::string::npos) //if(read_name.find("channel_322_read_11_1405524767_template") == std::string::npos) //if(read_name.find("channel_171_read_2_1403855963_2D") == std::string::npos)//20 times higher than last //if(read_name.find("channel_82_read_0_1403855963_2D") == std::string::npos)//20 times higher than last //if(read_name.find("channel_221_read_19_1406145606_2D") == std::string::npos)//has maximul length of deletion //if(read_name.find("channel_415_read_6_1406242409_template") == std::string::npos)//has 5 times less length than last //if(read_name.find("channel_167_read_19_1403811400_2D") == std::string::npos)//analyze output validity //if(read_name.find("channel_474_read_32_1405524767_template") == std::string::npos)//found in last and nano repeat //if(read_name.find("channel_468_read_12_1403811400_complement") == std::string::npos)//cause exception in nano repeat //if(read_name.find("channel_345_read_7_1403811400_2D") == std::string::npos)//max length increased //if(read_name.find("channel_104_read_1_1403551548_template") == std::string::npos)//different in edit not lis //if(read_name.find("channel_216_read_0_1403551548_template") == std::string::npos)//different in lis not edit //if(read_name.find("channel_118_read_6_1403551548_template") == std::string::npos)//different in lis and edit //if(read_name.find("channel_486_read_0_1403566249_template") == std::string::npos)//reverse problem //if(nano_read.find(read_name) == nano_read.end()) //if(read_name.find("channel_352_read_34_1405541481_template") == std::string::npos)//Why there are multiple results 
//if(read_name.find("channel_68_read_22_1405541481_template") == std::string::npos)//multiple results, boundary problem //if(read_name.find("channel_261_read_39_1405541481_template") == std::string::npos)//multiple result indexing //if(read_name.find("channel_302_read_2_1403855963_2D") == std::string::npos)//found in mms not in ssg = align length //if(read_name.find("channel_243_read_0_1403595798_template") == std::string::npos)//found in 40655 not in lis+edit //if(read_name.find("channel_452_read_46_1405541481_template") == std::string::npos)//same problem as above //if(read_name.find("channel_431_read_2_1403915857_template") == std::string::npos)//require top 40 tuple list to solve //readseq = readseq.substr(readseq.length() / 2, readseq.length() - readseq.length() / 2); //if(read_name.find("channel_199_read_0_1403841073_template") == std::string::npos)//solved //if(read_name.find("channel_480_read_91_1406242409_template") == std::string::npos)//in last and not in nano //if(read_name.find("channel_389_read_57_1406242409_template") == std::string::npos)//solved //if(read_name.find("channel_56_read_1_1403826200_template") == std::string::npos) //if(read_name.find("channel_356_read_29_1406242409_template") == std::string::npos)// < 40 in nano very weird //if(read_name.find("channel_131_read_5_1403826200_template") == string::npos)// < 100 in nano seems weird //if(read_name.find("channel_75_read_80_1406145606_template") == std::string::npos)//80% last not found now solved // continue; if(count >= MAXREAD && MAXREAD != 0) break; count += 1; //if(count < 11762) continue; cout << count << ") " << read_name << endl; if(readseq.length() < MINREADLEN || readseq.length() > MAXREADLEN)//03-09-2015 { cout << "Invalid String Found" << endl; invalid_count += 1; count -= 1; fp_sam << read_name << "\t4\t*\t0\t0\t*\t*\t0\t0\t" << readseq << "\t*" << endl; time(&tnd); //fp_csv << count << ", " << readseq.length() << ", 0, 0, 0, " << // "0, 0, 0, 0, 0, 0, 0, " << difftime(tnd, 
tstrt) << endl; continue; } if(count <= MINREAD) continue; //if(count < 318) continue; time(&tbgn); if(DEBUG == 99) fp_csv << count << ", " << read_name << ", " << readseq.length() << ", "; upper_case(readseq); //reverse_str(readseq); //readseq = reverse_complement(readseq); int match_info, global_match = -1, indpos; int match, max_match = 0, match_index, dir; vector<vector<string> > list_final_result; //time_t start, end; //clock_t t_start, t_end; //for(int i = 0; i < reference.size(); i++) { //vector<pair<int, pair<int, int> > > kmer_ref; vector<pair<int, vector<pair<int, int> > > > kmer_ref; //cout << "Analysis for forward:" << endl; // //time(&start); //t_start = clock(); read_vs_reference(readseq, read_name, FF, refindex, kmer_ref); //t_end = clock(); //time(&end); //cout << "Total time taken for calling forward read_vs_ref = " << difftime(end, start) << endl; //t_lookup += t_end - t_start; //align(readseq, read_name, FF, refindex, kmer_ref, final_result); //cout << "Data for reverse:" << endl; //time(&start); //t_start = clock(); if(SINGLE == 1) { string reverse = reverse_complement(readseq); read_vs_reference(reverse, read_name, FR, refindex, kmer_ref); } //t_end = clock(); //time(&end); //cout << "Total time taken for calling reverse read_vs_ref = " << difftime(end, start) << endl; //t_lookup += t_end - t_start; //cout << endl <<endl; //uncomment here for aligninng read list_final_result.clear(); align(readseq, read_name, FR, refindex, kmer_ref, list_final_result); if(list_final_result.size() == 0) { cant_map += 1; kmer_ref.clear(); //time(&tnd); //fp_csv << difftime(tnd, tbgn) << ", " << difftime(tnd, tstrt) << endl; fp_sam << read_name << "\t4\t*\t0\t0\t*\t*\t0\t0\t" << readseq << "\t*" << endl; continue; } kmer_ref.clear(); } for(int i = 0; i < list_final_result.size(); i++) { vector<string>& final_result = list_final_result[i]; fp_sam << final_result[0]; for(int k = 1; k < final_result.size(); k++) { fp_sam << "\t" << final_result[k]; //cout << i << ": 
" << output[k] << endl; } fp_sam << endl; map += 1; final_result.clear(); } /* if(list_final_result.size() == 0 && SAM_FORMAT == 1) { fp_sam << read_name << "\t4\t*\t0\t0\t*\t*\t0\t0\t" << readseq << "\t*" << endl; } */ //time(&tnd); list_final_result.clear(); if(DEBUG == 99) fp_csv << endl;//difftime(tnd, tbgn) << ", " << difftime(tnd, tstrt) << endl; //cout << "\nTime taken to process " << count << "th read = " << difftime(tnd, tstrt) << "\n" << endl; //break; } cout << endl; cout << "Overall Statistics - " << endl; cout << "total reference size = " << reference.size() << endl << endl; cout << "Total read = " << count << endl << endl; cout << "Total read mapped = " << map << endl << endl; cout << "Total unmapped read = " << cant_map << endl << endl; cout << "Out of range read (< 100 or > 15000) = " << invalid_count << endl << endl; //cout << "Total MAX_MATCHED (= " << MAX_MATCHED << ") Reads = " << MAX_SCORED << endl << endl; fp_read.close(); fp_sam.close(); delete [] read; delete [] sam; }