BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::nfactor_diag_error
  (
   INT_1DARRAY threads_start
   )
  {
    Int nthread_remalloc = 0;
    for(Int ti = 0; ti < num_threads; ti++)
      {

	//Note: jdb we can make this into a switch
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_NOERROR)
	  {
	    threads_start(ti) = BASKER_MAX_IDX;
	    continue;
	  }//end if NOERROR

	if(thread_array(ti).error_type ==
	   BASKER_ERROR_SINGULAR)
	  {
	    printf("ERROR THREAD: %d DIAGBLK SINGULAR: %d\n",
		   ti,
		   thread_array(ti).error_blk);
	    return BASKER_ERROR;
	  }//end if SINGULAR
	
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_NOMALLOC)
	  {
	    printf("ERROR THREADS: %d DIAGBLK NOMALLOC: %d\n",
		   ti,
		   thread_array(ti).error_blk);
	    return BASKER_ERROR;
	  }//end if NOMALLOC
	
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_REMALLOC)
	  {

	    BASKER_ASSERT(thread_array(ti).error_blk > 0,
			  "nfactor_diag_error error_blk");

	    printf("ERROR THREADS: %d DIAGBLK MALLOC: %d \n",
		   ti,
		   thread_array(ti).error_blk);
	    
	    	    //Clean the workspace
	    printf("test: %d %d \n",
		   thread_array(ti).iws_size*thread_array(ti).iws_mult,
		   thread_array(ti).ews_size*thread_array(ti).ews_mult);

	    for(Int i = 0; 
		i < thread_array(ti).iws_size*thread_array(ti).iws_mult;
		i++)
	      {
		thread_array(ti).iws(i) = (Int) 0;
	      }
	    for(Int i = 0;
		i < thread_array(ti).ews_size*thread_array(ti).ews_mult;
		i++)
	      {
		thread_array(ti).ews(i) = (Entry) 0;
	      }

	   

	    //Resize L
	    BASKER_MATRIX &L = LBTF(thread_array(ti).error_blk);
	    REALLOC_INT_1DARRAY(L.row_idx,
			       L.nnz,
			       thread_array(ti).error_info);
	    REALLOC_ENTRY_1DARRAY(L.val,
				 L.nnz,
				 thread_array(ti).error_info);
	    L.nnz = thread_array(ti).error_info;
	    for(Int i = 0; i < L.ncol; i++)
	      {
		L.col_ptr(i) = 0;
	      }

	    for(Int i = L.srow; i < (L.srow+L.nrow); i++)
	      {
		gperm(i) = BASKER_MAX_IDX;
	      }

	    //Resize U
	    BASKER_MATRIX &U = UBTF(thread_array(ti).error_blk);
	    REALLOC_INT_1DARRAY(U.row_idx,
			       U.nnz,
			       thread_array(ti).error_info);
	    REALLOC_ENTRY_1DARRAY(U.val,
				 U.nnz,
				 thread_array(ti).error_info);
	    U.nnz = thread_array(ti).error_info;
	    for(Int i = 0; i < U.ncol; i++)
	      {
		U.col_ptr(i) = 0;
	      }

	    
	    printf("Setting thread start(%d) %d \n",
		   ti, thread_array(ti).error_blk);

	    threads_start(ti) = thread_array(ti).error_blk;
	

	    //Reset 
	    thread_array(ti).error_type = BASKER_ERROR_NOERROR;
	    thread_array(ti).error_blk  = BASKER_MAX_IDX;
	    thread_array(ti).error_info = BASKER_MAX_IDX;

	    nthread_remalloc++;
    
	  }//if REMALLOC

      }//for all threads
    
    if(nthread_remalloc == 0)
      {
	return BASKER_SUCCESS;
      }
    else
      {
	return nthread_remalloc;
      }

    //Should never be here
    BASKER_ASSERT(0==1, "nfactor_diag_error, should never");
    return BASKER_SUCCESS;
  }//end nfactor_diag_error
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::serial_btf_solve
  (
   ENTRY_1DARRAY y,
   ENTRY_1DARRAY x
   )
  {

    
    for(Int i = 0; i < gn; ++i)
      {
	x(i) = y(i);
	y(i) = (Entry) 0.0;
      }
    
    //printf("Test \n");

    //Start in C and go backwards
    //In first level, only due U\L\x->y
    for(Int b = (btf_nblks-btf_tabs_offset)-1;
	b>= 0; b--)
      {
        
        #ifdef BASKER_DEBUG_SOLVE_RHS
        printf("\n\n btf b: %d \n", b);
        #endif

	//---Lower solve
	BASKER_MATRIX &LC = LBTF(b);
	//L\x -> y 
	lower_tri_solve(LC,x,y);

	BASKER_MATRIX &UC = UBTF(b);
	//U\x -> y
	upper_tri_solve(UC,x,y);

        #ifdef BASKER_DEBUG_SOLVE_RHS
        printf("Before spmv\n");
        printf("Inner Vector y print\n");
        printVec(y, gn);
        printf("Inner Vector x print\n");
        printVec(x, gn);
        printf("\n");
        #endif

       
	//-----Update
	//if(b > btf_tabs_offset)
	  {
	//x = BTF_C*y;
            //printf("spmv tab: %d \n", b+btf_tabs_offset);
         spmv_BTF(b+btf_tabs_offset,
		 BTF_C, y, x);
	  }
          
          #ifdef BASKER_DEBUG_SOLVE_RHS
          printf("After spmv\n");
          printf("Inner Vector y print\n");
          printVec(y, gn);
          printf("Inner Vector x print\n");
          printVec(x, gn);
          #endif
        

	//BASKER_MATRIX &UC = UBTF[b];
	//U\x -> y
	//upper_tri_solve(UC,x,y);

      }



    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("Done, BTF-C Solve \n");
    printf("\n x \n");
    printVec(x, gn);
    printf("\n y \n");
    printVec(y, gn);
    printf("\n\n");
    #endif

    
    //Update B
    //BTF_B*y -> x
    if(btf_tabs_offset !=  0)
      {
        neg_spmv(BTF_B,y,x);
      }

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("Done, SPMV BTF_B UPDATE \n");
    printf("\n x \n");
    printVec(x, gn);
    printf("\n y \n");
    printVec(y, gn);
    printf("\n\n");
    #endif

    //now do the forward backwared solve
    //L\x ->y
    serial_forward_solve(x,y);

    //U\y->x
    serial_backward_solve(y,x);

    //copy lower part down
    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("copying lower starting: %d \n",
	   btf_tabs[btf_tabs_offset]);
    #endif
    for(Int i = btf_tabs(btf_tabs_offset); i < gn; ++i)
      {
	//x[i] = y[i];
	x(i) = y(i);
      }
    
    //Comeback and fix
    return 0;
    
  }//end serial_btf_solve