/*
 * csr_amd: approximate minimum degree ordering of a square sparse matrix.
 *
 * The ordering is computed on the nonzero pattern of a matrix C built from A,
 * with C's diagonal entries removed:
 *   order == 1 : C = A+A'
 *   order == 2 : C = A'*A, formed after dropping from AT every row/column
 *                holding more than `dense` entries
 *   order == 3 : C = A'*A
 *
 * Returns NULL if CS_CSC (A) fails, if order is outside 1..3, if A is not
 * square, or if the transpose or C cannot be built.  If a later allocation
 * fails, the result of csr_idone (P, C, W, 0) is returned; on success the
 * result of csr_idone (P, C, W, 1) is returned.
 * NOTE(review): csr_idone presumably frees C and W and returns P (n+1 ints,
 * owned by the caller) on success and NULL on failure -- confirm against its
 * definition.
 */
int *csr_amd (int order, const csr *A)  /* order 0:natural, 1:Chol, 2:LU, 3:QR */
{
  csr *C, *A2, *AT ;
  int *Cp, *Cj, *last, *W, *len, *nv, *next, *P, *head, *elen, *degree, *w,
    *hhead, *ATp, *ATj, d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1,
    k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi,
    ok, cnz, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, n, m, t ;
  unsigned int h ;
  /* --- Construct matrix C ----------------------------------------------- */
  if (!CS_CSC (A) || order <= 0 || order > 3) return (NULL) ; /* check */
  m = A->m ; n = A->n ;
  /* Only square matrices are handled here.  Reject rectangular input before
     the transpose is allocated so that this early return cannot leak AT. */
  if (n != m) return (NULL) ;
  AT = csr_transpose (A, 0) ;             /* compute A' */
  if (!AT) return (NULL) ;
  dense = (int)CS_MAX (16, 10 * sqrt ((double) n)) ;   /* find dense threshold */
  dense = CS_MIN (n-2, dense) ;
  if (order == 1 && n == m)
    {
      C = csr_add (A, AT, 0, 0) ;         /* C = A+A' */
    }
  else if (order == 2)
    {
      ATp = AT->p ;                       /* drop dense columns from AT */
      ATj = AT->j ;
      for (p2 = 0, j = 0 ; j < m ; j++)
        {
          p = ATp [j] ;                   /* column j of AT starts here */
          ATp [j] = p2 ;                  /* new column j starts here */
          if (ATp [j+1] - p > dense) continue ;   /* skip dense col j */
          for ( ; p < ATp [j+1] ; p++) ATj [p2++] = ATj [p] ;
        }
      ATp [m] = p2 ;                      /* finalize AT */
      A2 = csr_transpose (AT, 0) ;        /* A2 = AT' */
      C = A2 ? csr_multiply (AT, A2) : NULL ; /* C=A'*A with no dense rows */
      csr_spfree (A2) ;
    }
  else
    {
      C = csr_multiply (AT, A) ;          /* C=A'*A */
    }
  csr_spfree (AT) ;
  if (!C) return (NULL) ;
  csr_fkeep (C, &csr_diag, NULL) ;         /* drop diagonal entries */
  Cp = C->p ;
  cnz = Cp [n] ;
  P = (int*)malloc (CS_MAX(n+1,1) * sizeof (int)) ;     /* allocate result */
  W = (int*)malloc (CS_MAX(8*(n+1),1) * sizeof (int)) ; /* get workspace */
  t = cnz + cnz/5 + 2*n ;                 /* add elbow room to C */
  if (!P || !W || !csr_sprealloc (C, t)) return (csr_idone (P, C, W, 0)) ;
  /* W is carved into eight work arrays of n+1 ints each */
  len  = W           ; nv     = W +   (n+1) ; next   = W + 2*(n+1) ;
  head = W + 3*(n+1) ; elen   = W + 4*(n+1) ; degree = W + 5*(n+1) ;
  w    = W + 6*(n+1) ; hhead  = W + 7*(n+1) ;
  last = P ;                              /* use P as workspace for last */
  /* --- Initialize quotient graph ---------------------------------------- */
  for (k = 0 ; k < n ; k++) len [k] = Cp [k+1] - Cp [k] ;
  len [n] = 0 ;
  nzmax = C->nzmax ;
  Cj = C->j ;
  for (i = 0 ; i <= n ; i++)
    {
      head [i] = -1 ;                     /* degree list i is empty */
      last [i] = -1 ;
      next [i] = -1 ;
      hhead [i] = -1 ;                    /* hash list i is empty */
      nv [i] = 1 ;                        /* node i is just one node */
      w [i] = 1 ;                         /* node i is alive */
      elen [i] = 0 ;                      /* Ek of node i is empty */
      degree [i] = len [i] ;              /* degree of node i */
    }
  mark = csr_wclear (0, 0, w, n) ;         /* clear w */
  elen [n] = -2 ;                         /* n is a dead element */
  Cp [n] = -1 ;                           /* n is a root of assembly tree */
  w [n] = 0 ;                             /* n is a dead element */
  /* --- Initialize degree lists ------------------------------------------ */
  for (i = 0 ; i < n ; i++)
    {
      d = degree [i] ;
      if (d == 0)                         /* node i is empty */
        {
          elen [i] = -2 ;                 /* element i is dead */
          nel++ ;
          Cp [i] = -1 ;                   /* i is a root of assembly tree */
          w [i] = 0 ;
        }
      else if (d > dense)                 /* node i is dense */
        {
          nv [i] = 0 ;                    /* absorb i into element n */
          elen [i] = -1 ;                 /* node i is dead */
          nel++ ;
          Cp [i] = CS_FLIP (n) ;
          nv [n]++ ;
        }
      else
        {
          if (head [d] != -1) last [head [d]] = i ;
          next [i] = head [d] ;           /* put node i in degree list d */
          head [d] = i ;
        }
    }
  while (nel < n)                         /* while (selecting pivots) do */
    {
      /* --- Select node of minimum approximate degree -------------------- */
      for (k = -1 ; mindeg < n && (k = head [mindeg]) == -1 ; mindeg++) ;
      if (next [k] != -1) last [next [k]] = -1 ;
      head [mindeg] = next [k] ;          /* remove k from degree list */
      elenk = elen [k] ;                  /* elenk = |Ek| */
      nvk = nv [k] ;                      /* # of nodes k represents */
      nel += nvk ;                        /* nv[k] nodes of A eliminated */
      /* --- Garbage collection ------------------------------------------- */
      if (elenk > 0 && cnz + mindeg >= nzmax)
        {
          for (j = 0 ; j < n ; j++)
            {
              if ((p = Cp [j]) >= 0)      /* j is a live node or element */
                {
                  Cp [j] = Cj [p] ;       /* save first entry of object */
                  Cj [p] = CS_FLIP (j) ;  /* first entry is now CS_FLIP(j) */
                }
            }
          for (q = 0, p = 0 ; p < cnz ; ) /* scan all of memory */
            {
              if ((j = CS_FLIP (Cj [p++])) >= 0)  /* found object j */
                {
                  Cj [q] = Cp [j] ;       /* restore first entry of object */
                  Cp [j] = q++ ;          /* new pointer to object j */
                  for (k3 = 0 ; k3 < len [j]-1 ; k3++) Cj [q++] = Cj [p++] ;
                }
            }
          cnz = q ;                       /* Cj [cnz...nzmax-1] now free */
        }
      /* --- Construct new element ---------------------------------------- */
      dk = 0 ;
      nv [k] = -nvk ;                     /* flag k as in Lk */
      p = Cp [k] ;
      pk1 = (elenk == 0) ? p : cnz ;      /* do in place if elen[k] == 0 */
      pk2 = pk1 ;
      for (k1 = 1 ; k1 <= elenk + 1 ; k1++)
        {
          if (k1 > elenk)
            {
              e = k ;                     /* search the nodes in k */
              pj = p ;                    /* list of nodes starts at Cj[pj]*/
              ln = len [k] - elenk ;      /* length of list of nodes in k */
            }
          else
            {
              e = Cj [p++] ;              /* search the nodes in e */
              pj = Cp [e] ;
              ln = len [e] ;              /* length of list of nodes in e */
            }
          for (k2 = 1 ; k2 <= ln ; k2++)
            {
              i = Cj [pj++] ;
              if ((nvi = nv [i]) <= 0) continue ; /* node i dead, or seen */
              dk += nvi ;                 /* degree[Lk] += size of node i */
              nv [i] = -nvi ;             /* negate nv[i] to denote i in Lk*/
              Cj [pk2++] = i ;            /* place i in Lk */
              if (next [i] != -1) last [next [i]] = last [i] ;
              if (last [i] != -1)         /* remove i from degree list */
                {
                  next [last [i]] = next [i] ;
                }
              else
                {
                  head [degree [i]] = next [i] ;
                }
            }
          if (e != k)
            {
              Cp [e] = CS_FLIP (k) ;      /* absorb e into k */
              w [e] = 0 ;                 /* e is now a dead element */
            }
        }
      if (elenk != 0) cnz = pk2 ;         /* Cj [cnz...nzmax] is free */
      degree [k] = dk ;                   /* external degree of k - |Lk\i| */
      Cp [k] = pk1 ;                      /* element k is in Cj[pk1..pk2-1] */
      len [k] = pk2 - pk1 ;
      elen [k] = -2 ;                     /* k is now an element */
      /* --- Find set differences ----------------------------------------- */
      mark = csr_wclear (mark, lemax, w, n) ;  /* clear w if necessary */
      for (pk = pk1 ; pk < pk2 ; pk++)    /* scan 1: find |Le\Lk| */
        {
          i = Cj [pk] ;
          if ((eln = elen [i]) <= 0) continue ;/* skip if elen[i] empty */
          nvi = -nv [i] ;                      /* nv [i] was negated */
          wnvi = mark - nvi ;
          for (p = Cp [i] ; p <= Cp [i] + eln - 1 ; p++)  /* scan Ei */
            {
              e = Cj [p] ;
              if (w [e] >= mark)
                {
                  w [e] -= nvi ;          /* decrement |Le\Lk| */
                }
              else if (w [e] != 0)        /* ensure e is a live element */
                {
                  w [e] = degree [e] + wnvi ; /* 1st time e seen in scan 1 */
                }
            }
        }
      /* --- Degree update ------------------------------------------------ */
      for (pk = pk1 ; pk < pk2 ; pk++)    /* scan2: degree update */
        {
          i = Cj [pk] ;                   /* consider node i in Lk */
          p1 = Cp [i] ;
          p2 = p1 + elen [i] - 1 ;
          pn = p1 ;
          for (h = 0, d = 0, p = p1 ; p <= p2 ; p++)    /* scan Ei */
            {
              e = Cj [p] ;
              if (w [e] != 0)             /* e is an unabsorbed element */
                {
                  dext = w [e] - mark ;   /* dext = |Le\Lk| */
                  if (dext > 0)
                    {
                      d += dext ;         /* sum up the set differences */
                      Cj [pn++] = e ;     /* keep e in Ei */
                      h += e ;            /* compute the hash of node i */
                    }
                  else
                    {
                      Cp [e] = CS_FLIP (k) ;  /* aggressive absorb. e->k */
                      w [e] = 0 ;             /* e is a dead element */
                    }
                }
            }
          elen [i] = pn - p1 + 1 ;        /* elen[i] = |Ei| */
          p3 = pn ;
          p4 = p1 + len [i] ;
          for (p = p2 + 1 ; p < p4 ; p++) /* prune edges in Ai */
            {
              j = Cj [p] ;
              if ((nvj = nv [j]) <= 0) continue ; /* node j dead or in Lk */
              d += nvj ;                  /* degree(i) += |j| */
              Cj [pn++] = j ;             /* place j in node list of i */
              h += j ;                    /* compute hash for node i */
            }
          if (d == 0)                     /* check for mass elimination */
            {
              Cp [i] = CS_FLIP (k) ;      /* absorb i into k */
              nvi = -nv [i] ;
              dk -= nvi ;                 /* |Lk| -= |i| */
              nvk += nvi ;                /* |k| += nv[i] */
              nel += nvi ;
              nv [i] = 0 ;
              elen [i] = -1 ;             /* node i is dead */
            }
          else
            {
              degree [i] = CS_MIN (degree [i], d) ;   /* update degree(i) */
              Cj [pn] = Cj [p3] ;         /* move first node to end */
              Cj [p3] = Cj [p1] ;         /* move 1st el. to end of Ei */
              Cj [p1] = k ;               /* add k as 1st element in of Ei */

              len [i] = pn - p1 + 1 ;     /* new len of adj. list of node i */
              h %= n ;                    /* finalize hash of i */
              next [i] = hhead [h] ;      /* place i in hash bucket */
              hhead [h] = i ;
              last [i] = h ;              /* save hash of i in last[i] */
            }
        }                                   /* scan2 is done */
      degree [k] = dk ;                   /* finalize |Lk| */
      lemax = CS_MAX (lemax, dk) ;
      mark = csr_wclear (mark+lemax, lemax, w, n) ;    /* clear w */
      /* --- Supernode detection ------------------------------------------ */
      for (pk = pk1 ; pk < pk2 ; pk++)
        {
          i = Cj [pk] ;
          if (nv [i] >= 0) continue ;         /* skip if i is dead */
          h = last [i] ;                      /* scan hash bucket of node i */
          i = hhead [h] ;
          hhead [h] = -1 ;                    /* hash bucket will be empty */
          for ( ; i != -1 && next [i] != -1 ; i = next [i], mark++)
            {
              ln = len [i] ;
              eln = elen [i] ;
              for (p = Cp [i]+1 ; p <= Cp [i] + ln-1 ; p++) w [Cj [p]] = mark;
              jlast = i ;
              for (j = next [i] ; j != -1 ; ) /* compare i with all j */
                {
                  ok = (len [j] == ln) && (elen [j] == eln) ;
                  for (p = Cp [j] + 1 ; ok && p <= Cp [j] + ln - 1 ; p++)
                    {
                      if (w [Cj [p]] != mark) ok = 0 ;    /* compare i and j*/
                    }
                  if (ok)                     /* i and j are identical */
                    {
                      Cp [j] = CS_FLIP (i) ;  /* absorb j into i */
                      nv [i] += nv [j] ;
                      nv [j] = 0 ;
                      elen [j] = -1 ;         /* node j is dead */
                      j = next [j] ;          /* delete j from hash bucket */
                      next [jlast] = j ;
                    }
                  else
                    {
                      jlast = j ;             /* j and i are different */
                      j = next [j] ;
                    }
                }
            }
        }
      /* --- Finalize new element------------------------------------------ */
      for (p = pk1, pk = pk1 ; pk < pk2 ; pk++)   /* finalize Lk */
        {
          i = Cj [pk] ;
          if ((nvi = -nv [i]) <= 0) continue ;/* skip if i is dead */
          nv [i] = nvi ;                      /* restore nv[i] */
          d = degree [i] + dk - nvi ;         /* compute external degree(i) */
          d = CS_MIN (d, n - nel - nvi) ;
          if (head [d] != -1) last [head [d]] = i ;
          next [i] = head [d] ;               /* put i back in degree list */
          last [i] = -1 ;
          head [d] = i ;
          mindeg = CS_MIN (mindeg, d) ;       /* find new minimum degree */
          degree [i] = d ;
          Cj [p++] = i ;                      /* place i in Lk */
        }
      nv [k] = nvk ;                      /* # nodes absorbed into k */
      if ((len [k] = p-pk1) == 0)         /* length of adj list of element k*/
        {
          Cp [k] = -1 ;                   /* k is a root of the tree */
          w [k] = 0 ;                     /* k is now a dead element */
        }
      if (elenk != 0) cnz = p ;           /* free unused space in Lk */
    }
  /* --- Postordering ----------------------------------------------------- */
  for (i = 0 ; i < n ; i++) Cp [i] = CS_FLIP (Cp [i]) ;/* fix assembly tree */
  for (j = 0 ; j <= n ; j++) head [j] = -1 ;
  for (j = n ; j >= 0 ; j--)              /* place unordered nodes in lists */
    {
      if (nv [j] > 0) continue ;          /* skip if j is an element */
      next [j] = head [Cp [j]] ;          /* place j in list of its parent */
      head [Cp [j]] = j ;
    }
  for (e = n ; e >= 0 ; e--)              /* place elements in lists */
    {
      if (nv [e] <= 0) continue ;         /* skip unless e is an element */
      if (Cp [e] != -1)
        {
          next [e] = head [Cp [e]] ;      /* place e in list of its parent */
          head [Cp [e]] = e ;
        }
    }
  for (k = 0, i = 0 ; i <= n ; i++)       /* postorder the assembly tree */
    {
      if (Cp [i] == -1) k = csr_tdfs (i, k, head, next, P, w) ;
    }
  return (csr_idone (P, C, W, 1)) ;
}
/* Example #2 (0) */
/*
 * Benchmark driver for the blocked SpMV kernel.
 *
 * Usage: <program> csr_matrix_file
 *
 * Reads a CSR matrix, runs it through transpose / reorder / transpose,
 * splits it across the OpenMP threads, converts the pieces to the blocked
 * container and then times LOOP_TIME calls of spmv_blks with x set to all
 * ones.  Prints the average time per call and the derived GFLOPS figure.
 *
 * Returns 0 on success and EXIT_FAILURE on a usage error or when a buffer
 * cannot be allocated.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        fprintf(stderr, "usage: %s csr_matrix_file\n", argv[0]);
        /* A usage error must not be reported to the shell as success. */
        return EXIT_FAILURE;
    }

    int i;

    struct timespec start, end;

    /* Ask the OpenMP runtime how many threads a parallel region gets. */
    int num_threads = 1;
#pragma omp parallel
    {
#pragma omp master
        {
            num_threads = omp_get_num_threads();
        }
    }
    printf("Thread number: %d.\n", num_threads);

    /* Have each thread pass its own OpenMP thread number to thread_bind. */
#pragma omp parallel for
    for (i = 0; i < num_threads; i++)
    {
        int cpu = omp_get_thread_num();
        thread_bind(cpu);
    }

    struct csr_mat_t csr, csr_re, csr_t, csr_t_t;
    struct csr_cont_t csr_h;
    struct blk_cont_t blk_h;

    read_csr_mat(argv[1], &csr);
    int rows = csr.rows;
    int cols = csr.cols;
    INT64 non_zeros = csr.non_zeros;

    /* Preprocessing pipeline; each intermediate is released once consumed. */
    csr_transpose(&csr, &csr_t);
    release_csr_mat(&csr);
    int *reorder_map = (int*)malloc(cols * sizeof(int));
    if (reorder_map == NULL && cols > 0)
    {
        fprintf(stderr, "error: cannot allocate reorder map\n");
        return EXIT_FAILURE;
    }
    /* NOTE(review): reorder_map is not freed here because it is not visible
       whether csr_reorder keeps a reference to it -- confirm ownership. */
    csr_reorder(&csr_t, &csr_re, reorder_map);
    release_csr_mat(&csr_t);
    csr_transpose(&csr_re, &csr_t_t);
    release_csr_mat(&csr_re);
    split_csr_lb_nz(&csr_t_t, &csr_h, num_threads, SPLIT_HORIZON);
    release_csr_mat(&csr_t_t);
    csr_cont_to_blk_cont(&csr_h, &blk_h);
    release_csr_cont(&csr_h);

    printf("Notify: finished the preprocessing.\n");

    FLOAT *x = (FLOAT*)numa_alloc(cols * sizeof(FLOAT));
    FLOAT *y = (FLOAT*)numa_alloc(rows * sizeof(FLOAT));
    /* x is written below and y is handed to the kernel; refuse to go on
       with a NULL buffer.  (Assumes numa_alloc reports failure as NULL.) */
    if ((x == NULL && cols > 0) || (y == NULL && rows > 0))
    {
        fprintf(stderr, "error: cannot allocate SpMV vectors\n");
        return EXIT_FAILURE;
    }

    for (i = 0; i < cols; i++)
    {
        x[i] = 1.0;
    }

    // warm up
    spmv_blks(&blk_h, x, y, NULL);

    printf("Notify: begin csr spmv.\n");
    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
    for (i = 0; i < LOOP_TIME; i++)
    {
        spmv_blks(&blk_h, x, y, NULL);
    }
    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
    double time = get_sec(&start, &end) / LOOP_TIME;
    /* one multiply and one add per stored nonzero */
    double gflops = 2.0 * non_zeros / time * 1e-9;
    printf("Notify: blk spmv time = %lfs, perf = %lf GFLOPS.\n", time, gflops);
    // result_file(y, rows);

    return 0;
}