int 
stress_majorization_with_hierarchy(
    vtx_data* graph,    /* Input graph in sparse representation	 */
    int n,              /* Number of nodes */
    int nedges_graph,   /* Number of edges */
    double** d_coords,  /* Coordinates of nodes (output layout)  */
    int dim,            /* Dimensionality of layout */
    int smart_ini,      /* smart initialization */
    int model,          /* difference model */
    int maxi,           /* max iterations */
    double levels_gap
)
{
    int iterations = 0;    /* Output: number of iteration of the process */

	/*************************************************
	** Computation of full, dense, unrestricted k-D ** 
	** stress minimization by majorization          ** 
	** This function imposes HIERARCHY CONSTRAINTS  **
	*************************************************/

	int i,j,k;
	bool directionalityExist = FALSE;
	float * lap1 = NULL;
	float * dist_accumulator = NULL;
	float * tmp_coords = NULL;
	float ** b = NULL;
#ifdef NONCORE
	FILE * fp=NULL;
#endif
	double * degrees = NULL;
	float * lap2=NULL;
	int lap_length;
	float * f_storage=NULL;
	float ** coords=NULL;

	double conj_tol=tolerance_cg;        /* tolerance of Conjugate Gradient */
	float ** unpackedLap = NULL;
	CMajEnv *cMajEnv = NULL;
	clock_t start_time;
	double y_0;
	int length;
	DistType diameter;
	float * Dij=NULL;
    /* to compensate for noise, we never consider gaps smaller than 'abs_tol' */
	double abs_tol=1e-2; 
    /* Additionally, we never consider gaps smaller than 'relative_tol'*<avg_gap> */
    double relative_tol=levels_sep_tol; 
	int *ordering=NULL, *levels=NULL;
	double hierarchy_spread;
	float constant_term;
	int count;
	double degree;
	int step;
	float val;
	double old_stress, new_stress;
	bool converged;
	int len;
    int num_levels;
    float *hierarchy_boundaries;

	if (graph[0].edists!=NULL) {
		for (i=0; i<n; i++) {
			for (j=1; j<graph[i].nedges; j++) {
				 directionalityExist = directionalityExist || (graph[i].edists[j]!=0);
			}
		}
	}
	if (!directionalityExist) {
		return stress_majorization_kD_mkernel(graph, n, nedges_graph, d_coords, dim, smart_ini, model, maxi);
	}

	/******************************************************************
	** First, partition nodes into layers: These are our constraints **
	******************************************************************/

	if (smart_ini) {
		double* x;
		double* y;
		if (dim>2) {
			/* the dim==2 case is handled below			 */
			stress_majorization_kD_mkernel(graph, n, nedges_graph, d_coords+1, dim-1, smart_ini, model, 15);
			/* now copy the y-axis into the (dim-1)-axis */
			for (i=0; i<n; i++) {
				d_coords[dim-1][i] = d_coords[1][i];
			}
		}

		x = d_coords[0]; y = d_coords[1];
		compute_y_coords(graph, n, y, n);
		hierarchy_spread = compute_hierarchy(graph, n, abs_tol, relative_tol, y, &ordering, &levels, &num_levels);
		if (num_levels<=1) {
			/* no hierarchy found, use faster algorithm */
			return stress_majorization_kD_mkernel(graph, n, nedges_graph, d_coords, dim, smart_ini, model, maxi);
		}

		if (levels_gap>0) {
			/* ensure that levels are separated in the initial layout */
			double displacement = 0;
			int stop;
			for (i=0; i<num_levels; i++) {
				displacement+=MAX((double)0,levels_gap-(y[ordering[levels[i]]]+displacement-y[ordering[levels[i]-1]]));
				stop = i<num_levels-1 ? levels[i+1] : n;
				for (j=levels[i]; j<stop; j++) {
					y[ordering[j]] += displacement;
				}
			}
		}
		if (dim==2) {
			IMDS_given_dim(graph, n, y, x, Epsilon);
		}
	}
	else {
        initLayout(graph, n, dim, d_coords);
		hierarchy_spread = compute_hierarchy(graph, n, abs_tol, relative_tol, NULL, &ordering, &levels, &num_levels);		
	}
    if (n == 1) return 0;

	hierarchy_boundaries = N_GNEW(num_levels, float);

	/****************************************************
	** Compute the all-pairs-shortest-distances matrix **
	****************************************************/

	if (maxi==0)
		return iterations;

    if (model == MODEL_SUBSET) {
        /* weight graph to separate high-degree nodes */
        /* and perform slower Dijkstra-based computation */
        if (Verbose)
            fprintf(stderr, "Calculating subset model");
        Dij = compute_apsp_artifical_weights_packed(graph, n);
    } else if (model == MODEL_CIRCUIT) {
        Dij = circuitModel(graph, n);
        if (!Dij) {
            agerr(AGWARN,
                  "graph is disconnected. Hence, the circuit model\n");
            agerr(AGPREV,
                  "is undefined. Reverting to the shortest path model.\n");
        }
    }
    if (!Dij) {
        if (Verbose)
            fprintf(stderr, "Calculating shortest paths");
        Dij = compute_apsp_packed(graph, n);
    }

	diameter=-1;
	length = n+n*(n-1)/2;
	for (i=0; i<length; i++) {
		if (Dij[i]>diameter) {
			diameter = (int)Dij[i];
		}
	}

	if (!smart_ini) {
		/* for numerical stability, scale down layout		 */
		/* No Jiggling, might conflict with constraints			 */
		double max=1;		
		for (i=0; i<dim; i++) {	
			for (j=0; j<n; j++) {
				if (fabs(d_coords[i][j])>max) {
					max=fabs(d_coords[i][j]);
				}
			}	
		}
		for (i=0; i<dim; i++) {	
			for (j=0; j<n; j++) {
				d_coords[i][j]*=10/max;
			}	
		}
	}		

	if (levels_gap>0) {
		int length = n+n*(n-1)/2;
		double sum1, sum2, scale_ratio;
		int count;
		sum1=(float)(n*(n-1)/2);
		sum2=0;
		for (count=0, i=0; i<n-1; i++) {
			count++; // skip self distance
			for (j=i+1; j<n; j++,count++) {
				sum2+=distance_kD(d_coords, dim, i, j)/Dij[count];
			}
		}
		scale_ratio=sum2/sum1;
		/* double scale_ratio=10; */
		for (i=0; i<length; i++) {
			Dij[i]*=(float)scale_ratio;
		}
	}

	/**************************
	** Layout initialization **
	**************************/

	for (i=0; i<dim; i++) {		
		orthog1(n, d_coords[i]);
	}

	/* for the y-coords, don't center them, but translate them so y[0]=0 */
	y_0 = d_coords[1][0];
	for (i=0; i<n; i++) {
		d_coords[1][i] -= y_0;
	}

	coords = N_GNEW(dim, float*);
	f_storage = N_GNEW(dim*n, float);
	for (i=0; i<dim; i++) {
		coords[i] = f_storage+i*n;
		for (j=0; j<n; j++) {
			coords[i][j] = (float)(d_coords[i][j]);
		}
	}

	/* compute constant term in stress sum
	 * which is \sum_{i<j} w_{ij}d_{ij}^2
     */
	constant_term=(float)(n*(n-1)/2);
	
	/**************************
	** Laplacian computation **
	**************************/
			
	lap2 = Dij;
	lap_length = n+n*(n-1)/2;
	square_vec(lap_length, lap2);
	/* compute off-diagonal entries */
	invert_vec(lap_length, lap2);
	
	/* compute diagonal entries */
	count=0;
	degrees = N_GNEW(n, double);
	set_vector_val(n, 0, degrees);
	for (i=0; i<n-1; i++) {
		degree=0;
		count++; // skip main diag entry
		for (j=1; j<n-i; j++,count++) {
			val = lap2[count];
			degree+=val; degrees[i+j]-=val;
		}
		degrees[i]-=degree;
	}
	for (step=n,count=0,i=0; i<n; i++,count+=step,step--) {
		lap2[count]=(float)degrees[i];
	}

#ifdef NONCORE
	fpos_t pos;
	if (n>max_nodes_in_mem) {
		#define FILENAME "tmp_Dij$$$.bin"
		fp = fopen(FILENAME, "wb");
		fwrite(lap2, sizeof(float), lap_length, fp);
		fclose(fp);
	}
#endif
		
	/*************************
	** Layout optimization  **
	*************************/
	
	b = N_GNEW (dim, float*);
	b[0] = N_GNEW (dim*n, float);
	for (k=1; k<dim; k++) {
		b[k] = b[0]+k*n;
	}

	tmp_coords = N_GNEW(n, float);
	dist_accumulator = N_GNEW(n, float);
#ifdef NONCORE
	if (n<=max_nodes_in_mem) {
#endif
		lap1 = N_GNEW(lap_length, float);
#ifdef NONCORE
	}
int stress_majorization_cola(
    vtx_data * graph,	/* Input graph in sparse representation  */
    int n,              /* Number of nodes */
    int nedges_graph,	/* Number of edges */
    double **d_coords,	/* Coordinates of nodes (output layout)  */
    node_t **nodes,	/* Original nodes */
    int dim,            /* Dimensionality of layout */
    int model,	        /* difference model */
    int maxi,	        /* max iterations */
    ipsep_options * opt)
{
    int iterations = 0;	  /* Output: number of iteration of the process */

	/*************************************************
	** Computation of full, dense, unrestricted k-D ** 
	** stress minimization by majorization          ** 
	** This function imposes HIERARCHY CONSTRAINTS  **
	*************************************************/

    int i, j, k;
    float *lap1 = NULL;
    float *dist_accumulator = NULL;
    float *tmp_coords = NULL;
    float **b = NULL;
    double *degrees = NULL;
    float *lap2 = NULL;
    int lap_length;
    float *f_storage = NULL;
    float **coords = NULL;
    int orig_n = n;

    /*double conj_tol=tolerance_cg; *//* tolerance of Conjugate Gradient */
    CMajEnvVPSC *cMajEnvHor = NULL;
    CMajEnvVPSC *cMajEnvVrt = NULL;
    double y_0;
    int length;
    DistType diameter;
    float *Dij = NULL;
    float constant_term;
    int count;
    double degree;
    int step;
    float val;
    double old_stress, new_stress = 0;
    boolean converged;
    int len;
    double nsizeScale = 0;
    float maxEdgeLen = 0;
    double max = 1;

    initLayout(graph, n, dim, d_coords, nodes);
    if (n == 1)
	return 0;

    for (i = 0; i < n; i++) {
	for (j = 1; j < graph[i].nedges; j++) {
	    maxEdgeLen = MAX(graph[i].ewgts[j], maxEdgeLen);
	}
    }

	/****************************************************
	** Compute the all-pairs-shortest-distances matrix **
	****************************************************/

    if (maxi == 0)
	return iterations;

    if (Verbose)
	start_timer();

    if (model == MODEL_SUBSET) {
	/* weight graph to separate high-degree nodes */
	/* and perform slower Dijkstra-based computation */
	if (Verbose)
	    fprintf(stderr, "Calculating subset model");
	Dij = compute_apsp_artifical_weights_packed(graph, n);
    } else if (model == MODEL_CIRCUIT) {
	Dij = circuitModel(graph, n);
	if (!Dij) {
	    agerr(AGWARN,
		  "graph is disconnected. Hence, the circuit model\n");
	    agerr(AGPREV,
		  "is undefined. Reverting to the shortest path model.\n");
	}
    } else if (model == MODEL_MDS) {
	if (Verbose)
	    fprintf(stderr, "Calculating MDS model");
	Dij = mdsModel(graph, n);
    }
    if (!Dij) {
	if (Verbose)
	    fprintf(stderr, "Calculating shortest paths");
	Dij = compute_apsp_packed(graph, n);
    }
    if (Verbose) {
	fprintf(stderr, ": %.2f sec\n", elapsed_sec());
	fprintf(stderr, "Setting initial positions");
	start_timer();
    }

    diameter = -1;
    length = n + n * (n - 1) / 2;
    for (i = 0; i < length; i++) {
	if (Dij[i] > diameter) {
	    diameter = (int) Dij[i];
	}
    }

    /* for numerical stability, scale down layout                */
    /* No Jiggling, might conflict with constraints                      */
    for (i = 0; i < dim; i++) {
	for (j = 0; j < n; j++) {
	    if (fabs(d_coords[i][j]) > max) {
		max = fabs(d_coords[i][j]);
	    }
	}
    }
    for (i = 0; i < dim; i++) {
	for (j = 0; j < n; j++) {
	    d_coords[i][j] *= 10 / max;
	}
    }

	/**************************
	** Layout initialization **
	**************************/

    for (i = 0; i < dim; i++) {
	orthog1(n, d_coords[i]);
    }

    /* for the y-coords, don't center them, but translate them so y[0]=0 */
    y_0 = d_coords[1][0];
    for (i = 0; i < n; i++) {
	d_coords[1][i] -= y_0;
    }
    if (Verbose) fprintf(stderr, ": %.2f sec", elapsed_sec());

	/**************************
	** Laplacian computation **
	**************************/

    lap2 = Dij;
    lap_length = n + n * (n - 1) / 2;
    square_vec(lap_length, lap2);
    /* compute off-diagonal entries */
    invert_vec(lap_length, lap2);

    if (opt->clusters->nclusters > 0) {
	int nn = n + opt->clusters->nclusters * 2;
	int clap_length = nn + nn * (nn - 1) / 2;
	float *clap = N_GNEW(clap_length, float);
	int c0, c1;
	float v;
	c0 = c1 = 0;
	for (i = 0; i < nn; i++) {
	    for (j = 0; j < nn - i; j++) {
		if (i < n && j < n - i) {
		    v = lap2[c0++];
		} else {
		    /* v=j==1?i%2:0; */
		    if (j == 1 && i % 2 == 1) {
			v = maxEdgeLen;
			v *= v;
			if (v > 0.01) {
			    v = 1.0 / v;
			}
		    } else
			v = 0;
		}
		clap[c1++] = v;
	    }
	}
	free(lap2);
	lap2 = clap;
	n = nn;
	lap_length = clap_length;
    }