/*
 * Two-dimensional Kolmogorov-Smirnov comparison of one sample against a
 * user-supplied model.
 *
 * x1, y1  : sample coordinates, read at indices 1..n1 (1-based).
 * n1      : number of sample points.
 * quadvl  : callback that, for a point (x, y), fills four model values.
 * d1      : out - largest absolute difference found between the four values
 *           produced by quadct() for the sample and the four values produced
 *           by quadvl() at the same point, over all sample points.
 * prob    : out - value returned by probks() for the scaled statistic.
 *
 * quadct(), pearsn(), probks() and the FMAX macro are defined elsewhere.
 */
void ks2d1s(float x1[], float y1[], unsigned long n1,
    void (*quadvl)(float, float, float *, float *, float *, float *),
    float *d1, float *prob)
{
    void pearsn(float x[], float y[], unsigned long n, float *r, float *prob, float *z);
    float probks(float alam);
    void quadct(float x, float y, float xx[], float yy[], unsigned long nn,
        float *fa, float *fb, float *fc, float *fd);
    unsigned long idx;
    float unused_prob, unused_z;
    float sa, sb, sc, sd;          /* values from the sample (quadct) */
    float ma, mb, mc, md;          /* values from the model (quadvl)  */
    float corr, decorr, root_n;

    *d1 = 0.0;
    idx = 1;
    while (idx <= n1) {
        quadct(x1[idx], y1[idx], x1, y1, n1, &sa, &sb, &sc, &sd);
        (*quadvl)(x1[idx], y1[idx], &ma, &mb, &mc, &md);
        *d1 = FMAX(*d1, fabs(sa - ma));
        *d1 = FMAX(*d1, fabs(sb - mb));
        *d1 = FMAX(*d1, fabs(sc - mc));
        *d1 = FMAX(*d1, fabs(sd - md));
        idx++;
    }

    /* Only the coefficient r from pearsn() is used; the other outputs are discarded. */
    pearsn(x1, y1, n1, &corr, &unused_prob, &unused_z);
    root_n = sqrt((double)n1);
    decorr = sqrt(1.0 - corr*corr);
    *prob = probks(*d1 * root_n / (1.0 + decorr*(0.25 - 0.75/root_n)));
}
/*
 * Two-sample Kolmogorov-Smirnov comparison of two integer samples of equal
 * size n.
 *
 * Both arrays are sorted in place with qsort()/intCompare as a side effect.
 * The statistic D is the largest absolute difference between the two
 * empirical distribution functions; the return value is
 * probks(sqrt(n*n/(n+n)) * D), with probks() defined elsewhere.
 *
 * Tied values are handled by stepping both samples past every occurrence of
 * the current smallest value before the distributions are compared, so two
 * identical samples yield D == 0 even when values repeat.
 *
 * For n <= 0 there is nothing to compare; 1.0 is returned without touching
 * the arrays.
 */
static double ksTwo(int *vect1, int *vect2, int n)
{
    int j1 = 0, j2 = 0;      /* number of elements already consumed */
    double en, d = 0.0;

    if (n <= 0)
        return 1.0;

    en = (double)n;
    qsort(vect1, (size_t)n, sizeof(int), intCompare);
    qsort(vect2, (size_t)n, sizeof(int), intCompare);

    /*
     * Once one sample is exhausted its distribution stays at 1 while the
     * other only moves toward 1, so the gap cannot grow any further.
     */
    while (j1 < n && j2 < n) {
        int v1 = vect1[j1];
        int v2 = vect2[j2];
        int v = (v1 < v2) ? v1 : v2;
        double dt;

        while (j1 < n && vect1[j1] == v)
            j1++;
        while (j2 < n && vect2[j2] == v)
            j2++;

        dt = fabs(j1 / en - j2 / en);
        if (dt > d)
            d = dt;
    }

    return probks(sqrt(en * en / (en + en)) * d);
}
/*
 * Two-sample Kolmogorov-Smirnov comparison.
 *
 * data1, n1 : first sample, read at indices 1..n1 (1-based).
 * data2, n2 : second sample, read at indices 1..n2 (1-based).
 * d         : out - largest absolute difference between the two empirical
 *             distribution functions.
 * prob      : out - probks((en + 0.12 + 0.11/en) * D) with
 *             en = sqrt(n1*n2/(n1+n2)); probks() is defined elsewhere.
 *
 * Both arrays are passed to sort() (defined elsewhere; the walk below relies
 * on it leaving them in ascending order) and are therefore modified.
 *
 * Runs of equal values are consumed as a whole before the distributions are
 * compared; stepping one element at a time through a tie reports a spurious
 * difference when the sample sizes differ.
 *
 * If either sample is empty, *d is set to 0 and *prob to 1. NaN values are
 * not supported: neither comparison in the loop would advance.
 */
void kstwo(double data1[], unsigned long n1, double data2[], unsigned long n2,
    double *d, double *prob)
{
    double probks(double alam);
    void sort(unsigned long n, double arr[]);
    unsigned long j1 = 1, j2 = 1;
    double d1, d2, dt, en1, en2, en, fn1 = 0.0, fn2 = 0.0;

    /* The external sort helper is not asked to handle zero-length input. */
    if (n1 > 0)
        sort(n1, data1);
    if (n2 > 0)
        sort(n2, data2);

    *d = 0.0;
    if (n1 == 0 || n2 == 0) {
        /* en would be 0 (or 0/0) and 0.11/en would not be finite. */
        *prob = 1.0;
        return;
    }

    en1 = n1;
    en2 = n2;
    while (j1 <= n1 && j2 <= n2) {
        d1 = data1[j1];
        d2 = data2[j2];
        if (d1 <= d2) {
            /* step past every copy of d1 in the first sample */
            do {
                fn1 = j1++ / en1;
            } while (j1 <= n1 && data1[j1] == d1);
        }
        if (d2 <= d1) {
            /* step past every copy of d2 in the second sample */
            do {
                fn2 = j2++ / en2;
            } while (j2 <= n2 && data2[j2] == d2);
        }
        dt = fabs(fn2 - fn1);
        if (dt > *d)
            *d = dt;
    }

    en = sqrt(en1 * en2 / (en1 + en2));
    *prob = probks((en + 0.12 + 0.11 / en) * (*d));
}
/*
 * One-sample Kolmogorov-Smirnov comparison of a data set against a model
 * distribution function.
 *
 * data : sample, read at indices 1..n (1-based); passed to sort() first, so
 *        the array is modified.
 * n    : number of values.
 * func : callback evaluated once per data value.
 * d    : out - largest absolute difference between func(data[k]) and the
 *        step function values (k-1)/n and k/n.
 * prob : out - probks((en + 0.12 + 0.11/en) * D) with en = sqrt(n).
 *
 * sort(), probks() and the FMAX macro are defined elsewhere.
 */
void ksone(double data[], unsigned long n, double (*func)(double), double *d, double *prob)
{
    double probks(double alam);
    void sort(unsigned long n, double arr[]);
    unsigned long k;
    double count, root_n;
    double step_below, step_above, model, gap;

    sort(n, data);
    count = n;
    *d = 0.0;

    step_below = 0.0;
    k = 1;
    while (k <= n) {
        step_above = k / count;
        model = (*func)(data[k]);
        /* compare the model with the step function on both sides of the jump */
        gap = FMAX(fabs(step_below - model), fabs(step_above - model));
        if (gap > *d)
            *d = gap;
        step_below = step_above;
        k++;
    }

    root_n = sqrt(count);
    *prob = probks((root_n + 0.12 + 0.11 / root_n) * (*d));
}
/*
 * Demo driver: tabulates probks(lambda) for lambda = EPS, 2*EPS, ..., NPTS*EPS
 * and prints each value next to a bar of '*' characters whose length is the
 * value scaled to ISCAL-1 columns.
 *
 * NPTS, EPS, ISCAL and probks() are defined elsewhere.
 */
int main(void)
{
    char bar[ISCAL+1];
    float lambda, value;
    int row, col, stars;

    printf("probability function for kolmogorov-smirnov statistic\n\n");
    printf("%7s %10s %13s\n","lambda","value:","graph:");

    /* The terminator slot is never overwritten by the fill loop below. */
    bar[ISCAL] = '\0';

    for (row = 1; row <= NPTS; row++) {
        lambda = row*EPS;
        value = probks(lambda);
        stars = (int) (0.5+(ISCAL-1)*value);

        col = 0;
        while (col < ISCAL) {
            bar[col] = (col < stars) ? '*' : ' ';
            col++;
        }

        printf("%8.6f %10.6f %s\n",lambda,value,bar);
    }
    return 0;
}
/*
 * Helper for ks2d2s: for every point (px[k], py[k]), k = 1..np, calls
 * quadct() once against sample 1 and once against sample 2 and returns the
 * largest absolute difference between corresponding outputs.
 */
static float ks2d2s_max_gap(float px[], float py[], unsigned long np,
    float x1[], float y1[], unsigned long n1,
    float x2[], float y2[], unsigned long n2)
{
    void quadct(float x, float y, float xx[], float yy[], unsigned long nn,
        float *fa, float *fb, float *fc, float *fd);
    unsigned long k;
    float a1, b1, c1, e1;      /* quadct outputs for sample 1 */
    float a2, b2, c2, e2;      /* quadct outputs for sample 2 */
    float gap = 0.0;

    for (k = 1; k <= np; k++) {
        quadct(px[k], py[k], x1, y1, n1, &a1, &b1, &c1, &e1);
        quadct(px[k], py[k], x2, y2, n2, &a2, &b2, &c2, &e2);
        gap = FMAX(gap, fabs(a1 - a2));
        gap = FMAX(gap, fabs(b1 - b2));
        gap = FMAX(gap, fabs(c1 - c2));
        gap = FMAX(gap, fabs(e1 - e2));
    }
    return gap;
}

/*
 * Two-dimensional Kolmogorov-Smirnov comparison of two samples.
 *
 * x1, y1, n1 : first sample, read at indices 1..n1 (1-based).
 * x2, y2, n2 : second sample, read at indices 1..n2 (1-based).
 * d          : out - average of two maxima: the largest quadct() difference
 *              seen when the points of sample 1 are used as origins, and the
 *              largest seen when the points of sample 2 are used.
 * prob       : out - value returned by probks() for the scaled statistic.
 *
 * quadct(), pearsn(), probks() and the FMAX macro are defined elsewhere.
 */
void ks2d2s(float x1[], float y1[], unsigned long n1, float x2[], float y2[],
    unsigned long n2, float *d, float *prob)
{
    void pearsn(float x[], float y[], unsigned long n, float *r, float *prob, float *z);
    float probks(float alam);
    float gap1, gap2;
    float unused_prob, unused_z;
    float corr1, corr2, decorr, root_n;

    gap1 = ks2d2s_max_gap(x1, y1, n1, x1, y1, n1, x2, y2, n2);
    gap2 = ks2d2s_max_gap(x2, y2, n2, x1, y1, n1, x2, y2, n2);
    *d = 0.5*(gap1+gap2);

    root_n = sqrt(n1*n2/(double)(n1+n2));

    /* Only the coefficient r from each pearsn() call is used. */
    pearsn(x1, y1, n1, &corr1, &unused_prob, &unused_z);
    pearsn(x2, y2, n2, &corr2, &unused_prob, &unused_z);
    decorr = sqrt(1.0-0.5*(corr1*corr1+corr2*corr2));

    *prob = probks(*d*root_n/(1.0+decorr*(0.25-0.75/root_n)));
}
/*
 * Kolmogorov-Smirnov statistic for a sequence of class labels.
 *
 * cls    : len labels; 0 and 1 are the two classes, any other value
 *          (negative or greater than 1) is treated as invalid and skipped.
 * len    : number of entries in cls.
 * n0, n1 : out - number of entries equal to 0 and to 1. These are written
 *          on every path, including the degenerate ones.
 * kstail : out - probks() of the returned statistic, or 1.0 when either
 *          class is empty (which covers len <= 1).
 *
 * Returns 0 when either class is empty. Otherwise returns the scaled
 * statistic (en + 0.12 + 0.11/en) * max|y|, where en = sqrt(n0*n1/(n0+n1))
 * and y is a running sum that goes up by 1/n0 on each 0 and down by 1/n1 on
 * each 1. probks() is defined elsewhere.
 *
 * Crude analysis (from the original author): the variance of one step of y
 * is (1/n0 + 1/n1) / (n0+n1), so y viewed as Brownian motion (not a bridge)
 * has variance (1/n0 + 1/n1). y is rescaled to correspond to a Brownian
 * bridge; the first-order correction is en, and a Bartlett-type correction
 * is what is actually applied. The normalized y is what gets returned.
 */
double ks2(int *cls, int len, int *n0, int *n1, double *kstail)
{
    int i;
    double en0, en1, en;
    double y, ymax;

    /* Count class sizes first so that *n0 and *n1 are always written. */
    *n0 = *n1 = 0;
    for (i = 0; i < len; i++) {
        if (cls[i] == 0)
            ++*n0;
        else if (cls[i] == 1)
            ++*n1;
    }

    /* With len <= 1 at least one class is empty, so this covers that case. */
    if (*n0 == 0 || *n1 == 0) {
        *kstail = 1.0;
        return 0;
    }

    en0 = (double) *n0;
    en1 = (double) *n1;

    /* running stat */
    ymax = y = 0.0;
    for (i = 0; i < len; i++) {
        if (cls[i] == 0)
            y += 1.0/en0;
        else if (cls[i] == 1)
            y -= 1.0/en1;
        else
            continue;
        if (fabs(y) > ymax)
            ymax = fabs(y);
    }

    /* Numerical recipes p 626 */
    en = sqrt(en0*en1/(en0+en1));
    y = en+.12+(0.11/en);
    y *= ymax;
    *kstail = probks(y);
    return y;
}
/*
 * Significance of a two-sample Kolmogorov-Smirnov distance.
 *
 * dist   : the KS distance between the two samples.
 * n1, n2 : the two sample sizes (passed as doubles).
 *
 * Returns probks((en + 0.12 + 0.11/en) * dist) with
 * en = sqrt(n1*n2/(n1+n2)); probks() is defined elsewhere.
 */
double ks_significance(double dist, double n1,double n2)
{
    double effective_n = n1*n2/(n1+n2);
    double en = sqrt(effective_n);
    double scale = en+0.12+0.11/en;

    return probks(scale*dist);
}