/* for easier posterior manipulation a matrix is returned rather than a vector even even at the cost of hogging more memory */ void do_ird_variable(double *answer, int *obs, SEXP strList, int binary_measure){ SEXP first, second; R_len_t len; int i, j, ij; len = length(strList); ij = 0; for (i = 0; i < len; i++){ for (j = 0; j < i; j++){ first = VECTOR_ELT(strList, i); second = VECTOR_ELT(strList, j); if (length(first) == 0 || length(second) == 0){ ij++; continue; } obs[ij] += 1; if (length(first) == 1 && length(second) == 1){ /* simple case */ if (STRCMPR(first, 0, second, 0) == 0){ answer[ij] += 1.0; } ij++; continue; } answer[ij] += do_binary(first, second, binary_measure); /* binary measure */ ij++; } } }
void do_ipi_variable(double *answer, int *obs, SEXP strList, int binary_measure){ SEXP count, uniqueLemmas, valueVector, coidentityVector, first, second; int i, j, ij, nobs, index; R_len_t nlem, nlemu; double *valueVector_ptrs, *coidentityVector_ptrs, val, bval, totalCount, *count_ptrs, firstVal, secondVal, aux; /* count lemmas unique lemmas */ PROTECT(uniqueLemmas = single_arg_R_fun("unique", strList)); /* 1 */ PROTECT(count = allocVector(REALSXP, length(uniqueLemmas))); /* 2 */ memset(REAL(count), 0.0, length(uniqueLemmas) * sizeof(double)); count_ptrs = REAL(count); totalCount = 0.0; nlem = length(strList); nlemu = length(uniqueLemmas); for (i = 0; i < nlem; i++){ if (length(VECTOR_ELT(strList, i)) == 0) continue; index = matchFirstStr(VECTOR_ELT(strList, i), uniqueLemmas); count_ptrs[index] = count_ptrs[index] + 1.0; totalCount = totalCount + 1.0; } /* compute the TAX values */ PROTECT(valueVector = allocVector(REALSXP, nlemu)); /* 3 */ PROTECT(coidentityVector = allocVector(REALSXP, nlemu)); /* 4 */ memset(REAL(valueVector), 0.0, nlemu * sizeof(double)); memset(REAL(coidentityVector), 0.0, nlemu* sizeof(double)); valueVector_ptrs = REAL(valueVector); coidentityVector_ptrs = REAL(coidentityVector); for (i = 0; i < nlemu; i++){ first = VECTOR_ELT(uniqueLemmas, i); for (j = 0; j < nlemu; j++){ second = VECTOR_ELT(uniqueLemmas, j); bval = do_binary(first, second, binary_measure); if (bval == 0.0){ coidentityVector_ptrs[i]++; continue; } valueVector_ptrs[i] += (bval * count_ptrs[j]); } valueVector_ptrs[i] = valueVector_ptrs[i] / totalCount; } /* apply identity values */ ij = 0; for (i = 0; i < nlem; i++){ for (j = 0; j < i; j++){ first = VECTOR_ELT(strList, i); second = VECTOR_ELT(strList, j); if (length(first) == 0 || length(second) == 0){ /* if any na pass */ ij++; continue; } obs[ij]++; bval = do_binary(first, second, binary_measure); firstVal = valueVector_ptrs[matchFirstStr(first, uniqueLemmas)]; secondVal = valueVector_ptrs[matchFirstStr(second, uniqueLemmas)]; aux = bval * (firstVal + secondVal) / 2.0; val = 1.0 - (aux - 1.0) / totalCount; answer[ij] += (val / (val + (1.0 - aux))); ij++; } } UNPROTECT(4); }
/* Program extracts from Chapter 13 of