// Calculate equal categories void MgFeatureNumericFunctions::GetEqualCategories(VECTOR &values, int numCats, double dataMin, double dataMax, VECTOR &distValues) { // Expected categories should be more than zero if (numCats <= 0) { STRING message = MgServerFeatureUtil::GetMessage(L"MgInvalidComputedProperty"); MgStringCollection arguments; arguments.Add(message); throw new MgFeatureServiceException(L"MgServerSelectFeatures.GetEqualCategories", __LINE__, __WFILE__, &arguments, L"", NULL); } // find the range of the data values double min = DoubleMaxValue; double max = -DoubleMaxValue; int cnt = (int)values.size(); if (cnt <= 0) { return; } // Nothing to do, we just send back Property Definition to clients from reader for (int i=0; i < cnt; i++) { double val = values[i]; if (val > max) max = val; if (val < min) min = val; } // expand the range a little to account for numerical instability double delta = 0.0001 * (max - min); min -= delta; max += delta; // but don't let the values extend beyond the data min/max if (min < dataMin) min = dataMin; if (max > dataMax) max = dataMax; // This method ignores dataMin and dataMax. A different "Equal" distribution // might ignore the actual data values and create categories based on dataMin // and dataMax when those values are not +/- infinity. // fill in the categories distValues.push_back(min); delta = (max - min) / (double)numCats; for (int i=1; i<numCats; i++) { double nextval = distValues[i-1] + delta; distValues.push_back(nextval); } distValues.push_back(max); }
// Appends the value reported by MgServerFeatureUtil::Maximum for the given
// values to distValues.
void MgFeatureNumericFunctions::GetMaximum(VECTOR &values, VECTOR &distValues)
{
    // TODO: Change this algorithm to take reader directly instead of vector
    const double largest = MgServerFeatureUtil::Maximum(values);
    distValues.push_back(largest);
}
// Calculate Standard Deviation for the values void MgFeatureNumericFunctions::GetStandardDeviation(VECTOR &values, VECTOR &distValues) { double mean = 0; int cnt = (int)values.size(); if (cnt <= 0) { return; } // Nothing to do, we just send back Property Definition to clients from reader double min = DoubleMaxValue; double max = -DoubleMaxValue; for (int i=0; i < cnt; i++) { double val = values[i]; if (val > max) max = val; if (val < min) min = val; mean += val; } // expand min and max a little to account for numerical instability double delta = 0.0001 * (max - min); min -= delta; max += delta; // compute the mean, variance and standard deviation double count = (double)cnt; // (guaranteed to be > 0) mean /= count; double variance = 0; for (int i=0; i < cnt; i++) { variance += (values[i] - mean) * (values[i] - mean); } double deviation = sqrt((double)(variance / count)); // Set the base date as min date if (m_type == MgPropertyType::DateTime) { deviation += min; } distValues.push_back(deviation); return; }
// Calculate average void MgFeatureNumericFunctions::GetMeanValue(VECTOR &values, VECTOR &distValues) { double mean = 0; int cnt = (int)values.size(); if (cnt <= 0) { return; } // Nothing to do, we just send back Property Definition to clients from reader for (int i=0; i < cnt; i++) { double val = values[i]; mean += val; } // compute the mean, variance and standard deviation double count = (double)cnt; // (guaranteed to be > 0) mean /= count; distValues.push_back(mean); return; }
int run_main (int, ACE_TCHAR *[]) { ACE_START_TEST (ACE_TEXT ("Vector_Test")); VECTOR vector; size_t i; for (i = 0; i < TOP; ++i) vector.push_back (i); ACE_TEST_ASSERT (vector.size () == TOP); ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("Size: %d\n"), vector.size ())); for (i = 0; i < TOP; ++i) ACE_TEST_ASSERT (vector[i] == i); // Test to be sure the iterator gets the correct count and entries. ITERATOR iter (vector); DATA *p_item = 0 ; size_t iter_count = 0; while (!iter.done ()) { if (iter.next (p_item) == 0) ACE_ERROR ((LM_ERROR, ACE_TEXT ("Fail to get value on iter pass %d\n"), iter_count)); if (*p_item != iter_count) ACE_ERROR ((LM_ERROR, ACE_TEXT ("Iter pass %d got %d\n"), iter_count, *p_item)); iter_count++; iter.advance(); } if (iter_count != TOP) ACE_ERROR ((LM_ERROR, ACE_TEXT ("Iterated %d elements; expected %d\n"), iter_count, TOP)); for (i = 0; i < (TOP - LEFT); ++i) vector.pop_back (); ACE_TEST_ASSERT (vector.size () == LEFT); ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("Size: %d\n"), vector.size ())); for (i = 0; i < LEFT; ++i) { ACE_TEST_ASSERT (vector[i] == i); ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("vector[%d]:%d\n"), i, vector[i])); } vector.resize(RESIZE, 0); ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("After resize\n"))); for (i = 0; i < RESIZE ; ++i) { // The original vector of size LEFT must have the same original contents // the new elements should have the value 0 (this value is passed as // second argument of the resize() call. 
if (i < LEFT) { ACE_TEST_ASSERT (vector[i] == i); } else { ACE_TEST_ASSERT (vector[i] == 0); } ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("vector[%d]:%d\n"), i, vector[i])); } vector.clear (); ACE_TEST_ASSERT (vector.size () == 0); ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("Size: %d\n"), vector.size ())); // test resize (shrink and enlarge with buffer realloc) VECTOR vector2; // should be around 32 size_t boundary = vector2.capacity (); // we fill everything up with 1 // 1, 1, 1, 1, 1, 1, 1, 1, // 1, 1, 1, 1, 1, 1, 1, 1, // 1, 1, 1, 1, 1, 1, 1, 1, // 1, 1, 1, 1, 1, 1, 1, 1, for (i = 0; i < boundary; ++i) vector2.push_back (FILLER1); // we throw almost everything away. vector2.resize (1, 0); // we fill up with another pattern // 1, 2, 2, 2, 2, 2, 2, 2, // 2, 2, 2, 2, 2, 2, 2, 2, // 2, 2, 2, 2, 2, 2, 2, 2, // 2, 2, 2, 2, 2, 2, 2, 2, // 2, for (i = 0; i < boundary; ++i) vector2.push_back (FILLER2); // now we check the result ACE_TEST_ASSERT (vector2[0] == FILLER1); for (i = 0; i < boundary; ++i) ACE_TEST_ASSERT (vector2[i+1] == FILLER2); VECTOR v1; VECTOR v2; v1.push_back (1); v2.push_back (1); v1.push_back (2); v2.push_back (2); if (v1 != v2) ACE_ERROR ((LM_ERROR, ACE_TEXT ("Inequality test failed!\n"))); if (!(v1 == v2)) ACE_ERROR ((LM_ERROR, ACE_TEXT ("Equality test failed!\n"))); v1.push_back (3); if (v1.size () != 3) ACE_ERROR ((LM_ERROR, ACE_TEXT ("v1's size should be 3\n"))); v1.swap (v2); if (v2.size () != 3) ACE_ERROR ((LM_ERROR, ACE_TEXT ("v2's size should be 3\n"))); ACE_END_TEST; return 0; }
//------------------------------------------------------------------------- // Jenks' Optimization Method // //------------------------------------------------------------------------- void MgFeatureNumericFunctions::GetJenksCategories( VECTOR &inputData, int numPartsRequested, double dataMin, double dataMax, VECTOR &distValues ) { // numPartsRequested // 2 - 10; 5 is good int i = 0; // index for numObservations (may be very large) int j = 0; // index for numPartsRequested (about 4-8) int k = 0; // Sort the data values in ascending order std::sort(inputData.begin(), inputData.end()); int numObservations = (int)inputData.size(); // may be very large // Possible improvement: Rework the code to use normal 0 based arrays. // Actually it doesn't matter much since we have to create two // matrices that themselves use more memory than the local copy // of inputData. // // In order to ease the use of original FORTRAN and the later BASIC // code, I will use 1 origin arrays and copy the inputData into // a local array; // I'll dimension the arrays one larger than necessary and use the // index values from the original code. // // The algorithm must calculate with floating point values. // If more optimization is attempted in the future, be aware of // problems with calculations using mixed numeric types. std::vector<double> data; data.push_back(0); // dummy value at index 0 std::copy(inputData.begin(), inputData.end(), std::back_inserter(data)); // copy from parameter inputData so that data index starts from 1 // Note that the Matrix constructors initialize all values to 0. 
// mat1 contains integer values used for indices into data // mat2 contains floating point values of data and bigNum MgMatrix<int> mat1(numObservations + 1, numPartsRequested + 1); MgMatrix<double> mat2(numObservations + 1, numPartsRequested + 1); // const double bigNum = 1e+14; // from original BASIC code; // const double bigNum = std::numeric_limits<double>::max(); const double bigNum = DBL_MAX; // compiler's float.h for (i = 1; i <= numPartsRequested; ++i) { mat1.Set(1, i, 1); for (j = 2; j <= numObservations; ++j) { mat2.Set(j, i, bigNum); } } std::vector<int> classBounds; classBounds.push_back(-2); // dummy value for (i = 1; i <= numPartsRequested; ++i) { classBounds.push_back(-1); } for (int L = 2; L <= numObservations; ++L) { double s1 = 0; double s2 = 0; double v = 0; int w = 0; for (int m = 1; m <= L; ++m) { int i3 = L - m + 1; double val = data[i3]; s2 += (double(val) * double(val)); // if datatype of val is ever allowed to be same as template // parameter T, make sure multiplication is done in double. 
s1 += val; ++w; v = s2 - ((s1 * s1) / w); int i4 = i3 - 1; if (i4 > 0) { for (j = 2; j <= numPartsRequested; ++j) { double tempnum = v + mat2.Get(i4, j - 1); if (double(mat2.Get(L, j)) >= tempnum) { mat1.Set(L, j, i3); mat2.Set(L, j, tempnum); } } } } mat1.Set(L, 1, 1); mat2.Set(L, 1, v); } k = numObservations; for (j = numPartsRequested; j >= 1; --j) { if (k >= 0 && k <= numObservations) { classBounds[j] = mat1.Get(k, j); k = mat1.Get(k, j) - 1; } } std::vector<int> indices; indices.push_back(0); for (i = 2; i <= numPartsRequested; ++i) { int index = classBounds[i] - 1; if (index > indices.back()) { indices.push_back(index); } } FixGroups(inputData, indices); double val = 0.0; int index = 0; int totIndex = (int)indices.size(); for (int i = 1; i < totIndex; ++i) { index = indices[i] - 1; val = inputData[index]; distValues.push_back(val); } index = numObservations - 1; val = inputData[index]; distValues.push_back(val); int retCnt = (int)distValues.size(); int inCnt = (int)inputData.size(); if (retCnt > 0 && inCnt > 0) { if (!doubles_equal(distValues[0],inputData[0])) { distValues.insert(distValues.begin(), inputData[0]); } } return; }
// Calculate Quantile Distribution for the values void MgFeatureNumericFunctions::GetQuantileCategories( VECTOR &values, int numCats, double dataMin, double dataMax, VECTOR &distValues ) { // Expected categories should be more than zero if (numCats <= 0) { STRING message = MgServerFeatureUtil::GetMessage(L"MgInvalidComputedProperty"); MgStringCollection arguments; arguments.Add(message); throw new MgFeatureServiceException(L"MgServerSelectFeatures.GetEqualCategories", __LINE__, __WFILE__, &arguments, L"", NULL); } int count = (int)values.size(); if (count <= 0) { return; } // Nothing to do, we just send back Property Definition to clients from reader // Sort the data values in ascending order std::sort(values.begin(), values.end()); // How many go into each full bucket? int perBucket = ROUND((double)count/(double)numCats); if (perBucket * numCats > count) perBucket--; // How many buckets are full, and how many are missing one? int nearlyFullBuckets = numCats - (count - perBucket * numCats); int fullBuckets = numCats - nearlyFullBuckets; // expand min and max a little to account for numerical instability double delta = 0.0001 * (values[count-1] - values[0]); double* categories = new double[numCats+1]; // the first and last categories are limited by the data method limits categories[0] = values[0] - delta; if (categories[0] < dataMin) categories[0] = dataMin; categories[numCats] = values[count-1] + delta; if (categories[numCats] > dataMax) categories[numCats] = dataMax; // Mix full and nearly-full buckets to fill in the categories between the ends. int indexOfLast = -1; for ( int i = 1; i<numCats; i++) { bool doingSmallBucket = (nearlyFullBuckets > fullBuckets); // find the index of the last element we want in this bucket indexOfLast += (doingSmallBucket ? perBucket : perBucket + 1); // make category value be halfway between that element and the next categories[i] = 0.5 * (values[indexOfLast] + values[indexOfLast+1]); // Decrement count of correct bucket type. 
if (doingSmallBucket) nearlyFullBuckets--; else fullBuckets--; } for (int kk = 0; kk < numCats+1; kk++) { distValues.push_back(categories[kk]); } delete[] categories; // Delete the memory allocated before }
// Calculate Standard Deviation for the values void MgFeatureNumericFunctions::GetStandardDeviationCategories( VECTOR &values, int numCats, double dataMin, double dataMax, VECTOR &distValues) { // Expected categories should be more than zero if (numCats <= 0) { STRING message = MgServerFeatureUtil::GetMessage(L"MgInvalidComputedProperty"); MgStringCollection arguments; arguments.Add(message); throw new MgFeatureServiceException(L"MgServerSelectFeatures.GetEqualCategories", __LINE__, __WFILE__, &arguments, L"", NULL); } // collect information about the data values double min = DoubleMaxValue; double max = -DoubleMaxValue; double mean = 0; int cnt = (int)values.size(); if (cnt <= 0) { return; } // Nothing to do, we just send back Property Definition to clients from reader for (int i=0; i < cnt; i++) { double val = values[i]; if (val > max) max = val; if (val < min) min = val; mean += val; } // expand min and max a little to account for numerical instability double delta = 0.0001 * (max - min); min -= delta; max += delta; // compute the mean, variance and standard deviation double count = (double)cnt; // (guaranteed to be > 0) mean /= count; double variance = 0; for (int i=0; i < cnt; i++) { double val = values[i]; variance += (val - mean) * (val - mean); } double deviation = sqrt(variance / count); // fill in the middle category/categories double* cats = new double[numCats+1]; int midCat, highMidCat; if (numCats % 2 == 0) { midCat = numCats / 2; highMidCat = midCat; cats[midCat] = mean; } else { midCat = (numCats - 1) / 2; highMidCat = midCat + 1; cats[midCat] = mean - 0.5 * deviation; cats[highMidCat] = mean + 0.5 * deviation; } // fill in the other categories for (int i=midCat-1; i>=0; i--) cats[i] = cats[i+1] - deviation; for (int i=highMidCat; i<=numCats; i++) cats[i] = cats[i-1] + deviation; // if the data method specifies strict a strict min and/or max, use them if (!IsInf(dataMin) && !IsNan(dataMin) && (dataMin != -DoubleMaxValue)) min = dataMin; if 
(!IsInf(dataMax) && !IsNan(dataMax) && (dataMax != DoubleMaxValue)) max = dataMax; // flatten/clip any categories that extend beyond the min/max range for (int i=0; i<=numCats; i++) { if (cats[i] < min) cats[i] = min; else if (cats[i] > max) cats[i] = max; } for (int kk = 0; kk < numCats+1; kk++) { distValues.push_back(cats[kk]); } delete[] cats; // Delete the memory allocated before }