summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/toolutil/denseranges.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/tools/toolutil/denseranges.cpp')
-rw-r--r--intl/icu/source/tools/toolutil/denseranges.cpp160
1 files changed, 160 insertions, 0 deletions
diff --git a/intl/icu/source/tools/toolutil/denseranges.cpp b/intl/icu/source/tools/toolutil/denseranges.cpp
new file mode 100644
index 000000000..3b83715f2
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/denseranges.cpp
@@ -0,0 +1,160 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: denseranges.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010sep25
+* created by: Markus W. Scherer
+*
+* Helper code for finding a small number of dense ranges.
+*/
+
+#include "unicode/utypes.h"
+#include "denseranges.h"
+
+// Definitions in the anonymous namespace are invisible outside this file.
+namespace {
+
/**
 * Collects the largest gaps seen so far, at most kCapacity (15) of them,
 * and keeps them sorted by descending gap length (index 0 is the largest).
 * Gaps of equal length stay in the order in which they were added.
 */
class LargestGaps {
public:
    /**
     * @param max Maximum number of gaps to keep.
     *            Values above kCapacity are limited to kCapacity,
     *            values below 0 are treated as 0 (nothing is ever stored).
     */
    explicit LargestGaps(int32_t max) : maxLength(0), length(0) {
        if(max>kCapacity) {
            maxLength=kCapacity;
        } else if(max>0) {
            maxLength=max;
        }
    }

    /**
     * Offers one gap. It is stored only if it is among the maxLength largest
     * gaps offered so far; in that case the smallest stored gap is dropped
     * when the collection is already full.
     * @param gapStart First value that is missing.
     * @param gapLength Number of missing values.
     */
    void add(int32_t gapStart, int64_t gapLength) {
        // Find the insertion point: skip backwards over all strictly smaller gaps.
        int32_t i=length;
        while(i>0 && gapLength>gapLengths[i-1]) {
            --i;
        }
        if(i<maxLength) {
            // The new gap is now one of the maxLength largest.
            // Open a slot at i by moving the smaller gaps up by one.
            // When the arrays are full, the last (smallest) gap is overwritten.
            int32_t j= length<maxLength ? length++ : maxLength-1;
            while(j>i) {
                gapStarts[j]=gapStarts[j-1];
                gapLengths[j]=gapLengths[j-1];
                --j;
            }
            gapStarts[i]=gapStart;
            gapLengths[i]=gapLength;
        }
    }

    /**
     * Keeps only the newLength largest gaps.
     * Does nothing if fewer gaps are stored; a negative newLength removes all gaps.
     */
    void truncate(int32_t newLength) {
        if(newLength<0) {
            // A negative length would make add() index before the arrays.
            newLength=0;
        }
        if(newLength<length) {
            length=newLength;
        }
    }

    /** @return Number of gaps currently stored. */
    int32_t count() const { return length; }
    /** @return Start of the i-th largest gap. Requires 0<=i<count(); not checked. */
    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    /** @return Length of the i-th largest gap. Requires 0<=i<count(); not checked. */
    int64_t gapLength(int32_t i) const { return gapLengths[i]; }

    /**
     * Finds the stored gap with the smallest start that is greater than value.
     * @return The index of that gap, or -1 if no stored gap starts after value
     *         (which includes the case of an empty collection).
     */
    int32_t firstAfter(int32_t value) const {
        int32_t minValue=0;
        int32_t minIndex=-1;
        for(int32_t i=0; i<length; ++i) {
            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
                minValue=gapStarts[i];
                minIndex=i;
            }
        }
        return minIndex;
    }

private:
    static const int32_t kCapacity=15;

    int32_t maxLength;  // Number of gaps to keep, 0..kCapacity.
    int32_t length;     // Number of gaps currently stored, 0..maxLength.
    // Parallel arrays; only the first length entries are valid.
    int32_t gapStarts[kCapacity];
    int64_t gapLengths[kCapacity];
};
+
+} // namespace
+
+/**
+ * Does it make sense to write 1..capacity ranges?
+ * Returns 0 if not, otherwise the number of ranges.
+ * @param values Sorted array of signed-integer values.
+ * @param length Number of values.
+ * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
+ * Should be 0x80..0x100, must be 1..0x100.
+ * @param ranges Output ranges array.
+ * @param capacity Maximum number of ranges.
+ * @return Minimum number of ranges (at most capacity) that have the desired density,
+ * or 0 if that density cannot be achieved.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_makeDenseRanges(const int32_t values[], int32_t length,
+ int32_t density,
+ int32_t ranges[][2], int32_t capacity) {
+ if(length<=2) {
+ return 0;
+ }
+ int32_t minValue=values[0];
+ int32_t maxValue=values[length-1]; // Assume minValue<=maxValue.
+ // Use int64_t variables for intermediate-value precision and to avoid
+ // signed-int32_t overflow of maxValue-minValue.
+ int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
+ if(length>=(density*maxLength)/0x100) {
+ // Use one range.
+ ranges[0][0]=minValue;
+ ranges[0][1]=maxValue;
+ return 1;
+ }
+ if(length<=4) {
+ return 0;
+ }
+ // See if we can split [minValue, maxValue] into 2..capacity ranges,
+ // divided by the 1..(capacity-1) largest gaps.
+ LargestGaps gaps(capacity-1);
+ int32_t i;
+ int32_t expectedValue=minValue;
+ for(i=1; i<length; ++i) {
+ ++expectedValue;
+ int32_t actualValue=values[i];
+ if(expectedValue!=actualValue) {
+ gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
+ expectedValue=actualValue;
+ }
+ }
+ // We know gaps.count()>=1 because we have fewer values (length) than
+ // the length of the [minValue..maxValue] range (maxLength).
+ // (Otherwise we would have returned with the one range above.)
+ int32_t num;
+ for(i=0, num=2;; ++i, ++num) {
+ if(i>=gaps.count()) {
+ // The values are too sparse for capacity or fewer ranges
+ // of the requested density.
+ return 0;
+ }
+ maxLength-=gaps.gapLength(i);
+ if(length>num*2 && length>=(density*maxLength)/0x100) {
+ break;
+ }
+ }
+ // Use the num ranges with the num-1 largest gaps.
+ gaps.truncate(num-1);
+ ranges[0][0]=minValue;
+ for(i=0; i<=num-2; ++i) {
+ int32_t gapIndex=gaps.firstAfter(minValue);
+ int32_t gapStart=gaps.gapStart(gapIndex);
+ ranges[i][1]=gapStart-1;
+ ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
+ }
+ ranges[num-1][1]=maxValue;
+ return num;
+}