summaryrefslogtreecommitdiffstats
path: root/libraries/pack200/src/coding.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/pack200/src/coding.cpp')
-rw-r--r--libraries/pack200/src/coding.cpp1762
1 files changed, 881 insertions, 881 deletions
diff --git a/libraries/pack200/src/coding.cpp b/libraries/pack200/src/coding.cpp
index 6bd17a3c..8e872013 100644
--- a/libraries/pack200/src/coding.cpp
+++ b/libraries/pack200/src/coding.cpp
@@ -48,12 +48,12 @@ extern coding basic_codings[];
#pragma GCC diagnostic ignored "-Wunused-variable"
#define CODING_PRIVATE(spec) \
- int spec_ = spec; \
- int B = CODING_B(spec_); \
- int H = CODING_H(spec_); \
- int L = 256 - H; \
- int S = CODING_S(spec_); \
- int D = CODING_D(spec_)
+ int spec_ = spec; \
+ int B = CODING_B(spec_); \
+ int H = CODING_H(spec_); \
+ int L = 256 - H; \
+ int S = CODING_S(spec_); \
+ int D = CODING_D(spec_)
#define IS_NEG_CODE(S, codeVal) ((((int)(codeVal) + 1) & ((1 << S) - 1)) == 0)
@@ -61,568 +61,568 @@ extern coding basic_codings[];
static int decode_sign(int S, uint32_t ux)
{ // == Coding.decodeSign32
- assert(S > 0);
- uint32_t sigbits = (ux >> S);
- if (IS_NEG_CODE(S, ux))
- return (int)(~sigbits);
- else
- return (int)(ux - sigbits);
- // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
+ assert(S > 0);
+ uint32_t sigbits = (ux >> S);
+ if (IS_NEG_CODE(S, ux))
+ return (int)(~sigbits);
+ else
+ return (int)(ux - sigbits);
+ // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
}
coding *coding::init()
{
- if (umax > 0)
- return this; // already done
- assert(spec != 0); // sanity
-
- // fill in derived fields
- CODING_PRIVATE(spec);
-
- // Return nullptr if 'arb(BHSD)' parameter constraints are not met:
- if (B < 1 || B > B_MAX)
- return nullptr;
- if (H < 1 || H > 256)
- return nullptr;
- if (S < 0 || S > 2)
- return nullptr;
- if (D < 0 || D > 1)
- return nullptr;
- if (B == 1 && H != 256)
- return nullptr; // 1-byte coding must be fixed-size
- if (B >= 5 && H == 256)
- return nullptr; // no 5-byte fixed-size coding
-
- // first compute the range of the coding, in 64 bits
- int64_t range = 0;
- {
- int64_t H_i = 1;
- for (int i = 0; i < B; i++)
- {
- range += H_i;
- H_i *= H;
- }
- range *= L;
- range += H_i;
- }
- assert(range > 0); // no useless codings, please
-
- int this_umax;
-
- // now, compute min and max
- if (range >= ((int64_t)1 << 32))
- {
- this_umax = INT_MAX_VALUE;
- this->umin = INT_MIN_VALUE;
- this->max = INT_MAX_VALUE;
- this->min = INT_MIN_VALUE;
- }
- else
- {
- this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range - 1;
- this->max = this_umax;
- this->min = this->umin = 0;
- if (S != 0 && range != 0)
- {
- int64_t maxPosCode = range - 1;
- int64_t maxNegCode = range - 1;
- while (IS_NEG_CODE(S, maxPosCode))
- --maxPosCode;
- while (!IS_NEG_CODE(S, maxNegCode))
- --maxNegCode;
- int maxPos = decode_sign(S, (uint32_t)maxPosCode);
- if (maxPos < 0)
- this->max = INT_MAX_VALUE; // 32-bit wraparound
- else
- this->max = maxPos;
- if (maxNegCode < 0)
- this->min = 0; // No negative codings at all.
- else
- this->min = decode_sign(S, (uint32_t)maxNegCode);
- }
- }
-
- assert(!(isFullRange | isSigned | isSubrange)); // init
- if (min < 0)
- this->isSigned = true;
- if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
- this->isSubrange = true;
- if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
- this->isFullRange = true;
-
- // do this last, to reduce MT exposure (should have a membar too)
- this->umax = this_umax;
-
- return this;
+ if (umax > 0)
+ return this; // already done
+ assert(spec != 0); // sanity
+
+ // fill in derived fields
+ CODING_PRIVATE(spec);
+
+ // Return nullptr if 'arb(BHSD)' parameter constraints are not met:
+ if (B < 1 || B > B_MAX)
+ return nullptr;
+ if (H < 1 || H > 256)
+ return nullptr;
+ if (S < 0 || S > 2)
+ return nullptr;
+ if (D < 0 || D > 1)
+ return nullptr;
+ if (B == 1 && H != 256)
+ return nullptr; // 1-byte coding must be fixed-size
+ if (B >= 5 && H == 256)
+ return nullptr; // no 5-byte fixed-size coding
+
+ // first compute the range of the coding, in 64 bits
+ int64_t range = 0;
+ {
+ int64_t H_i = 1;
+ for (int i = 0; i < B; i++)
+ {
+ range += H_i;
+ H_i *= H;
+ }
+ range *= L;
+ range += H_i;
+ }
+ assert(range > 0); // no useless codings, please
+
+ int this_umax;
+
+ // now, compute min and max
+ if (range >= ((int64_t)1 << 32))
+ {
+ this_umax = INT_MAX_VALUE;
+ this->umin = INT_MIN_VALUE;
+ this->max = INT_MAX_VALUE;
+ this->min = INT_MIN_VALUE;
+ }
+ else
+ {
+ this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range - 1;
+ this->max = this_umax;
+ this->min = this->umin = 0;
+ if (S != 0 && range != 0)
+ {
+ int64_t maxPosCode = range - 1;
+ int64_t maxNegCode = range - 1;
+ while (IS_NEG_CODE(S, maxPosCode))
+ --maxPosCode;
+ while (!IS_NEG_CODE(S, maxNegCode))
+ --maxNegCode;
+ int maxPos = decode_sign(S, (uint32_t)maxPosCode);
+ if (maxPos < 0)
+ this->max = INT_MAX_VALUE; // 32-bit wraparound
+ else
+ this->max = maxPos;
+ if (maxNegCode < 0)
+ this->min = 0; // No negative codings at all.
+ else
+ this->min = decode_sign(S, (uint32_t)maxNegCode);
+ }
+ }
+
+ assert(!(isFullRange | isSigned | isSubrange)); // init
+ if (min < 0)
+ this->isSigned = true;
+ if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
+ this->isSubrange = true;
+ if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
+ this->isFullRange = true;
+
+ // do this last, to reduce MT exposure (should have a membar too)
+ this->umax = this_umax;
+
+ return this;
}
coding *coding::findBySpec(int spec)
{
- for (coding *scan = &basic_codings[0];; scan++)
- {
- if (scan->spec == spec)
- return scan->init();
- if (scan->spec == 0)
- break;
- }
- coding *ptr = NEW(coding, 1);
- if (!ptr)
- return nullptr;
- coding *c = ptr->initFrom(spec);
- if (c == nullptr)
- {
- ::free(ptr);
- }
- else
- // else caller should free it...
- c->isMalloc = true;
- return c;
+ for (coding *scan = &basic_codings[0];; scan++)
+ {
+ if (scan->spec == spec)
+ return scan->init();
+ if (scan->spec == 0)
+ break;
+ }
+ coding *ptr = NEW(coding, 1);
+ if (!ptr)
+ return nullptr;
+ coding *c = ptr->initFrom(spec);
+ if (c == nullptr)
+ {
+ ::free(ptr);
+ }
+ else
+ // else caller should free it...
+ c->isMalloc = true;
+ return c;
}
coding *coding::findBySpec(int B, int H, int S, int D)
{
- if (B < 1 || B > B_MAX)
- return nullptr;
- if (H < 1 || H > 256)
- return nullptr;
- if (S < 0 || S > 2)
- return nullptr;
- if (D < 0 || D > 1)
- return nullptr;
- return findBySpec(CODING_SPEC(B, H, S, D));
+ if (B < 1 || B > B_MAX)
+ return nullptr;
+ if (H < 1 || H > 256)
+ return nullptr;
+ if (S < 0 || S > 2)
+ return nullptr;
+ if (D < 0 || D > 1)
+ return nullptr;
+ return findBySpec(CODING_SPEC(B, H, S, D));
}
void coding::free()
{
- if (isMalloc)
- {
- ::free(this);
- }
+ if (isMalloc)
+ {
+ ::free(this);
+ }
}
void coding_method::reset(value_stream *state)
{
- assert(state->rp == state->rplimit); // not in mid-stream, please
- // assert(this == vs0.cm);
- state[0] = vs0;
- if (uValues != nullptr)
- {
- uValues->reset(state->helper());
- }
+ assert(state->rp == state->rplimit); // not in mid-stream, please
+ // assert(this == vs0.cm);
+ state[0] = vs0;
+ if (uValues != nullptr)
+ {
+ uValues->reset(state->helper());
+ }
}
uint32_t coding::parse(byte *&rp, int B, int H)
{
- int L = 256 - H;
- byte *ptr = rp;
- // hand peel the i==0 part of the loop:
- uint32_t b_i = *ptr++ & 0xFF;
- if (B == 1 || b_i < (uint32_t)L)
- {
- rp = ptr;
- return b_i;
- }
- uint32_t sum = b_i;
- uint32_t H_i = H;
- assert(B <= B_MAX);
- for (int i = 2; i <= B_MAX; i++)
- { // easy for compilers to unroll if desired
- b_i = *ptr++ & 0xFF;
- sum += b_i * H_i;
- if (i == B || b_i < (uint32_t)L)
- {
- rp = ptr;
- return sum;
- }
- H_i *= H;
- }
- assert(false);
- return 0;
+ int L = 256 - H;
+ byte *ptr = rp;
+ // hand peel the i==0 part of the loop:
+ uint32_t b_i = *ptr++ & 0xFF;
+ if (B == 1 || b_i < (uint32_t)L)
+ {
+ rp = ptr;
+ return b_i;
+ }
+ uint32_t sum = b_i;
+ uint32_t H_i = H;
+ assert(B <= B_MAX);
+ for (int i = 2; i <= B_MAX; i++)
+ { // easy for compilers to unroll if desired
+ b_i = *ptr++ & 0xFF;
+ sum += b_i * H_i;
+ if (i == B || b_i < (uint32_t)L)
+ {
+ rp = ptr;
+ return sum;
+ }
+ H_i *= H;
+ }
+ assert(false);
+ return 0;
}
uint32_t coding::parse_lgH(byte *&rp, int B, int H, int lgH)
{
- assert(H == (1 << lgH));
- int L = 256 - (1 << lgH);
- byte *ptr = rp;
- // hand peel the i==0 part of the loop:
- uint32_t b_i = *ptr++ & 0xFF;
- if (B == 1 || b_i < (uint32_t)L)
- {
- rp = ptr;
- return b_i;
- }
- uint32_t sum = b_i;
- uint32_t lg_H_i = lgH;
- assert(B <= B_MAX);
- for (int i = 2; i <= B_MAX; i++)
- { // easy for compilers to unroll if desired
- b_i = *ptr++ & 0xFF;
- sum += b_i << lg_H_i;
- if (i == B || b_i < (uint32_t)L)
- {
- rp = ptr;
- return sum;
- }
- lg_H_i += lgH;
- }
- assert(false);
- return 0;
+ assert(H == (1 << lgH));
+ int L = 256 - (1 << lgH);
+ byte *ptr = rp;
+ // hand peel the i==0 part of the loop:
+ uint32_t b_i = *ptr++ & 0xFF;
+ if (B == 1 || b_i < (uint32_t)L)
+ {
+ rp = ptr;
+ return b_i;
+ }
+ uint32_t sum = b_i;
+ uint32_t lg_H_i = lgH;
+ assert(B <= B_MAX);
+ for (int i = 2; i <= B_MAX; i++)
+ { // easy for compilers to unroll if desired
+ b_i = *ptr++ & 0xFF;
+ sum += b_i << lg_H_i;
+ if (i == B || b_i < (uint32_t)L)
+ {
+ rp = ptr;
+ return sum;
+ }
+ lg_H_i += lgH;
+ }
+ assert(false);
+ return 0;
}
static const char ERB[] = "EOF reading band";
void coding::parseMultiple(byte *&rp, int N, byte *limit, int B, int H)
{
- if (N < 0)
- {
- unpack_abort("bad value count");
- return;
- }
- byte *ptr = rp;
- if (B == 1 || H == 256)
- {
- size_t len = (size_t)N * B;
- if (len / B != (size_t)N || ptr + len > limit)
- {
- unpack_abort(ERB);
- return;
- }
- rp = ptr + len;
- return;
- }
- // Note: We assume rp has enough zero-padding.
- int L = 256 - H;
- int n = B;
- while (N > 0)
- {
- ptr += 1;
- if (--n == 0)
- {
- // end of encoding at B bytes, regardless of byte value
- }
- else
- {
- int b = (ptr[-1] & 0xFF);
- if (b >= L)
- {
- // keep going, unless we find a byte < L
- continue;
- }
- }
- // found the last byte
- N -= 1;
- n = B; // reset length counter
- // do an error check here
- if (ptr > limit)
- {
- unpack_abort(ERB);
- return;
- }
- }
- rp = ptr;
- return;
+ if (N < 0)
+ {
+ unpack_abort("bad value count");
+ return;
+ }
+ byte *ptr = rp;
+ if (B == 1 || H == 256)
+ {
+ size_t len = (size_t)N * B;
+ if (len / B != (size_t)N || ptr + len > limit)
+ {
+ unpack_abort(ERB);
+ return;
+ }
+ rp = ptr + len;
+ return;
+ }
+ // Note: We assume rp has enough zero-padding.
+ int L = 256 - H;
+ int n = B;
+ while (N > 0)
+ {
+ ptr += 1;
+ if (--n == 0)
+ {
+ // end of encoding at B bytes, regardless of byte value
+ }
+ else
+ {
+ int b = (ptr[-1] & 0xFF);
+ if (b >= L)
+ {
+ // keep going, unless we find a byte < L
+ continue;
+ }
+ }
+ // found the last byte
+ N -= 1;
+ n = B; // reset length counter
+ // do an error check here
+ if (ptr > limit)
+ {
+ unpack_abort(ERB);
+ return;
+ }
+ }
+ rp = ptr;
+ return;
}
bool value_stream::hasHelper()
{
- // If my coding method is a pop-style method,
- // then I need a second value stream to transmit
- // unfavored values.
- // This can be determined by examining fValues.
- return cm->fValues != nullptr;
+ // If my coding method is a pop-style method,
+ // then I need a second value stream to transmit
+ // unfavored values.
+ // This can be determined by examining fValues.
+ return cm->fValues != nullptr;
}
void value_stream::init(byte *rp_, byte *rplimit_, coding *defc)
{
- rp = rp_;
- rplimit = rplimit_;
- sum = 0;
- cm = nullptr; // no need in the simple case
- setCoding(defc);
+ rp = rp_;
+ rplimit = rplimit_;
+ sum = 0;
+ cm = nullptr; // no need in the simple case
+ setCoding(defc);
}
void value_stream::setCoding(coding *defc)
{
- if (defc == nullptr)
- {
- unpack_abort("bad coding");
- defc = coding::findByIndex(_meta_canon_min); // random pick for recovery
- }
-
- c = (*defc);
-
- // choose cmk
- cmk = cmk_ERROR;
- switch (c.spec)
- {
- case BYTE1_spec:
- cmk = cmk_BYTE1;
- break;
- case CHAR3_spec:
- cmk = cmk_CHAR3;
- break;
- case UNSIGNED5_spec:
- cmk = cmk_UNSIGNED5;
- break;
- case DELTA5_spec:
- cmk = cmk_DELTA5;
- break;
- case BCI5_spec:
- cmk = cmk_BCI5;
- break;
- case BRANCH5_spec:
- cmk = cmk_BRANCH5;
- break;
- default:
- if (c.D() == 0)
- {
- switch (c.S())
- {
- case 0:
- cmk = cmk_BHS0;
- break;
- case 1:
- cmk = cmk_BHS1;
- break;
- default:
- cmk = cmk_BHS;
- break;
- }
- }
- else
- {
- if (c.S() == 1)
- {
- if (c.isFullRange)
- cmk = cmk_BHS1D1full;
- if (c.isSubrange)
- cmk = cmk_BHS1D1sub;
- }
- if (cmk == cmk_ERROR)
- cmk = cmk_BHSD1;
- }
- }
+ if (defc == nullptr)
+ {
+ unpack_abort("bad coding");
+ defc = coding::findByIndex(_meta_canon_min); // random pick for recovery
+ }
+
+ c = (*defc);
+
+ // choose cmk
+ cmk = cmk_ERROR;
+ switch (c.spec)
+ {
+ case BYTE1_spec:
+ cmk = cmk_BYTE1;
+ break;
+ case CHAR3_spec:
+ cmk = cmk_CHAR3;
+ break;
+ case UNSIGNED5_spec:
+ cmk = cmk_UNSIGNED5;
+ break;
+ case DELTA5_spec:
+ cmk = cmk_DELTA5;
+ break;
+ case BCI5_spec:
+ cmk = cmk_BCI5;
+ break;
+ case BRANCH5_spec:
+ cmk = cmk_BRANCH5;
+ break;
+ default:
+ if (c.D() == 0)
+ {
+ switch (c.S())
+ {
+ case 0:
+ cmk = cmk_BHS0;
+ break;
+ case 1:
+ cmk = cmk_BHS1;
+ break;
+ default:
+ cmk = cmk_BHS;
+ break;
+ }
+ }
+ else
+ {
+ if (c.S() == 1)
+ {
+ if (c.isFullRange)
+ cmk = cmk_BHS1D1full;
+ if (c.isSubrange)
+ cmk = cmk_BHS1D1sub;
+ }
+ if (cmk == cmk_ERROR)
+ cmk = cmk_BHSD1;
+ }
+ }
}
static int getPopValue(value_stream *self, uint32_t uval)
{
- if (uval > 0)
- {
- // note that the initial parse performed a range check
- assert(uval <= (uint32_t)self->cm->fVlength);
- return self->cm->fValues[uval - 1];
- }
- else
- {
- // take an unfavored value
- return self->helper()->getInt();
- }
+ if (uval > 0)
+ {
+ // note that the initial parse performed a range check
+ assert(uval <= (uint32_t)self->cm->fVlength);
+ return self->cm->fValues[uval - 1];
+ }
+ else
+ {
+ // take an unfavored value
+ return self->helper()->getInt();
+ }
}
int coding::sumInUnsignedRange(int x, int y)
{
- assert(isSubrange);
- int range = (int)(umax + 1);
- assert(range > 0);
- x += y;
- if (x != (int)((int64_t)(x - y) + (int64_t)y))
- {
- // 32-bit overflow interferes with range reduction.
- // Back off from the overflow by adding a multiple of range:
- if (x < 0)
- {
- x -= range;
- assert(x >= 0);
- }
- else
- {
- x += range;
- assert(x < 0);
- }
- }
- if (x < 0)
- {
- x += range;
- if (x >= 0)
- return x;
- }
- else if (x >= range)
- {
- x -= range;
- if (x < range)
- return x;
- }
- else
- {
- // in range
- return x;
- }
- // do it the hard way
- x %= range;
- if (x < 0)
- x += range;
- return x;
+ assert(isSubrange);
+ int range = (int)(umax + 1);
+ assert(range > 0);
+ x += y;
+ if (x != (int)((int64_t)(x - y) + (int64_t)y))
+ {
+ // 32-bit overflow interferes with range reduction.
+ // Back off from the overflow by adding a multiple of range:
+ if (x < 0)
+ {
+ x -= range;
+ assert(x >= 0);
+ }
+ else
+ {
+ x += range;
+ assert(x < 0);
+ }
+ }
+ if (x < 0)
+ {
+ x += range;
+ if (x >= 0)
+ return x;
+ }
+ else if (x >= range)
+ {
+ x -= range;
+ if (x < range)
+ return x;
+ }
+ else
+ {
+ // in range
+ return x;
+ }
+ // do it the hard way
+ x %= range;
+ if (x < 0)
+ x += range;
+ return x;
}
static int getDeltaValue(value_stream *self, uint32_t uval, bool isSubrange)
{
- assert((uint32_t)(self->c.isSubrange) == (uint32_t)isSubrange);
- assert(self->c.isSubrange | self->c.isFullRange);
- if (isSubrange)
- return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
- else
- return self->sum += (int)uval;
+ assert((uint32_t)(self->c.isSubrange) == (uint32_t)isSubrange);
+ assert(self->c.isSubrange | self->c.isFullRange);
+ if (isSubrange)
+ return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
+ else
+ return self->sum += (int)uval;
}
bool value_stream::hasValue()
{
- if (rp < rplimit)
- return true;
- if (cm == nullptr)
- return false;
- if (cm->next == nullptr)
- return false;
- cm->next->reset(this);
- return hasValue();
+ if (rp < rplimit)
+ return true;
+ if (cm == nullptr)
+ return false;
+ if (cm->next == nullptr)
+ return false;
+ cm->next->reset(this);
+ return hasValue();
}
int value_stream::getInt()
{
- if (rp >= rplimit)
- {
- // Advance to next coding segment.
- if (rp > rplimit || cm == nullptr || cm->next == nullptr)
- {
- // Must perform this check and throw an exception on bad input.
- unpack_abort(ERB);
- return 0;
- }
- cm->next->reset(this);
- return getInt();
- }
-
- CODING_PRIVATE(c.spec);
- uint32_t uval;
- enum
- {
- B5 = 5,
- B3 = 3,
- H128 = 128,
- H64 = 64,
- H4 = 4
- };
- switch (cmk)
- {
- case cmk_BHS:
- assert(D == 0);
- uval = coding::parse(rp, B, H);
- if (S == 0)
- return (int)uval;
- return decode_sign(S, uval);
-
- case cmk_BHS0:
- assert(S == 0 && D == 0);
- uval = coding::parse(rp, B, H);
- return (int)uval;
-
- case cmk_BHS1:
- assert(S == 1 && D == 0);
- uval = coding::parse(rp, B, H);
- return DECODE_SIGN_S1(uval);
-
- case cmk_BYTE1:
- assert(c.spec == BYTE1_spec);
- assert(B == 1 && H == 256 && S == 0 && D == 0);
- return *rp++ & 0xFF;
-
- case cmk_CHAR3:
- assert(c.spec == CHAR3_spec);
- assert(B == B3 && H == H128 && S == 0 && D == 0);
- return coding::parse_lgH(rp, B3, H128, 7);
-
- case cmk_UNSIGNED5:
- assert(c.spec == UNSIGNED5_spec);
- assert(B == B5 && H == H64 && S == 0 && D == 0);
- return coding::parse_lgH(rp, B5, H64, 6);
-
- case cmk_BHSD1:
- assert(D == 1);
- uval = coding::parse(rp, B, H);
- if (S != 0)
- uval = (uint32_t)decode_sign(S, uval);
- return getDeltaValue(this, uval, (bool)c.isSubrange);
-
- case cmk_BHS1D1full:
- assert(S == 1 && D == 1 && c.isFullRange);
- uval = coding::parse(rp, B, H);
- uval = (uint32_t)DECODE_SIGN_S1(uval);
- return getDeltaValue(this, uval, false);
-
- case cmk_BHS1D1sub:
- assert(S == 1 && D == 1 && c.isSubrange);
- uval = coding::parse(rp, B, H);
- uval = (uint32_t)DECODE_SIGN_S1(uval);
- return getDeltaValue(this, uval, true);
-
- case cmk_DELTA5:
- assert(c.spec == DELTA5_spec);
- assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
- uval = coding::parse_lgH(rp, B5, H64, 6);
- sum += DECODE_SIGN_S1(uval);
- return sum;
-
- case cmk_BCI5:
- assert(c.spec == BCI5_spec);
- assert(B == B5 && H == H4 && S == 0 && D == 0);
- return coding::parse_lgH(rp, B5, H4, 2);
-
- case cmk_BRANCH5:
- assert(c.spec == BRANCH5_spec);
- assert(B == B5 && H == H4 && S == 2 && D == 0);
- uval = coding::parse_lgH(rp, B5, H4, 2);
- return decode_sign(S, uval);
-
- case cmk_pop:
- uval = coding::parse(rp, B, H);
- if (S != 0)
- {
- uval = (uint32_t)decode_sign(S, uval);
- }
- if (D != 0)
- {
- assert(c.isSubrange | c.isFullRange);
- if (c.isSubrange)
- sum = c.sumInUnsignedRange(sum, (int)uval);
- else
- sum += (int)uval;
- uval = (uint32_t)sum;
- }
- return getPopValue(this, uval);
-
- case cmk_pop_BHS0:
- assert(S == 0 && D == 0);
- uval = coding::parse(rp, B, H);
- return getPopValue(this, uval);
-
- case cmk_pop_BYTE1:
- assert(c.spec == BYTE1_spec);
- assert(B == 1 && H == 256 && S == 0 && D == 0);
- return getPopValue(this, *rp++ & 0xFF);
-
- default:
- break;
- }
- assert(false);
- return 0;
+ if (rp >= rplimit)
+ {
+ // Advance to next coding segment.
+ if (rp > rplimit || cm == nullptr || cm->next == nullptr)
+ {
+ // Must perform this check and throw an exception on bad input.
+ unpack_abort(ERB);
+ return 0;
+ }
+ cm->next->reset(this);
+ return getInt();
+ }
+
+ CODING_PRIVATE(c.spec);
+ uint32_t uval;
+ enum
+ {
+ B5 = 5,
+ B3 = 3,
+ H128 = 128,
+ H64 = 64,
+ H4 = 4
+ };
+ switch (cmk)
+ {
+ case cmk_BHS:
+ assert(D == 0);
+ uval = coding::parse(rp, B, H);
+ if (S == 0)
+ return (int)uval;
+ return decode_sign(S, uval);
+
+ case cmk_BHS0:
+ assert(S == 0 && D == 0);
+ uval = coding::parse(rp, B, H);
+ return (int)uval;
+
+ case cmk_BHS1:
+ assert(S == 1 && D == 0);
+ uval = coding::parse(rp, B, H);
+ return DECODE_SIGN_S1(uval);
+
+ case cmk_BYTE1:
+ assert(c.spec == BYTE1_spec);
+ assert(B == 1 && H == 256 && S == 0 && D == 0);
+ return *rp++ & 0xFF;
+
+ case cmk_CHAR3:
+ assert(c.spec == CHAR3_spec);
+ assert(B == B3 && H == H128 && S == 0 && D == 0);
+ return coding::parse_lgH(rp, B3, H128, 7);
+
+ case cmk_UNSIGNED5:
+ assert(c.spec == UNSIGNED5_spec);
+ assert(B == B5 && H == H64 && S == 0 && D == 0);
+ return coding::parse_lgH(rp, B5, H64, 6);
+
+ case cmk_BHSD1:
+ assert(D == 1);
+ uval = coding::parse(rp, B, H);
+ if (S != 0)
+ uval = (uint32_t)decode_sign(S, uval);
+ return getDeltaValue(this, uval, (bool)c.isSubrange);
+
+ case cmk_BHS1D1full:
+ assert(S == 1 && D == 1 && c.isFullRange);
+ uval = coding::parse(rp, B, H);
+ uval = (uint32_t)DECODE_SIGN_S1(uval);
+ return getDeltaValue(this, uval, false);
+
+ case cmk_BHS1D1sub:
+ assert(S == 1 && D == 1 && c.isSubrange);
+ uval = coding::parse(rp, B, H);
+ uval = (uint32_t)DECODE_SIGN_S1(uval);
+ return getDeltaValue(this, uval, true);
+
+ case cmk_DELTA5:
+ assert(c.spec == DELTA5_spec);
+ assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
+ uval = coding::parse_lgH(rp, B5, H64, 6);
+ sum += DECODE_SIGN_S1(uval);
+ return sum;
+
+ case cmk_BCI5:
+ assert(c.spec == BCI5_spec);
+ assert(B == B5 && H == H4 && S == 0 && D == 0);
+ return coding::parse_lgH(rp, B5, H4, 2);
+
+ case cmk_BRANCH5:
+ assert(c.spec == BRANCH5_spec);
+ assert(B == B5 && H == H4 && S == 2 && D == 0);
+ uval = coding::parse_lgH(rp, B5, H4, 2);
+ return decode_sign(S, uval);
+
+ case cmk_pop:
+ uval = coding::parse(rp, B, H);
+ if (S != 0)
+ {
+ uval = (uint32_t)decode_sign(S, uval);
+ }
+ if (D != 0)
+ {
+ assert(c.isSubrange | c.isFullRange);
+ if (c.isSubrange)
+ sum = c.sumInUnsignedRange(sum, (int)uval);
+ else
+ sum += (int)uval;
+ uval = (uint32_t)sum;
+ }
+ return getPopValue(this, uval);
+
+ case cmk_pop_BHS0:
+ assert(S == 0 && D == 0);
+ uval = coding::parse(rp, B, H);
+ return getPopValue(this, uval);
+
+ case cmk_pop_BYTE1:
+ assert(c.spec == BYTE1_spec);
+ assert(B == 1 && H == 256 && S == 0 && D == 0);
+ return getPopValue(this, *rp++ & 0xFF);
+
+ default:
+ break;
+ }
+ assert(false);
+ return 0;
}
static int moreCentral(int x, int y)
{ // used to find end of Pop.{F}
- // Suggested implementation from the Pack200 specification:
- uint32_t kx = (x >> 31) ^ (x << 1);
- uint32_t ky = (y >> 31) ^ (y << 1);
- return (kx < ky ? x : y);
+ // Suggested implementation from the Pack200 specification:
+ uint32_t kx = (x >> 31) ^ (x << 1);
+ uint32_t ky = (y >> 31) ^ (y << 1);
+ return (kx < ky ? x : y);
}
// static maybe_inline
// int moreCentral2(int x, int y, int min) {
@@ -641,7 +641,7 @@ static const byte *no_meta[] = {nullptr};
#define NO_META (*(byte **)no_meta)
enum
{
- POP_FAVORED_N = -2
+ POP_FAVORED_N = -2
};
// mode bits
@@ -650,395 +650,395 @@ enum
// This function knows all about meta-coding.
void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int mode,
- coding *defc, int N, intlist *valueSink)
+ coding *defc, int N, intlist *valueSink)
{
- assert(N != 0);
-
- assert(u != nullptr); // must be pre-initialized
- // if (u == nullptr) u = unpacker::current(); // expensive
-
- int op = (meta_rp == nullptr) ? _meta_default : (*meta_rp++ & 0xFF);
- coding *foundc = nullptr;
- coding *to_free = nullptr;
-
- if (op == _meta_default)
- {
- foundc = defc;
- // and fall through
- }
- else if (op >= _meta_canon_min && op <= _meta_canon_max)
- {
- foundc = coding::findByIndex(op);
- // and fall through
- }
- else if (op == _meta_arb)
- {
- int args = (*meta_rp++ & 0xFF);
- // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
- int D = ((args >> 0) & 1);
- int S = ((args >> 1) & 3);
- int B = ((args >> 3) & -1) + 1;
- // & (H[1..256]-1)
- int H = (*meta_rp++ & 0xFF) + 1;
- foundc = coding::findBySpec(B, H, S, D);
- to_free = foundc; // findBySpec may dynamically allocate
- if (foundc == nullptr)
- {
- unpack_abort("illegal arbitrary coding");
- return;
- }
- // and fall through
- }
- else if (op >= _meta_run && op < _meta_pop)
- {
- int args = (op - _meta_run);
- // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
- int KX = ((args >> 0) & 3);
- int KBFlag = ((args >> 2) & 1);
- int ABDef = ((args >> 3) & -1);
- assert(ABDef <= 2);
- // & KB: one of [0..255] if KBFlag=1
- int KB = (!KBFlag ? 3 : (*meta_rp++ & 0xFF));
- int K = (KB + 1) << (KX * 4);
- int N2 = (N >= 0) ? N - K : N;
- if (N == 0 || (N2 <= 0 && N2 != N))
- {
- unpack_abort("illegal run encoding");
- }
- if ((mode & DISABLE_RUN) != 0)
- {
- unpack_abort("illegal nested run encoding");
- }
-
- // & Enc{ ACode } if ADef=0 (ABDef != 1)
- // No direct nesting of 'run' in ACode, but in BCode it's OK.
- int disRun = mode | DISABLE_RUN;
- if (ABDef == 1)
- {
- this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
- }
- else
- {
- this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
- }
-
- // & Enc{ BCode } if BDef=0 (ABDef != 2)
- coding_method *tail = U_NEW(coding_method, 1);
- if (!tail)
- return;
- tail->u = u;
-
- // The 'run' codings may be nested indirectly via 'pop' codings.
- // This means that this->next may already be filled in, if
- // ACode was of type 'pop' with a 'run' token coding.
- // No problem: Just chain the upcoming BCode onto the end.
- for (coding_method *self = this;; self = self->next)
- {
- if (self->next == nullptr)
- {
- self->next = tail;
- break;
- }
- }
-
- if (ABDef == 2)
- {
- tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
- }
- else
- {
- tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
- }
- // Note: The preceding calls to init should be tail-recursive.
-
- return; // done; no falling through
- }
- else if (op >= _meta_pop && op < _meta_limit)
- {
- int args = (op - _meta_pop);
- // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
- int FDef = ((args >> 0) & 1);
- int UDef = ((args >> 1) & 1);
- int TDefL = ((args >> 2) & -1);
- assert(TDefL <= 11);
- int TDef = (TDefL > 0);
- int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11 - TDefL)));
- int TH = (256 - TL);
- if (N <= 0)
- {
- unpack_abort("illegal pop encoding");
- }
- if ((mode & DISABLE_POP) != 0)
- {
- unpack_abort("illegal nested pop encoding");
- }
-
- // No indirect nesting of 'pop', but 'run' is OK.
- int disPop = DISABLE_POP;
-
- // & Enc{ FCode } if FDef=0
- int FN = POP_FAVORED_N;
- assert(valueSink == nullptr);
- intlist fValueSink;
- fValueSink.init();
- coding_method fval;
- BYTES_OF(fval).clear();
- fval.u = u;
- if (FDef != 0)
- {
- fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
- }
- else
- {
- fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
- }
- bytes fvbuf;
- fValues = (u->saveTo(fvbuf, fValueSink.b), (int *)fvbuf.ptr);
- fVlength = fValueSink.length(); // i.e., the parameter K
- fValueSink.free();
-
- // Skip the first {F} run in all subsequent passes.
- // The next call to this->init(...) will set vs0.rp to point after the {F}.
-
- // & Enc{ TCode } if TDef=0 (TDefL==0)
- if (TDef != 0)
- {
- coding *tcode = coding::findBySpec(1, 256); // BYTE1
- // find the most narrowly sufficient code:
- for (int B = 2; B <= B_MAX; B++)
- {
- if (fVlength <= tcode->umax)
- break; // found it
- tcode->free();
- tcode = coding::findBySpec(B, TH);
- if (!tcode)
- return;
- }
- if (!(fVlength <= tcode->umax))
- {
- unpack_abort("pop.L value too small");
- }
- this->init(band_rp, band_limit, NO_META, disPop, tcode, N, nullptr);
- tcode->free();
- }
- else
- {
- this->init(band_rp, band_limit, meta_rp, disPop, defc, N, nullptr);
- }
-
- // Count the number of zero tokens right now.
- // Also verify that they are in bounds.
- int UN = 0; // one {U} for each zero in {T}
- value_stream vs = vs0;
- for (int i = 0; i < N; i++)
- {
- uint32_t val = vs.getInt();
- if (val == 0)
- UN += 1;
- if (!(val <= (uint32_t)fVlength))
- {
- unpack_abort("pop token out of range");
- }
- }
- vs.done();
-
- // & Enc{ UCode } if UDef=0
- if (UN != 0)
- {
- uValues = U_NEW(coding_method, 1);
- if (uValues == nullptr)
- return;
- uValues->u = u;
- if (UDef != 0)
- {
- uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, nullptr);
- }
- else
- {
- uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, nullptr);
- }
- }
- else
- {
- if (UDef == 0)
- {
- int uop = (*meta_rp++ & 0xFF);
- if (uop > _meta_canon_max)
- // %%% Spec. requires the more strict (uop != _meta_default).
- unpack_abort("bad meta-coding for empty pop/U");
- }
- }
-
- // Bug fix for 6259542
- // Last of all, adjust vs0.cmk to the 'pop' flavor
- for (coding_method *self = this; self != nullptr; self = self->next)
- {
- coding_method_kind cmk2 = cmk_pop;
- switch (self->vs0.cmk)
- {
- case cmk_BHS0:
- cmk2 = cmk_pop_BHS0;
- break;
- case cmk_BYTE1:
- cmk2 = cmk_pop_BYTE1;
- break;
- default:
- break;
- }
- self->vs0.cmk = cmk2;
- if (self != this)
- {
- assert(self->fValues == nullptr); // no double init
- self->fValues = this->fValues;
- self->fVlength = this->fVlength;
- assert(self->uValues == nullptr); // must stay nullptr
- }
- }
-
- return; // done; no falling through
- }
- else
- {
- unpack_abort("bad meta-coding");
- }
-
- // Common code here skips a series of values with one coding.
- assert(foundc != nullptr);
-
- assert(vs0.cmk == cmk_ERROR); // no garbage, please
- assert(vs0.rp == nullptr); // no garbage, please
- assert(vs0.rplimit == nullptr); // no garbage, please
- assert(vs0.sum == 0); // no garbage, please
-
- vs0.init(band_rp, band_limit, foundc);
-
- // Done with foundc. Free if necessary.
- if (to_free != nullptr)
- {
- to_free->free();
- to_free = nullptr;
- }
- foundc = nullptr;
-
- coding &c = vs0.c;
- CODING_PRIVATE(c.spec);
- // assert sane N
- assert((uint32_t)N < INT_MAX_VALUE || N == POP_FAVORED_N);
-
- // Look at the values, or at least skip over them quickly.
- if (valueSink == nullptr)
- {
- // Skip and ignore values in the first pass.
- c.parseMultiple(band_rp, N, band_limit, B, H);
- }
- else if (N >= 0)
- {
- // Pop coding, {F} sequence, initial run of values...
- assert((mode & DISABLE_POP) != 0);
- value_stream vs = vs0;
- for (int n = 0; n < N; n++)
- {
- int val = vs.getInt();
- valueSink->add(val);
- }
- band_rp = vs.rp;
- }
- else
- {
- // Pop coding, {F} sequence, final run of values...
- assert((mode & DISABLE_POP) != 0);
- assert(N == POP_FAVORED_N);
- int min = INT_MIN_VALUE; // farthest from the center
- // min2 is based on the buggy specification of centrality in version 150.7
- // no known implementations transmit this value, but just in case...
- // int min2 = INT_MIN_VALUE;
- int last = 0;
- // if there were initial runs, find the potential sentinels in them:
- for (int i = 0; i < valueSink->length(); i++)
- {
- last = valueSink->get(i);
- min = moreCentral(min, last);
- // min2 = moreCentral2(min2, last, min);
- }
- value_stream vs = vs0;
- for (;;)
- {
- int val = vs.getInt();
- if (valueSink->length() > 0 && (val == last || val == min)) //|| val == min2
- break;
- valueSink->add(val);
- last = val;
- min = moreCentral(min, last);
- // min2 = moreCentral2(min2, last, min);
- }
- band_rp = vs.rp;
- }
-
- // Get an accurate upper limit now.
- vs0.rplimit = band_rp;
- vs0.cm = this;
-
- return; // success
+ assert(N != 0);
+
+ assert(u != nullptr); // must be pre-initialized
+ // if (u == nullptr) u = unpacker::current(); // expensive
+
+ int op = (meta_rp == nullptr) ? _meta_default : (*meta_rp++ & 0xFF);
+ coding *foundc = nullptr;
+ coding *to_free = nullptr;
+
+ if (op == _meta_default)
+ {
+ foundc = defc;
+ // and fall through
+ }
+ else if (op >= _meta_canon_min && op <= _meta_canon_max)
+ {
+ foundc = coding::findByIndex(op);
+ // and fall through
+ }
+ else if (op == _meta_arb)
+ {
+ int args = (*meta_rp++ & 0xFF);
+ // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
+ int D = ((args >> 0) & 1);
+ int S = ((args >> 1) & 3);
+ int B = ((args >> 3) & -1) + 1;
+ // & (H[1..256]-1)
+ int H = (*meta_rp++ & 0xFF) + 1;
+ foundc = coding::findBySpec(B, H, S, D);
+ to_free = foundc; // findBySpec may dynamically allocate
+ if (foundc == nullptr)
+ {
+ unpack_abort("illegal arbitrary coding");
+ return;
+ }
+ // and fall through
+ }
+ else if (op >= _meta_run && op < _meta_pop)
+ {
+ int args = (op - _meta_run);
+ // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
+ int KX = ((args >> 0) & 3);
+ int KBFlag = ((args >> 2) & 1);
+ int ABDef = ((args >> 3) & -1);
+ assert(ABDef <= 2);
+ // & KB: one of [0..255] if KBFlag=1
+ int KB = (!KBFlag ? 3 : (*meta_rp++ & 0xFF));
+ int K = (KB + 1) << (KX * 4);
+ int N2 = (N >= 0) ? N - K : N;
+ if (N == 0 || (N2 <= 0 && N2 != N))
+ {
+ unpack_abort("illegal run encoding");
+ }
+ if ((mode & DISABLE_RUN) != 0)
+ {
+ unpack_abort("illegal nested run encoding");
+ }
+
+ // & Enc{ ACode } if ADef=0 (ABDef != 1)
+ // No direct nesting of 'run' in ACode, but in BCode it's OK.
+ int disRun = mode | DISABLE_RUN;
+ if (ABDef == 1)
+ {
+ this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
+ }
+ else
+ {
+ this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
+ }
+
+ // & Enc{ BCode } if BDef=0 (ABDef != 2)
+ coding_method *tail = U_NEW(coding_method, 1);
+ if (!tail)
+ return;
+ tail->u = u;
+
+ // The 'run' codings may be nested indirectly via 'pop' codings.
+ // This means that this->next may already be filled in, if
+ // ACode was of type 'pop' with a 'run' token coding.
+ // No problem: Just chain the upcoming BCode onto the end.
+ for (coding_method *self = this;; self = self->next)
+ {
+ if (self->next == nullptr)
+ {
+ self->next = tail;
+ break;
+ }
+ }
+
+ if (ABDef == 2)
+ {
+ tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
+ }
+ else
+ {
+ tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
+ }
+ // Note: The preceding calls to init should be tail-recursive.
+
+ return; // done; no falling through
+ }
+ else if (op >= _meta_pop && op < _meta_limit)
+ {
+ int args = (op - _meta_pop);
+ // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
+ int FDef = ((args >> 0) & 1);
+ int UDef = ((args >> 1) & 1);
+ int TDefL = ((args >> 2) & -1);
+ assert(TDefL <= 11);
+ int TDef = (TDefL > 0);
+ int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11 - TDefL)));
+ int TH = (256 - TL);
+ if (N <= 0)
+ {
+ unpack_abort("illegal pop encoding");
+ }
+ if ((mode & DISABLE_POP) != 0)
+ {
+ unpack_abort("illegal nested pop encoding");
+ }
+
+ // No indirect nesting of 'pop', but 'run' is OK.
+ int disPop = DISABLE_POP;
+
+ // & Enc{ FCode } if FDef=0
+ int FN = POP_FAVORED_N;
+ assert(valueSink == nullptr);
+ intlist fValueSink;
+ fValueSink.init();
+ coding_method fval;
+ BYTES_OF(fval).clear();
+ fval.u = u;
+ if (FDef != 0)
+ {
+ fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
+ }
+ else
+ {
+ fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
+ }
+ bytes fvbuf;
+ fValues = (u->saveTo(fvbuf, fValueSink.b), (int *)fvbuf.ptr);
+ fVlength = fValueSink.length(); // i.e., the parameter K
+ fValueSink.free();
+
+ // Skip the first {F} run in all subsequent passes.
+ // The next call to this->init(...) will set vs0.rp to point after the {F}.
+
+ // & Enc{ TCode } if TDef=0 (TDefL==0)
+ if (TDef != 0)
+ {
+ coding *tcode = coding::findBySpec(1, 256); // BYTE1
+ // find the most narrowly sufficient code:
+ for (int B = 2; B <= B_MAX; B++)
+ {
+ if (fVlength <= tcode->umax)
+ break; // found it
+ tcode->free();
+ tcode = coding::findBySpec(B, TH);
+ if (!tcode)
+ return;
+ }
+ if (!(fVlength <= tcode->umax))
+ {
+ unpack_abort("pop.L value too small");
+ }
+ this->init(band_rp, band_limit, NO_META, disPop, tcode, N, nullptr);
+ tcode->free();
+ }
+ else
+ {
+ this->init(band_rp, band_limit, meta_rp, disPop, defc, N, nullptr);
+ }
+
+ // Count the number of zero tokens right now.
+ // Also verify that they are in bounds.
+ int UN = 0; // one {U} for each zero in {T}
+ value_stream vs = vs0;
+ for (int i = 0; i < N; i++)
+ {
+ uint32_t val = vs.getInt();
+ if (val == 0)
+ UN += 1;
+ if (!(val <= (uint32_t)fVlength))
+ {
+ unpack_abort("pop token out of range");
+ }
+ }
+ vs.done();
+
+ // & Enc{ UCode } if UDef=0
+ if (UN != 0)
+ {
+ uValues = U_NEW(coding_method, 1);
+ if (uValues == nullptr)
+ return;
+ uValues->u = u;
+ if (UDef != 0)
+ {
+ uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, nullptr);
+ }
+ else
+ {
+ uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, nullptr);
+ }
+ }
+ else
+ {
+ if (UDef == 0)
+ {
+ int uop = (*meta_rp++ & 0xFF);
+ if (uop > _meta_canon_max)
+ // %%% Spec. requires the more strict (uop != _meta_default).
+ unpack_abort("bad meta-coding for empty pop/U");
+ }
+ }
+
+ // Bug fix for 6259542
+ // Last of all, adjust vs0.cmk to the 'pop' flavor
+ for (coding_method *self = this; self != nullptr; self = self->next)
+ {
+ coding_method_kind cmk2 = cmk_pop;
+ switch (self->vs0.cmk)
+ {
+ case cmk_BHS0:
+ cmk2 = cmk_pop_BHS0;
+ break;
+ case cmk_BYTE1:
+ cmk2 = cmk_pop_BYTE1;
+ break;
+ default:
+ break;
+ }
+ self->vs0.cmk = cmk2;
+ if (self != this)
+ {
+ assert(self->fValues == nullptr); // no double init
+ self->fValues = this->fValues;
+ self->fVlength = this->fVlength;
+ assert(self->uValues == nullptr); // must stay nullptr
+ }
+ }
+
+ return; // done; no falling through
+ }
+ else
+ {
+ unpack_abort("bad meta-coding");
+ }
+
+ // Common code here skips a series of values with one coding.
+ assert(foundc != nullptr);
+
+ assert(vs0.cmk == cmk_ERROR); // no garbage, please
+ assert(vs0.rp == nullptr); // no garbage, please
+ assert(vs0.rplimit == nullptr); // no garbage, please
+ assert(vs0.sum == 0); // no garbage, please
+
+ vs0.init(band_rp, band_limit, foundc);
+
+ // Done with foundc. Free if necessary.
+ if (to_free != nullptr)
+ {
+ to_free->free();
+ to_free = nullptr;
+ }
+ foundc = nullptr;
+
+ coding &c = vs0.c;
+ CODING_PRIVATE(c.spec);
+ // assert sane N
+ assert((uint32_t)N < INT_MAX_VALUE || N == POP_FAVORED_N);
+
+ // Look at the values, or at least skip over them quickly.
+ if (valueSink == nullptr)
+ {
+ // Skip and ignore values in the first pass.
+ c.parseMultiple(band_rp, N, band_limit, B, H);
+ }
+ else if (N >= 0)
+ {
+ // Pop coding, {F} sequence, initial run of values...
+ assert((mode & DISABLE_POP) != 0);
+ value_stream vs = vs0;
+ for (int n = 0; n < N; n++)
+ {
+ int val = vs.getInt();
+ valueSink->add(val);
+ }
+ band_rp = vs.rp;
+ }
+ else
+ {
+ // Pop coding, {F} sequence, final run of values...
+ assert((mode & DISABLE_POP) != 0);
+ assert(N == POP_FAVORED_N);
+ int min = INT_MIN_VALUE; // farthest from the center
+ // min2 is based on the buggy specification of centrality in version 150.7
+ // no known implementations transmit this value, but just in case...
+ // int min2 = INT_MIN_VALUE;
+ int last = 0;
+ // if there were initial runs, find the potential sentinels in them:
+ for (int i = 0; i < valueSink->length(); i++)
+ {
+ last = valueSink->get(i);
+ min = moreCentral(min, last);
+ // min2 = moreCentral2(min2, last, min);
+ }
+ value_stream vs = vs0;
+ for (;;)
+ {
+ int val = vs.getInt();
+ if (valueSink->length() > 0 && (val == last || val == min)) //|| val == min2
+ break;
+ valueSink->add(val);
+ last = val;
+ min = moreCentral(min, last);
+ // min2 = moreCentral2(min2, last, min);
+ }
+ band_rp = vs.rp;
+ }
+
+ // Get an accurate upper limit now.
+ vs0.rplimit = band_rp;
+ vs0.cm = this;
+
+ return; // success
}
coding basic_codings[] = {
- // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
- CODING_INIT(3, 128, 0, 0),
-
- // Fixed-length codings:
- CODING_INIT(1, 256, 0, 0), CODING_INIT(1, 256, 1, 0), CODING_INIT(1, 256, 0, 1),
- CODING_INIT(1, 256, 1, 1), CODING_INIT(2, 256, 0, 0), CODING_INIT(2, 256, 1, 0),
- CODING_INIT(2, 256, 0, 1), CODING_INIT(2, 256, 1, 1), CODING_INIT(3, 256, 0, 0),
- CODING_INIT(3, 256, 1, 0), CODING_INIT(3, 256, 0, 1), CODING_INIT(3, 256, 1, 1),
- CODING_INIT(4, 256, 0, 0), CODING_INIT(4, 256, 1, 0), CODING_INIT(4, 256, 0, 1),
- CODING_INIT(4, 256, 1, 1),
-
- // Full-range variable-length codings:
- CODING_INIT(5, 4, 0, 0), CODING_INIT(5, 4, 1, 0), CODING_INIT(5, 4, 2, 0),
- CODING_INIT(5, 16, 0, 0), CODING_INIT(5, 16, 1, 0), CODING_INIT(5, 16, 2, 0),
- CODING_INIT(5, 32, 0, 0), CODING_INIT(5, 32, 1, 0), CODING_INIT(5, 32, 2, 0),
- CODING_INIT(5, 64, 0, 0), CODING_INIT(5, 64, 1, 0), CODING_INIT(5, 64, 2, 0),
- CODING_INIT(5, 128, 0, 0), CODING_INIT(5, 128, 1, 0), CODING_INIT(5, 128, 2, 0),
- CODING_INIT(5, 4, 0, 1), CODING_INIT(5, 4, 1, 1), CODING_INIT(5, 4, 2, 1),
- CODING_INIT(5, 16, 0, 1), CODING_INIT(5, 16, 1, 1), CODING_INIT(5, 16, 2, 1),
- CODING_INIT(5, 32, 0, 1), CODING_INIT(5, 32, 1, 1), CODING_INIT(5, 32, 2, 1),
- CODING_INIT(5, 64, 0, 1), CODING_INIT(5, 64, 1, 1), CODING_INIT(5, 64, 2, 1),
- CODING_INIT(5, 128, 0, 1), CODING_INIT(5, 128, 1, 1), CODING_INIT(5, 128, 2, 1),
-
- // Variable length subrange codings:
- CODING_INIT(2, 192, 0, 0), CODING_INIT(2, 224, 0, 0), CODING_INIT(2, 240, 0, 0),
- CODING_INIT(2, 248, 0, 0), CODING_INIT(2, 252, 0, 0), CODING_INIT(2, 8, 0, 1),
- CODING_INIT(2, 8, 1, 1), CODING_INIT(2, 16, 0, 1), CODING_INIT(2, 16, 1, 1),
- CODING_INIT(2, 32, 0, 1), CODING_INIT(2, 32, 1, 1), CODING_INIT(2, 64, 0, 1),
- CODING_INIT(2, 64, 1, 1), CODING_INIT(2, 128, 0, 1), CODING_INIT(2, 128, 1, 1),
- CODING_INIT(2, 192, 0, 1), CODING_INIT(2, 192, 1, 1), CODING_INIT(2, 224, 0, 1),
- CODING_INIT(2, 224, 1, 1), CODING_INIT(2, 240, 0, 1), CODING_INIT(2, 240, 1, 1),
- CODING_INIT(2, 248, 0, 1), CODING_INIT(2, 248, 1, 1), CODING_INIT(3, 192, 0, 0),
- CODING_INIT(3, 224, 0, 0), CODING_INIT(3, 240, 0, 0), CODING_INIT(3, 248, 0, 0),
- CODING_INIT(3, 252, 0, 0), CODING_INIT(3, 8, 0, 1), CODING_INIT(3, 8, 1, 1),
- CODING_INIT(3, 16, 0, 1), CODING_INIT(3, 16, 1, 1), CODING_INIT(3, 32, 0, 1),
- CODING_INIT(3, 32, 1, 1), CODING_INIT(3, 64, 0, 1), CODING_INIT(3, 64, 1, 1),
- CODING_INIT(3, 128, 0, 1), CODING_INIT(3, 128, 1, 1), CODING_INIT(3, 192, 0, 1),
- CODING_INIT(3, 192, 1, 1), CODING_INIT(3, 224, 0, 1), CODING_INIT(3, 224, 1, 1),
- CODING_INIT(3, 240, 0, 1), CODING_INIT(3, 240, 1, 1), CODING_INIT(3, 248, 0, 1),
- CODING_INIT(3, 248, 1, 1), CODING_INIT(4, 192, 0, 0), CODING_INIT(4, 224, 0, 0),
- CODING_INIT(4, 240, 0, 0), CODING_INIT(4, 248, 0, 0), CODING_INIT(4, 252, 0, 0),
- CODING_INIT(4, 8, 0, 1), CODING_INIT(4, 8, 1, 1), CODING_INIT(4, 16, 0, 1),
- CODING_INIT(4, 16, 1, 1), CODING_INIT(4, 32, 0, 1), CODING_INIT(4, 32, 1, 1),
- CODING_INIT(4, 64, 0, 1), CODING_INIT(4, 64, 1, 1), CODING_INIT(4, 128, 0, 1),
- CODING_INIT(4, 128, 1, 1), CODING_INIT(4, 192, 0, 1), CODING_INIT(4, 192, 1, 1),
- CODING_INIT(4, 224, 0, 1), CODING_INIT(4, 224, 1, 1), CODING_INIT(4, 240, 0, 1),
- CODING_INIT(4, 240, 1, 1), CODING_INIT(4, 248, 0, 1), CODING_INIT(4, 248, 1, 1),
- CODING_INIT(0, 0, 0, 0)};
+ // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
+ CODING_INIT(3, 128, 0, 0),
+
+ // Fixed-length codings:
+ CODING_INIT(1, 256, 0, 0), CODING_INIT(1, 256, 1, 0), CODING_INIT(1, 256, 0, 1),
+ CODING_INIT(1, 256, 1, 1), CODING_INIT(2, 256, 0, 0), CODING_INIT(2, 256, 1, 0),
+ CODING_INIT(2, 256, 0, 1), CODING_INIT(2, 256, 1, 1), CODING_INIT(3, 256, 0, 0),
+ CODING_INIT(3, 256, 1, 0), CODING_INIT(3, 256, 0, 1), CODING_INIT(3, 256, 1, 1),
+ CODING_INIT(4, 256, 0, 0), CODING_INIT(4, 256, 1, 0), CODING_INIT(4, 256, 0, 1),
+ CODING_INIT(4, 256, 1, 1),
+
+ // Full-range variable-length codings:
+ CODING_INIT(5, 4, 0, 0), CODING_INIT(5, 4, 1, 0), CODING_INIT(5, 4, 2, 0),
+ CODING_INIT(5, 16, 0, 0), CODING_INIT(5, 16, 1, 0), CODING_INIT(5, 16, 2, 0),
+ CODING_INIT(5, 32, 0, 0), CODING_INIT(5, 32, 1, 0), CODING_INIT(5, 32, 2, 0),
+ CODING_INIT(5, 64, 0, 0), CODING_INIT(5, 64, 1, 0), CODING_INIT(5, 64, 2, 0),
+ CODING_INIT(5, 128, 0, 0), CODING_INIT(5, 128, 1, 0), CODING_INIT(5, 128, 2, 0),
+ CODING_INIT(5, 4, 0, 1), CODING_INIT(5, 4, 1, 1), CODING_INIT(5, 4, 2, 1),
+ CODING_INIT(5, 16, 0, 1), CODING_INIT(5, 16, 1, 1), CODING_INIT(5, 16, 2, 1),
+ CODING_INIT(5, 32, 0, 1), CODING_INIT(5, 32, 1, 1), CODING_INIT(5, 32, 2, 1),
+ CODING_INIT(5, 64, 0, 1), CODING_INIT(5, 64, 1, 1), CODING_INIT(5, 64, 2, 1),
+ CODING_INIT(5, 128, 0, 1), CODING_INIT(5, 128, 1, 1), CODING_INIT(5, 128, 2, 1),
+
+ // Variable length subrange codings:
+ CODING_INIT(2, 192, 0, 0), CODING_INIT(2, 224, 0, 0), CODING_INIT(2, 240, 0, 0),
+ CODING_INIT(2, 248, 0, 0), CODING_INIT(2, 252, 0, 0), CODING_INIT(2, 8, 0, 1),
+ CODING_INIT(2, 8, 1, 1), CODING_INIT(2, 16, 0, 1), CODING_INIT(2, 16, 1, 1),
+ CODING_INIT(2, 32, 0, 1), CODING_INIT(2, 32, 1, 1), CODING_INIT(2, 64, 0, 1),
+ CODING_INIT(2, 64, 1, 1), CODING_INIT(2, 128, 0, 1), CODING_INIT(2, 128, 1, 1),
+ CODING_INIT(2, 192, 0, 1), CODING_INIT(2, 192, 1, 1), CODING_INIT(2, 224, 0, 1),
+ CODING_INIT(2, 224, 1, 1), CODING_INIT(2, 240, 0, 1), CODING_INIT(2, 240, 1, 1),
+ CODING_INIT(2, 248, 0, 1), CODING_INIT(2, 248, 1, 1), CODING_INIT(3, 192, 0, 0),
+ CODING_INIT(3, 224, 0, 0), CODING_INIT(3, 240, 0, 0), CODING_INIT(3, 248, 0, 0),
+ CODING_INIT(3, 252, 0, 0), CODING_INIT(3, 8, 0, 1), CODING_INIT(3, 8, 1, 1),
+ CODING_INIT(3, 16, 0, 1), CODING_INIT(3, 16, 1, 1), CODING_INIT(3, 32, 0, 1),
+ CODING_INIT(3, 32, 1, 1), CODING_INIT(3, 64, 0, 1), CODING_INIT(3, 64, 1, 1),
+ CODING_INIT(3, 128, 0, 1), CODING_INIT(3, 128, 1, 1), CODING_INIT(3, 192, 0, 1),
+ CODING_INIT(3, 192, 1, 1), CODING_INIT(3, 224, 0, 1), CODING_INIT(3, 224, 1, 1),
+ CODING_INIT(3, 240, 0, 1), CODING_INIT(3, 240, 1, 1), CODING_INIT(3, 248, 0, 1),
+ CODING_INIT(3, 248, 1, 1), CODING_INIT(4, 192, 0, 0), CODING_INIT(4, 224, 0, 0),
+ CODING_INIT(4, 240, 0, 0), CODING_INIT(4, 248, 0, 0), CODING_INIT(4, 252, 0, 0),
+ CODING_INIT(4, 8, 0, 1), CODING_INIT(4, 8, 1, 1), CODING_INIT(4, 16, 0, 1),
+ CODING_INIT(4, 16, 1, 1), CODING_INIT(4, 32, 0, 1), CODING_INIT(4, 32, 1, 1),
+ CODING_INIT(4, 64, 0, 1), CODING_INIT(4, 64, 1, 1), CODING_INIT(4, 128, 0, 1),
+ CODING_INIT(4, 128, 1, 1), CODING_INIT(4, 192, 0, 1), CODING_INIT(4, 192, 1, 1),
+ CODING_INIT(4, 224, 0, 1), CODING_INIT(4, 224, 1, 1), CODING_INIT(4, 240, 0, 1),
+ CODING_INIT(4, 240, 1, 1), CODING_INIT(4, 248, 0, 1), CODING_INIT(4, 248, 1, 1),
+ CODING_INIT(0, 0, 0, 0)};
#define BASIC_INDEX_LIMIT (int)(sizeof(basic_codings) / sizeof(basic_codings[0]) - 1)
coding *coding::findByIndex(int idx)
{
- int index_limit = BASIC_INDEX_LIMIT;
- assert(_meta_canon_min == 1 && _meta_canon_max + 1 == index_limit);
+ int index_limit = BASIC_INDEX_LIMIT;
+ assert(_meta_canon_min == 1 && _meta_canon_max + 1 == index_limit);
- if (idx >= _meta_canon_min && idx <= _meta_canon_max)
- return basic_codings[idx].init();
- else
- return nullptr;
+ if (idx >= _meta_canon_min && idx <= _meta_canon_max)
+ return basic_codings[idx].init();
+ else
+ return nullptr;
}