Bug#928111: [pre-approval] unblock: icu/63.2-1
Hi Paul,
On Sun, Jun 16, 2019 at 9:50 PM Paul Gevers <elbrus@debian.org> wrote:
> On 16-06-2019 11:20, László Böszörményi (GCS) wrote:
> > The debdiff is larger because of the following changes. The backported
> > security fixes are no longer under debian/patches but inline. The ABI
> > break, called the 'ICU-20250' issue upstream, is reversed with a patch.
> > Then there are the s/63.1/63.2/ changes, etc.
>
> Can you please provide a diff between the patches-applied tree of the
> current buster version and a patches-applied tree of the current sid
> version?
Of course; it is attached. As you can see, the diff size went down from
165 kB to 39 kB, even though the documentation and s/63.1/63.2/ changes
are still included.
Regards,
Laszlo/GCS
diff -Nur icu-63.1/readme.html icu-63.2/readme.html
--- icu-63.1/readme.html 2018-10-15 18:02:37.000000000 +0000
+++ icu-63.2/readme.html 2019-04-11 22:38:30.000000000 +0000
@@ -3,7 +3,7 @@
<html lang="en-US" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
<head>
- <title>ReadMe for ICU 63.1</title>
+ <title>ReadMe for ICU 63.2</title>
<meta name="COPYRIGHT" content=
"Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html"/>
<!-- meta name="COPYRIGHT" content=
@@ -32,7 +32,7 @@
International Components for Unicode<br/>
<span class="only-rc">Release Candidate</span>
<span class="only-milestone">(Milestone Release)</span>
- <abbr title="International Components for Unicode">ICU</abbr> 63.1 ReadMe
+ <abbr title="International Components for Unicode">ICU</abbr> 63.2 ReadMe
</h1>
<!-- Shouldn't need to comment/uncomment this paragraph, just change the body class -->
diff -Nur icu-63.1/source/common/umutablecptrie.cpp icu-63.2/source/common/umutablecptrie.cpp
--- icu-63.1/source/common/umutablecptrie.cpp 2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/common/umutablecptrie.cpp 2019-06-16 20:23:58.000000000 +0000
@@ -60,6 +60,7 @@
constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
class AllSameBlocks;
+class MixedBlocks;
class MutableCodePointTrie : public UMemory {
public:
@@ -92,8 +93,10 @@
void maskValues(uint32_t mask);
UChar32 findHighStart() const;
int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
- int32_t compactData(int32_t fastILimit, uint32_t *newData, int32_t dataNullIndex);
- int32_t compactIndex(int32_t fastILimit, UErrorCode &errorCode);
+ int32_t compactData(
+ int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
+ int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
+ int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
uint32_t *index = nullptr;
@@ -548,28 +551,8 @@
}
}
-inline bool
-equalBlocks(const uint32_t *s, const uint32_t *t, int32_t length) {
- while (length > 0 && *s == *t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
-}
-
-inline bool
-equalBlocks(const uint16_t *s, const uint32_t *t, int32_t length) {
- while (length > 0 && *s == *t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
-}
-
-inline bool
-equalBlocks(const uint16_t *s, const uint16_t *t, int32_t length) {
+template<typename UIntA, typename UIntB>
+bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
while (length > 0 && *s == *t) {
++s;
++t;
@@ -585,36 +568,6 @@
}
/** Search for an identical block. */
-int32_t findSameBlock(const uint32_t *p, int32_t pStart, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- // Ensure that we do not even partially get past length.
- length -= blockLength;
-
- q += qStart;
- while (pStart <= length) {
- if (equalBlocks(p + pStart, q, blockLength)) {
- return pStart;
- }
- ++pStart;
- }
- return -1;
-}
-
-int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- // Ensure that we do not even partially get past length.
- length -= blockLength;
-
- q += qStart;
- while (pStart <= length) {
- if (equalBlocks(p + pStart, q, blockLength)) {
- return pStart;
- }
- ++pStart;
- }
- return -1;
-}
-
int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
const uint16_t *q, int32_t qStart, int32_t blockLength) {
// Ensure that we do not even partially get past length.
@@ -655,30 +608,9 @@
* Look for maximum overlap of the beginning of the other block
* with the previous, adjacent block.
*/
-int32_t getOverlap(const uint32_t *p, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- int32_t overlap = blockLength - 1;
- U_ASSERT(overlap <= length);
- q += qStart;
- while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
- --overlap;
- }
- return overlap;
-}
-
-int32_t getOverlap(const uint16_t *p, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- int32_t overlap = blockLength - 1;
- U_ASSERT(overlap <= length);
- q += qStart;
- while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
- --overlap;
- }
- return overlap;
-}
-
-int32_t getOverlap(const uint16_t *p, int32_t length,
- const uint16_t *q, int32_t qStart, int32_t blockLength) {
+template<typename UIntA, typename UIntB>
+int32_t getOverlap(const UIntA *p, int32_t length,
+ const UIntB *q, int32_t qStart, int32_t blockLength) {
int32_t overlap = blockLength - 1;
U_ASSERT(overlap <= length);
q += qStart;
@@ -807,6 +739,171 @@
int32_t refCounts[CAPACITY];
};
+// Custom hash table for mixed-value blocks to be found anywhere in the
+// compacted data or index so far.
+class MixedBlocks {
+public:
+ MixedBlocks() {}
+ ~MixedBlocks() {
+ uprv_free(table);
+ }
+
+ bool init(int32_t maxLength, int32_t newBlockLength) {
+ // We store actual data indexes + 1 to reserve 0 for empty entries.
+ int32_t maxDataIndex = maxLength - newBlockLength + 1;
+ int32_t newLength;
+ if (maxDataIndex <= 0xfff) { // 4k
+ newLength = 6007;
+ shift = 12;
+ mask = 0xfff;
+ } else if (maxDataIndex <= 0x7fff) { // 32k
+ newLength = 50021;
+ shift = 15;
+ mask = 0x7fff;
+ } else if (maxDataIndex <= 0x1ffff) { // 128k
+ newLength = 200003;
+ shift = 17;
+ mask = 0x1ffff;
+ } else {
+ // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
+ newLength = 1500007;
+ shift = 21;
+ mask = 0x1fffff;
+ }
+ if (newLength > capacity) {
+ uprv_free(table);
+ table = (uint32_t *)uprv_malloc(newLength * 4);
+ if (table == nullptr) {
+ return false;
+ }
+ capacity = newLength;
+ }
+ length = newLength;
+ uprv_memset(table, 0, length * 4);
+
+ blockLength = newBlockLength;
+ return true;
+ }
+
+ template<typename UInt>
+ void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
+ int32_t start = prevDataLength - blockLength;
+ if (start >= minStart) {
+ ++start; // Skip the last block that we added last time.
+ } else {
+ start = minStart; // Begin with the first full block.
+ }
+ for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
+ uint32_t hashCode = makeHashCode(data, start);
+ addEntry(data, start, hashCode, start);
+ }
+ }
+
+ template<typename UIntA, typename UIntB>
+ int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
+ uint32_t hashCode = makeHashCode(blockData, blockStart);
+ int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
+ if (entryIndex >= 0) {
+ return (table[entryIndex] & mask) - 1;
+ } else {
+ return -1;
+ }
+ }
+
+ int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
+ uint32_t hashCode = makeHashCode(blockValue);
+ int32_t entryIndex = findEntry(data, blockValue, hashCode);
+ if (entryIndex >= 0) {
+ return (table[entryIndex] & mask) - 1;
+ } else {
+ return -1;
+ }
+ }
+
+private:
+ template<typename UInt>
+ uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
+ int32_t blockLimit = blockStart + blockLength;
+ uint32_t hashCode = blockData[blockStart++];
+ do {
+ hashCode = 37 * hashCode + blockData[blockStart++];
+ } while (blockStart < blockLimit);
+ return hashCode;
+ }
+
+ uint32_t makeHashCode(uint32_t blockValue) const {
+ uint32_t hashCode = blockValue;
+ for (int32_t i = 1; i < blockLength; ++i) {
+ hashCode = 37 * hashCode + blockValue;
+ }
+ return hashCode;
+ }
+
+ template<typename UInt>
+ void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
+ U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
+ int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
+ if (entryIndex < 0) {
+ table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
+ }
+ }
+
+ template<typename UIntA, typename UIntB>
+ int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
+ uint32_t hashCode) const {
+ uint32_t shiftedHashCode = hashCode << shift;
+ int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
+ for (int32_t entryIndex = initialEntryIndex;;) {
+ uint32_t entry = table[entryIndex];
+ if (entry == 0) {
+ return ~entryIndex;
+ }
+ if ((entry & ~mask) == shiftedHashCode) {
+ int32_t dataIndex = (entry & mask) - 1;
+ if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
+ return entryIndex;
+ }
+ }
+ entryIndex = nextIndex(initialEntryIndex, entryIndex);
+ }
+ }
+
+ int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
+ uint32_t shiftedHashCode = hashCode << shift;
+ int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1
+ for (int32_t entryIndex = initialEntryIndex;;) {
+ uint32_t entry = table[entryIndex];
+ if (entry == 0) {
+ return ~entryIndex;
+ }
+ if ((entry & ~mask) == shiftedHashCode) {
+ int32_t dataIndex = (entry & mask) - 1;
+ if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
+ return entryIndex;
+ }
+ }
+ entryIndex = nextIndex(initialEntryIndex, entryIndex);
+ }
+ }
+
+ inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
+ // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
+ return (entryIndex + initialEntryIndex) % length;
+ }
+
+ // Hash table.
+ // The length is a prime number, larger than the maximum data length.
+ // The "shift" lower bits store a data index + 1.
+ // The remaining upper bits store a partial hashCode of the block data values.
+ uint32_t *table = nullptr;
+ int32_t capacity = 0;
+ int32_t length = 0;
+ int32_t shift = 0;
+ uint32_t mask = 0;
+
+ int32_t blockLength = 0;
+};
+
int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
#ifdef UCPTRIE_DEBUG
bool overflow = false;
@@ -962,8 +1059,9 @@
*
* It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
*/
-int32_t MutableCodePointTrie::compactData(int32_t fastILimit,
- uint32_t *newData, int32_t dataNullIndex) {
+int32_t MutableCodePointTrie::compactData(
+ int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
+ int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
#ifdef UCPTRIE_DEBUG
int32_t countSame=0, sumOverlaps=0;
bool printData = dataLength == 29088 /* line.brk */ ||
@@ -983,8 +1081,14 @@
#endif
}
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
+ if (!mixedBlocks.init(newDataCapacity, blockLength)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ mixedBlocks.extend(newData, 0, 0, newDataLength);
+
+ int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
int32_t fastLength = 0;
for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
@@ -992,12 +1096,17 @@
blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
inc = 1;
fastLength = newDataLength;
+ if (!mixedBlocks.init(newDataCapacity, blockLength)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ mixedBlocks.extend(newData, 0, 0, newDataLength);
}
if (flags[i] == ALL_SAME) {
uint32_t value = index[i];
- int32_t n;
// Find an earlier part of the data array of length blockLength
// that is filled with this value.
+ int32_t n = mixedBlocks.findAllSameBlock(newData, value);
// If we find a match, and the current block is the data null block,
// and it is not a fast block but matches the start of a fast block,
// then we need to continue looking.
@@ -1005,12 +1114,10 @@
// and not all of the rest of the fast block is filled with this value.
// Otherwise trie.getRange() would detect that the fast block starts at
// dataNullOffset and assume incorrectly that it is filled with the null value.
- for (int32_t start = 0;
- (n = findAllSameBlock(newData, start, newDataLength,
- value, blockLength)) >= 0 &&
- i == dataNullIndex && i >= fastILimit && n < fastLength &&
- isStartOfSomeFastBlock(n, index, fastILimit);
- start = n + 1) {}
+ while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
+ isStartOfSomeFastBlock(n, index, fastILimit)) {
+ n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
+ }
if (n >= 0) {
DEBUG_DO(++countSame);
index[i] = n;
@@ -1023,14 +1130,16 @@
}
#endif
index[i] = newDataLength - n;
+ int32_t prevDataLength = newDataLength;
while (n < blockLength) {
newData[newDataLength++] = value;
++n;
}
+ mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
}
} else if (flags[i] == MIXED) {
const uint32_t *block = data + index[i];
- int32_t n = findSameBlock(newData, 0, newDataLength, block, 0, blockLength);
+ int32_t n = mixedBlocks.findBlock(newData, block, 0);
if (n >= 0) {
DEBUG_DO(++countSame);
index[i] = n;
@@ -1043,9 +1152,11 @@
}
#endif
index[i] = newDataLength - n;
+ int32_t prevDataLength = newDataLength;
while (n < blockLength) {
newData[newDataLength++] = block[n++];
}
+ mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
}
} else /* SAME_AS */ {
uint32_t j = index[i];
@@ -1061,7 +1172,8 @@
return newDataLength;
}
-int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, UErrorCode &errorCode) {
+int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
+ UErrorCode &errorCode) {
int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
// Only the linear fast index, no multi-stage index tables.
@@ -1095,6 +1207,12 @@
}
}
+ if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
+
// Examine index-3 blocks. For each determine one of:
// - same as the index-3 null block
// - same as a fast-index block
@@ -1105,6 +1223,7 @@
// Also determine an upper limit for the index-3 table length.
int32_t index3Capacity = 0;
i3FirstNull = index3NullOffset;
+ bool hasLongI3Blocks = false;
// If the fast index covers the whole BMP, then
// the multi-stage index is only for supplementary code points.
// Otherwise, the multi-stage index covers all of Unicode.
@@ -1129,13 +1248,13 @@
index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
} else {
index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+ hasLongI3Blocks = true;
}
i3FirstNull = 0;
}
} else {
if (oredI3 <= 0xffff) {
- int32_t n = findSameBlock(fastIndex, 0, fastIndexLength,
- index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
+ int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
if (n >= 0) {
flags[i] = I3_BMP;
index[i] = n;
@@ -1146,6 +1265,7 @@
} else {
flags[i] = I3_18;
index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+ hasLongI3Blocks = true;
}
}
i = j;
@@ -1166,6 +1286,18 @@
}
uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
+ if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ MixedBlocks longI3Blocks;
+ if (hasLongI3Blocks) {
+ if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ }
+
// Compact the index-3 table and write an uncompacted version of the index-2 table.
uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity
int32_t i2Length = 0;
@@ -1185,8 +1317,7 @@
} else if (f == I3_BMP) {
i3 = index[i];
} else if (f == I3_16) {
- int32_t n = findSameBlock(index16, index3Start, indexLength,
- index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
+ int32_t n = mixedBlocks.findBlock(index16, index, i);
if (n >= 0) {
i3 = n;
} else {
@@ -1198,12 +1329,18 @@
index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
}
i3 = indexLength - n;
+ int32_t prevIndexLength = indexLength;
while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
index16[indexLength++] = index[i + n++];
}
+ mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ if (hasLongI3Blocks) {
+ longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ }
}
} else {
U_ASSERT(f == I3_18);
+ U_ASSERT(hasLongI3Blocks);
// Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
int32_t j = i;
int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
@@ -1236,8 +1373,7 @@
index16[k++] = v;
index16[k - 9] = upperBits;
} while (j < jLimit);
- int32_t n = findSameBlock(index16, index3Start, indexLength,
- index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
+ int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
if (n >= 0) {
i3 = n | 0x8000;
} else {
@@ -1249,6 +1385,7 @@
index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
}
i3 = (indexLength - n) | 0x8000;
+ int32_t prevIndexLength = indexLength;
if (n > 0) {
int32_t start = indexLength;
while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
@@ -1257,6 +1394,10 @@
} else {
indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
}
+ mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ if (hasLongI3Blocks) {
+ longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
+ }
}
}
if (index3NullOffset < 0 && i3FirstNull >= 0) {
@@ -1279,16 +1420,23 @@
}
// Compact the index-2 table and write the index-1 table.
+ static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
+ "must re-init mixedBlocks");
int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
int32_t i1 = fastIndexLength;
for (int32_t i = 0; i < i2Length; i += blockLength) {
- if ((i2Length - i) < blockLength) {
+ int32_t n;
+ if ((i2Length - i) >= blockLength) {
+ // normal block
+ U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
+ n = mixedBlocks.findBlock(index16, index2, i);
+ } else {
// highStart is inside the last index-2 block. Shorten it.
blockLength = i2Length - i;
+ n = findSameBlock(index16, index3Start, indexLength,
+ index2, i, blockLength);
}
int32_t i2;
- int32_t n = findSameBlock(index16, index3Start, indexLength,
- index2, i, blockLength);
if (n >= 0) {
i2 = n;
} else {
@@ -1299,9 +1447,11 @@
n = getOverlap(index16, indexLength, index2, i, blockLength);
}
i2 = indexLength - n;
+ int32_t prevIndexLength = indexLength;
while (n < blockLength) {
index16[indexLength++] = index2[i + n++];
}
+ mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
}
// Set the index-1 table entry.
index16[i1++] = i2;
@@ -1369,7 +1519,11 @@
uprv_memcpy(newData, asciiData, sizeof(asciiData));
int32_t dataNullIndex = allSameBlocks.findMostUsed();
- int32_t newDataLength = compactData(fastILimit, newData, dataNullIndex);
+
+ MixedBlocks mixedBlocks;
+ int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
+ dataNullIndex, mixedBlocks, errorCode);
+ if (U_FAILURE(errorCode)) { return 0; }
U_ASSERT(newDataLength <= newDataCapacity);
uprv_free(data);
data = newData;
@@ -1394,7 +1548,7 @@
dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
}
- int32_t indexLength = compactIndex(fastILimit, errorCode);
+ int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
highStart = realHighStart;
return indexLength;
}
diff -Nur icu-63.1/source/common/umutex.h icu-63.2/source/common/umutex.h
--- icu-63.1/source/common/umutex.h 2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/common/umutex.h 2019-04-11 22:38:30.000000000 +0000
@@ -54,15 +54,23 @@
#include <atomic>
-U_NAMESPACE_BEGIN
-
// Export an explicit template instantiation of std::atomic<int32_t>.
// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
-#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
+ #if defined(__clang__)
+ // Suppress the warning that the explicit instantiation after explicit specialization has no effect.
+ #pragma clang diagnostic push
+ #pragma clang diagnostic ignored "-Winstantiation-after-specialization"
+ #endif
template struct U_COMMON_API std::atomic<int32_t>;
+ #if defined(__clang__)
+ #pragma clang diagnostic pop
+ #endif
#endif
+U_NAMESPACE_BEGIN
+
typedef std::atomic<int32_t> u_atomic_int32_t;
#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)
diff -Nur icu-63.1/source/common/unicode/uvernum.h icu-63.2/source/common/unicode/uvernum.h
--- icu-63.1/source/common/unicode/uvernum.h 2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/common/unicode/uvernum.h 2019-04-11 22:38:30.000000000 +0000
@@ -66,7 +66,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_MINOR_NUM 1
+#define U_ICU_VERSION_MINOR_NUM 2
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
@@ -121,7 +121,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION "63.1"
+#define U_ICU_VERSION "63.2"
/**
* The current ICU library major version number as a string, for library name suffixes.
@@ -140,7 +140,7 @@
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
-#define U_ICU_DATA_VERSION "63.1"
+#define U_ICU_DATA_VERSION "63.2"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================
diff -Nur icu-63.1/source/configure icu-63.2/source/configure
--- icu-63.1/source/configure 2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/configure 2019-04-11 22:38:30.000000000 +0000
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ICU 63.1.
+# Generated by GNU Autoconf 2.69 for ICU 63.2.
#
# Report bugs to <http://icu-project.org/bugs>.
#
@@ -582,8 +582,8 @@
# Identity of this package.
PACKAGE_NAME='ICU'
PACKAGE_TARNAME='International Components for Unicode'
-PACKAGE_VERSION='63.1'
-PACKAGE_STRING='ICU 63.1'
+PACKAGE_VERSION='63.2'
+PACKAGE_STRING='ICU 63.2'
PACKAGE_BUGREPORT='http://icu-project.org/bugs'
PACKAGE_URL='http://icu-project.org'
@@ -1370,7 +1370,7 @@
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures ICU 63.1 to adapt to many kinds of systems.
+\`configure' configures ICU 63.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1437,7 +1437,7 @@
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of ICU 63.1:";;
+ short | recursive ) echo "Configuration of ICU 63.2:";;
esac
cat <<\_ACEOF
@@ -1574,7 +1574,7 @@
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-ICU configure 63.1
+ICU configure 63.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2266,7 +2266,7 @@
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by ICU $as_me 63.1, which was
+It was created by ICU $as_me 63.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -8434,7 +8434,7 @@
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by ICU $as_me 63.1, which was
+This file was extended by ICU $as_me 63.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -8488,7 +8488,7 @@
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-ICU config.status 63.1
+ICU config.status 63.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
Binary files icu-63.1/source/data/in/icudt63l.dat and icu-63.2/source/data/in/icudt63l.dat differ
diff -Nur icu-63.1/source/i18n/japancal.cpp icu-63.2/source/i18n/japancal.cpp
--- icu-63.1/source/i18n/japancal.cpp 2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/i18n/japancal.cpp 2019-04-11 22:38:30.000000000 +0000
@@ -18,6 +18,16 @@
#if !UCONFIG_NO_FORMATTING
#if U_PLATFORM_HAS_WINUWP_API == 0
#include <stdlib.h> // getenv() is not available in UWP env
+#else
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+# define VC_EXTRALEAN
+# define NOUSER
+# define NOSERVICE
+# define NOIME
+# define NOMCX
+#include <windows.h>
#endif
#include "cmemory.h"
#include "erarules.h"
diff -Nur icu-63.1/source/i18n/unicode/numberrangeformatter.h icu-63.2/source/i18n/unicode/numberrangeformatter.h
--- icu-63.1/source/i18n/unicode/numberrangeformatter.h 2018-10-15 18:02:37.000000000 +0000
+++ icu-63.2/source/i18n/unicode/numberrangeformatter.h 2019-04-11 22:38:30.000000000 +0000
@@ -185,8 +185,14 @@
* Export an explicit template instantiation. See datefmt.h
* (When building DLLs for Windows this is required.)
*/
-#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
-template struct U_I18N_API std::atomic<impl::NumberRangeFormatterImpl*>;
+#if U_PLATFORM == U_PF_WINDOWS && !defined(U_IN_DOXYGEN)
+} // namespace icu::number
+U_NAMESPACE_END
+
+template struct U_I18N_API std::atomic< U_NAMESPACE_QUALIFIER number::impl::NumberRangeFormatterImpl*>;
+
+U_NAMESPACE_BEGIN
+namespace number { // icu::number
#endif
/** \endcond */
diff -Nur icu-63.1/source/test/intltest/incaltst.cpp icu-63.2/source/test/intltest/incaltst.cpp
--- icu-63.1/source/test/intltest/incaltst.cpp 2018-09-29 00:34:42.000000000 +0000
+++ icu-63.2/source/test/intltest/incaltst.cpp 2019-04-11 22:38:30.000000000 +0000
@@ -77,6 +77,7 @@
CASE(7,TestPersian);
CASE(8,TestPersianFormat);
CASE(9,TestTaiwan);
+ CASE(10,TestJapaneseHeiseiToReiwa);
default: name = ""; break;
}
}
@@ -626,23 +627,23 @@
// Test simple parse/format with adopt
UDate aDate = 0;
- // Test parse with missing era (should default to current era, heisei)
+ // Test parse with missing era (should default to current era)
// Test parse with incomplete information
logln("Testing parse w/ missing era...");
- SimpleDateFormat *fmt = new SimpleDateFormat(UnicodeString("y.M.d"), Locale("ja_JP@calendar=japanese"), status);
+ SimpleDateFormat *fmt = new SimpleDateFormat(UnicodeString("y/M/d"), Locale("ja_JP@calendar=japanese"), status);
CHECK(status, "creating date format instance");
if(!fmt) {
errln("Couldn't create en_US instance");
} else {
UErrorCode s2 = U_ZERO_ERROR;
cal2->clear();
- UnicodeString samplestr("1.1.9");
+ UnicodeString samplestr("1/5/9");
logln(UnicodeString() + "Test Year: " + samplestr);
aDate = fmt->parse(samplestr, s2);
ParsePosition pp=0;
fmt->parse(samplestr, *cal2, pp);
- CHECK(s2, "parsing the 1.1.9 string");
- logln("*cal2 after 119 parse:");
+ CHECK(s2, "parsing the 1/5/9 string");
+ logln("*cal2 after 159 parse:");
str.remove();
fmt2->format(aDate, str);
logln(UnicodeString() + "as Gregorian Calendar: " + str);
@@ -653,7 +654,7 @@
int32_t expectYear = 1;
int32_t expectEra = JapaneseCalendar::getCurrentEra();
if((gotYear!=1) || (gotEra != expectEra)) {
- errln(UnicodeString("parse "+samplestr+" of 'y.m.d' as Japanese Calendar, expected year ") + expectYear +
+ errln(UnicodeString("parse "+samplestr+" of 'y/m/d' as Japanese Calendar, expected year ") + expectYear +
UnicodeString(" and era ") + expectEra +", but got year " + gotYear + " and era " + gotEra + " (Gregorian:" + str +")");
} else {
logln(UnicodeString() + " year: " + gotYear + ", era: " + gotEra);
@@ -666,7 +667,7 @@
// Test simple parse/format with adopt
UDate aDate = 0;
- // Test parse with missing era (should default to current era, heisei)
+ // Test parse with missing era (should default to current era)
// Test parse with incomplete information
logln("Testing parse w/ just year...");
SimpleDateFormat *fmt = new SimpleDateFormat(UnicodeString("y"), Locale("ja_JP@calendar=japanese"), status);
@@ -678,7 +679,7 @@
cal2->clear();
UnicodeString samplestr("1");
logln(UnicodeString() + "Test Year: " + samplestr);
- aDate = fmt->parse(samplestr, s2);
+ aDate = fmt->parse(samplestr, s2); // Should be parsed as the first day of the current era
ParsePosition pp=0;
fmt->parse(samplestr, *cal2, pp);
CHECK(s2, "parsing the 1 string");
@@ -691,7 +692,7 @@
int32_t gotYear = cal2->get(UCAL_YEAR, s2);
int32_t gotEra = cal2->get(UCAL_ERA, s2);
int32_t expectYear = 1;
- int32_t expectEra = 235; //JapaneseCalendar::kCurrentEra;
+ int32_t expectEra = JapaneseCalendar::getCurrentEra();
if((gotYear!=1) || (gotEra != expectEra)) {
errln(UnicodeString("parse "+samplestr+" of 'y' as Japanese Calendar, expected year ") + expectYear +
UnicodeString(" and era ") + expectEra +", but got year " + gotYear + " and era " + gotEra + " (Gregorian:" + str +")");
@@ -707,6 +708,40 @@
delete fmt2;
}
+void IntlCalendarTest::TestJapaneseHeiseiToReiwa() {
+ Calendar *cal;
+ UErrorCode status = U_ZERO_ERROR;
+ cal = Calendar::createInstance(status);
+ CHECK(status, UnicodeString("Creating default Gregorian Calendar"));
+ cal->set(2019, UCAL_APRIL, 29);
+
+ DateFormat *jfmt = DateFormat::createDateInstance(DateFormat::LONG, "ja@calendar=japanese");
+ CHECK(status, UnicodeString("Creating date format ja@calendar=japanese"))
+
+ const char* EXPECTED_FORMAT[4] = {
+ "\\u5E73\\u621031\\u5E744\\u670829\\u65E5", // Heisei 31 April 29
+ "\\u5E73\\u621031\\u5E744\\u670830\\u65E5", // Heisei 31 April 30
+ "\\u4EE4\\u548c1\\u5E745\\u67081\\u65E5", // Reiwa 1 May 1
+ "\\u4EE4\\u548c1\\u5E745\\u67082\\u65E5" // Reiwa 1 May 2
+ };
+
+ for (int32_t i = 0; i < 4; i++) {
+ UnicodeString dateStr;
+ UDate d = cal->getTime(status);
+ CHECK(status, UnicodeString("Get test date"));
+ jfmt->format(d, dateStr);
+ UnicodeString expected(UnicodeString(EXPECTED_FORMAT[i], -1, US_INV).unescape());
+ if (expected.compare(dateStr) != 0) {
+ errln(UnicodeString("Formatting year:") + cal->get(UCAL_YEAR, status) + " month:"
+ + cal->get(UCAL_MONTH, status) + " day:" + (cal->get(UCAL_DATE, status) + 1)
+ + " - expected: " + expected + " / actual: " + dateStr);
+ }
+ cal->add(UCAL_DATE, 1, status);
+ CHECK(status, UnicodeString("Add 1 day"));
+ }
+ delete jfmt;
+ delete cal;
+}
diff -Nur icu-63.1/source/test/intltest/incaltst.h icu-63.2/source/test/intltest/incaltst.h
--- icu-63.1/source/test/intltest/incaltst.h 2018-09-29 00:34:42.000000000 +0000
+++ icu-63.2/source/test/intltest/incaltst.h 2019-04-11 22:38:30.000000000 +0000
@@ -34,6 +34,7 @@
void TestJapanese(void);
void TestJapaneseFormat(void);
void TestJapanese3860(void);
+ void TestJapaneseHeiseiToReiwa(void);
void TestPersian(void);
void TestPersianFormat(void);
diff -Nur icu-63.1/source/test/intltest/numbertest.h icu-63.2/source/test/intltest/numbertest.h
--- icu-63.1/source/test/intltest/numbertest.h 2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/test/intltest/numbertest.h 2019-04-11 22:38:30.000000000 +0000
@@ -10,6 +10,7 @@
#include "intltest.h"
#include "number_affixutils.h"
#include "numparse_stringsegment.h"
+#include "numrange_impl.h"
#include "unicode/locid.h"
#include "unicode/numberformatter.h"
#include "unicode/numberrangeformatter.h"
diff -Nur icu-63.1/source/test/testdata/format.txt icu-63.2/source/test/testdata/format.txt
--- icu-63.1/source/test/testdata/format.txt 2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/test/testdata/format.txt 2019-04-11 22:38:30.000000000 +0000
@@ -488,42 +488,44 @@
"AD 02008"
},
- // Japanese
- {
- "en_US@calendar=japanese",
- "",
- "PATTERN=G y",
- "YEAR=8",
- "Heisei 8"
- },
- {
- "en_US@calendar=japanese",
- "",
- "PATTERN=G yy",
- "YEAR=8",
- "Heisei 08"
- },
- {
- "en_US@calendar=japanese",
- "",
- "PATTERN=G yyy",
- "YEAR=8",
- "Heisei 008"
- },
- {
- "en_US@calendar=japanese",
- "",
- "PATTERN=G yyyy",
- "YEAR=8",
- "Heisei 0008"
- },
- {
- "en_US@calendar=japanese",
- "",
- "PATTERN=G yyyyy",
- "YEAR=8",
- "Heisei 00008"
- },
+// The following test case is commented out as the current era
+// depends on the current time when the test is run.
+// // Japanese
+// {
+// "en_US@calendar=japanese",
+// "",
+// "PATTERN=G y",
+// "YEAR=8",
+// "Reiwa 8"
+// },
+// {
+// "en_US@calendar=japanese",
+// "",
+// "PATTERN=G yy",
+// "YEAR=8",
+// "Reiwa 08"
+// },
+// {
+// "en_US@calendar=japanese",
+// "",
+// "PATTERN=G yyy",
+// "YEAR=8",
+// "Reiwa 008"
+// },
+// {
+// "en_US@calendar=japanese",
+// "",
+// "PATTERN=G yyyy",
+// "YEAR=8",
+// "Reiwa 0008"
+// },
+// {
+// "en_US@calendar=japanese",
+// "",
+// "PATTERN=G yyyyy",
+// "YEAR=8",
+// "Reiwa 00008"
+// },
}
}
Reply to: