From b9746381eb11d39197d6a7fc813abdad1e3ea108 Mon Sep 17 00:00:00 2001 From: James Seibel Date: Wed, 12 Nov 2025 07:21:54 -0600 Subject: [PATCH] Add varint encoding for full data Closes Merge !93 Thanks Ryan Hitchman! --- .../core/sql/dto/FullDataSourceV2DTO.java | 221 +++++++++++++++++- .../core/sql/dto/util/VarintUtil.java | 69 ++++++ .../java/tests/DhFullDataSourceRepoTests.java | 42 +++- core/src/test/java/tests/VarintTest.java | 94 ++++++++ 4 files changed, 416 insertions(+), 10 deletions(-) create mode 100644 core/src/main/java/com/seibel/distanthorizons/core/sql/dto/util/VarintUtil.java create mode 100644 core/src/test/java/tests/VarintTest.java diff --git a/core/src/main/java/com/seibel/distanthorizons/core/sql/dto/FullDataSourceV2DTO.java b/core/src/main/java/com/seibel/distanthorizons/core/sql/dto/FullDataSourceV2DTO.java index 2d7917a1b..d0446454f 100644 --- a/core/src/main/java/com/seibel/distanthorizons/core/sql/dto/FullDataSourceV2DTO.java +++ b/core/src/main/java/com/seibel/distanthorizons/core/sql/dto/FullDataSourceV2DTO.java @@ -31,6 +31,7 @@ import com.seibel.distanthorizons.core.pooling.PhantomArrayListPool; import com.seibel.distanthorizons.core.pos.DhSectionPos; import com.seibel.distanthorizons.core.network.INetworkObject; import com.seibel.distanthorizons.core.sql.dto.util.FullDataMinMaxPosUtil; +import com.seibel.distanthorizons.core.sql.dto.util.VarintUtil; import com.seibel.distanthorizons.core.util.BoolUtil; import com.seibel.distanthorizons.core.util.FullDataPointUtil; import com.seibel.distanthorizons.core.util.ListUtil; @@ -109,7 +110,7 @@ public class FullDataSourceV2DTO FullDataSourceV2DTO dto = FullDataSourceV2DTO.CreateEmptyDataSourceForDecoding(); // populate arrays - writeDataSourceDataArrayToBlob(dataSource.dataPoints, dto.compressedDataByteArray, compressionModeEnum); + writeDataSourceDataArrayToBlobV2(dataSource.dataPoints, dto.compressedDataByteArray, compressionModeEnum); 
writeGenerationStepsToBlob(dataSource.columnGenerationSteps, dto.compressedColumnGenStepByteArray, compressionModeEnum); writeWorldCompressionModeToBlob(dataSource.columnWorldCompressionMode, dto.compressedWorldCompressionModeByteArray, compressionModeEnum); writeDataMappingToBlob(dataSource.mapping, dto.compressedMappingByteArray, compressionModeEnum); @@ -199,8 +200,8 @@ public class FullDataSourceV2DTO { // format validation // - if (DATA_FORMAT.V1_NO_ADJACENT_DATA != this.dataFormatVersion - && DATA_FORMAT.V2_LATEST != this.dataFormatVersion) + if (this.dataFormatVersion != DATA_FORMAT.V1_NO_ADJACENT_DATA + && this.dataFormatVersion != DATA_FORMAT.V2_LATEST) { throw new IllegalStateException("Data source population only supports formats: ["+DATA_FORMAT.V1_NO_ADJACENT_DATA +","+DATA_FORMAT.V2_LATEST +"], data format found: ["+this.dataFormatVersion+"]."); } @@ -234,7 +235,15 @@ public class FullDataSourceV2DTO { readBlobToGenerationSteps(this.compressedColumnGenStepByteArray, dataSource.columnGenerationSteps, compressionModeEnum); readBlobToWorldCompressionMode(this.compressedWorldCompressionModeByteArray, dataSource.columnWorldCompressionMode, compressionModeEnum); - readBlobToDataSourceDataArray(this.compressedDataByteArray, dataSource.dataPoints, compressionModeEnum); + + if (this.dataFormatVersion == 1) + { + readBlobToDataSourceDataArrayV1(this.compressedDataByteArray, dataSource.dataPoints, compressionModeEnum); + } + else + { + readBlobToDataSourceDataArrayV2(this.compressedDataByteArray, dataSource.dataPoints, compressionModeEnum); + } } else { @@ -398,7 +407,7 @@ public class FullDataSourceV2DTO } - private static void writeDataSourceDataArrayToBlob( + public static void writeDataSourceDataArrayToBlobV1( LongArrayList[] inputDataArray, ByteArrayList outputByteArray, EDhApiDataCompressionMode compressionModeEnum) throws IOException { @@ -435,7 +444,7 @@ public class FullDataSourceV2DTO outputByteArray.addElements(0, byteArrayOutputStream.toByteArray()); 
} } - private static void readBlobToDataSourceDataArray( + private static void readBlobToDataSourceDataArrayV1( ByteArrayList inputCompressedDataByteArray, LongArrayList[] outputDataLongArray, EDhApiDataCompressionMode compressionModeEnum) throws IOException, DataCorruptedException { @@ -470,6 +479,206 @@ public class FullDataSourceV2DTO } } + private static void writeDataSourceDataArrayToBlobV2( + LongArrayList[] inputDataArray, ByteArrayList outputByteArray, + EDhApiDataCompressionMode compressionModeEnum) throws IOException + { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + try (DhDataOutputStream compressedOut = new DhDataOutputStream(byteArrayOutputStream, compressionModeEnum)) + { + int dataArrayLength = FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; + + // this method would be simpler if we allocated a bunch of temporary arrays, + // but we're trying to avoid garbage. + + // 1. column lengths + for (int xz = 0; xz < dataArrayLength; xz++) + { + LongArrayList col = inputDataArray[xz]; + int size = col != null ? col.size() : 0; + VarintUtil.writeVarint(compressedOut, size); + } + + // 2. column ids, with "is lit" and "is discontinuous" bits + int previousBottomY = 0; + + for (int xz = 0; xz < dataArrayLength; xz++) + { + LongArrayList col = inputDataArray[xz]; + int size = col != null ? col.size() : 0; + for (int y = 0; y < size; y++) + { + long data = col.getLong(y); + + int id = FullDataPointUtil.getId(data); + int height = FullDataPointUtil.getHeight(data); + int bottomY = FullDataPointUtil.getBottomY(data); + + boolean hasLight = (FullDataPointUtil.getBlockLight(data) | FullDataPointUtil.getSkyLight(data)) != LodUtil.MIN_MC_LIGHT; + + // all datapoints are contiguous, with no gaps + // so having both height and bottomY is redundant. We could store the prediction + // in an array, but it's much cheaper to just recompute it later. 
+ int expectedBottomY = previousBottomY - height; + boolean hasDiscontinuity = bottomY != expectedBottomY; + previousBottomY = bottomY; + + VarintUtil.writeVarint(compressedOut, (id << 2) | (hasLight ? 2 : 0) | (hasDiscontinuity ? 1 : 0)); + } + } + + // 3. heights + for (int xz = 0; xz < dataArrayLength; xz++) + { + LongArrayList col = inputDataArray[xz]; + int size = (col != null) ? col.size() : 0; + for (int y = 0; y < size; y++) + { + long data = col.getLong(y); + VarintUtil.writeVarint(compressedOut, FullDataPointUtil.getHeight(data)); + } + } + + // 4. bottomY (only the mis-predicted ones) + previousBottomY = 0; + for (int xz = 0; xz < dataArrayLength; xz++) + { + LongArrayList col = inputDataArray[xz]; + int size = (col != null) ? col.size() : 0; + for (int y = 0; y < size; y++) + { + long data = col.getLong(y); + + int height = FullDataPointUtil.getHeight(data); + int bottomY = FullDataPointUtil.getBottomY(data); + + int expectedBottomY = previousBottomY - height; + if (bottomY != expectedBottomY) + { + VarintUtil.writeVarint(compressedOut, VarintUtil.zigzagEncode(bottomY - expectedBottomY)); + } + previousBottomY = bottomY; + } + } + + // 5. packed Light (only lit sections) + for (int xz = 0; xz < dataArrayLength; xz++) + { + LongArrayList col = inputDataArray[xz]; + int size = col != null ? 
col.size() : 0; + for (int y = 0; y < size; y++) + { + long data = col.getLong(y); + int blockLight = FullDataPointUtil.getBlockLight(data); + int skyLight = FullDataPointUtil.getSkyLight(data); + byte packedLight = (byte) ((blockLight << 4) | skyLight); + if (packedLight != 0) + { + compressedOut.writeByte(packedLight); + } + } + } + + compressedOut.flush(); + byteArrayOutputStream.close(); + outputByteArray.addElements(0, byteArrayOutputStream.toByteArray()); + } + } + private static void readBlobToDataSourceDataArrayV2( + ByteArrayList inputCompressedDataByteArray, + LongArrayList[] outputDataLongArray, EDhApiDataCompressionMode compressionModeEnum) + throws IOException, DataCorruptedException + { + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(inputCompressedDataByteArray.elements()); + try (DhDataInputStream compressedIn = new DhDataInputStream(byteArrayInputStream, compressionModeEnum)) + { + // 1. column counts, preallocate + int numColumns = FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; + for (int i = 0; i < numColumns; i++) + { + int count = VarintUtil.readVarint(compressedIn); + ListUtil.clearAndSetSize(outputDataLongArray[i], count); + } + + // 2. ids and flags for min_y and light + for (LongArrayList col : outputDataLongArray) + { + for (int i = 0; i < col.size(); i++) + { + int encodedId = VarintUtil.readVarint(compressedIn); + col.set(i, FullDataPointUtil.encode(encodedId >> 2, 1, encodedId & 1, (byte) (encodedId & 2), (byte) 0)); + } + } + + // 3. height + for (LongArrayList col : outputDataLongArray) + { + for (int i = 0; i < col.size(); i++) + { + int height = VarintUtil.readVarint(compressedIn); + long data = col.getLong(i); + col.set(i, FullDataPointUtil.setHeight(data, height)); + } + } + + // 4. 
bottomY + int previousBottomY = 0; + for (LongArrayList col : outputDataLongArray) + { + for (int i = 0; i < col.size(); i++) + { + long data = col.getLong(i); + int error = 0; + if (FullDataPointUtil.getBottomY(data) != 0) + { + error = VarintUtil.zigzagDecode(VarintUtil.readVarint(compressedIn)); + } + int bottomY = previousBottomY - FullDataPointUtil.getHeight(data) + error; + col.set(i, FullDataPointUtil.setBottomY(data, bottomY)); + previousBottomY = bottomY; + } + } + + // 5. lights + for (LongArrayList col : outputDataLongArray) + { + for (int i = 0; i < col.size(); i++) + { + long data = col.getLong(i); + boolean hasLight = FullDataPointUtil.getBlockLight(data) != 0; + byte skyLight = 0; + byte blockLight = 0; + if (hasLight) + { + byte packedLight = compressedIn.readByte(); + skyLight = (byte) (packedLight & 0xF); + blockLight = (byte) (packedLight >> 4); + } + + col.set(i, FullDataPointUtil.setSkyLight( + FullDataPointUtil.setBlockLight(data, blockLight), + skyLight)); + } + } + + if (FullDataPointUtil.RUN_VALIDATION) + { + // These points all bypassed validation because of using setters. 
+ for (LongArrayList col : outputDataLongArray) + { + for (int i = 0; i < col.size(); i++) + { + FullDataPointUtil.validateDatapoint(col.getLong(i)); + } + } + } + } + catch (EOFException e) + { + throw new DataCorruptedException(e); + } + } + private static void writeGenerationStepsToBlob(ByteArrayList inputColumnGenStepByteArray, ByteArrayList outputByteArray, EDhApiDataCompressionMode compressionModeEnum) throws IOException { diff --git a/core/src/main/java/com/seibel/distanthorizons/core/sql/dto/util/VarintUtil.java b/core/src/main/java/com/seibel/distanthorizons/core/sql/dto/util/VarintUtil.java new file mode 100644 index 000000000..ccad5cc17 --- /dev/null +++ b/core/src/main/java/com/seibel/distanthorizons/core/sql/dto/util/VarintUtil.java @@ -0,0 +1,69 @@ +package com.seibel.distanthorizons.core.sql.dto.util; + +import com.seibel.distanthorizons.core.sql.dto.FullDataSourceV2DTO; +import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataInputStream; +import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataOutputStream; + +import java.io.IOException; + +public class VarintUtil +{ + + /** + * zigzagEncode maps 0=>0, -1=>1, 1=>2, -2=>3, 2=>4, etc. 
+ * https://lemire.me/blog/2022/11/25/making-all-your-integers-positive-with-zigzag-encoding/ + */ + public static int zigzagEncode(int n) + { + // if n is (byte)-1, this results in: + // 0b1111_1110 ^ 0b1111_1111 == 0b0000_0001 + return (n << 1) ^ (n >> 31); + } + + public static int zigzagDecode(int n) + { return (n >>> 1) ^ -(n & 1); } + + + + /** + * @param value should be a zigzag encoded value + * created via {@link VarintUtil#zigzagEncode(int)} + */ + public static void writeVarint(DhDataOutputStream out, int value) throws IOException + { + if (value < 0) + { + throw new IllegalArgumentException("varint given ["+value+"], varint only accepts positive values."); + } + + while (value >= 128) + { + out.writeByte(value | 128); + value >>>= 7; // 128 = 2^7 + } + out.writeByte(value); + } + + public static int readVarint(DhDataInputStream in) throws IOException + { + int value = 0; + int shift = 0; + byte b; + do + { + if (shift >= 32) + { + throw new IOException("invalid varint"); + } + b = in.readByte(); + value |= (b & 127) << shift; + shift += 7; + } + while ((b & 128) != 0); + return value; + } + + + +} diff --git a/core/src/test/java/tests/DhFullDataSourceRepoTests.java b/core/src/test/java/tests/DhFullDataSourceRepoTests.java index 055933372..1f0bbd62e 100644 --- a/core/src/test/java/tests/DhFullDataSourceRepoTests.java +++ b/core/src/test/java/tests/DhFullDataSourceRepoTests.java @@ -87,16 +87,23 @@ public class DhFullDataSourceRepoTests Random seededRandom = new Random(3); - for (int i = 0; i < FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; i++) + for (int arrayIndex = 0; arrayIndex < FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; arrayIndex++) { - fullDataArray[i] = new LongArrayList(1); + fullDataArray[arrayIndex] = new LongArrayList(1); // random column heights so we can differentiate // columns from each other int columnCount = Math.abs(seededRandom.nextInt() % 31) + 1; - for (int j = 0; j < columnCount; j++) + for (int colIndex = 0; colIndex < 
columnCount; colIndex++) { - fullDataArray[i].add(FullDataPointUtil.encode(j, 1, j, LodUtil.MAX_MC_LIGHT, LodUtil.MAX_MC_LIGHT)); + long datapoint = FullDataPointUtil.encode( + colIndex, // id + 1, // height + colIndex, // relative min Y + (byte)(colIndex % LodUtil.MAX_MC_LIGHT), // block light + (byte)((colIndex + 2) % LodUtil.MAX_MC_LIGHT) // sky light + ); + fullDataArray[arrayIndex].add(datapoint); } } @@ -113,6 +120,18 @@ public class DhFullDataSourceRepoTests repo.save(originalDto); + // also create format-1 encoded version to ensure backwards compatibility + long posV1 = DhSectionPos.encode((byte) 6, 2, 3); + FullDataSourceV2 dataSourceFormatV1 = FullDataSourceV2.createWithData(posV1, dataMapping, fullDataArray, columnGenStep, columnWorldCompressionMode); + FullDataSourceV2DTO dtoFormatV1 = FullDataSourceV2DTO.CreateFromDataSource(dataSourceFormatV1, EDhApiDataCompressionMode.LZMA2); + FullDataSourceV2DTO.writeDataSourceDataArrayToBlobV1( + dataSourceFormatV1.dataPoints, + dtoFormatV1.compressedDataByteArray, + EDhApiDataCompressionMode.LZMA2); + dtoFormatV1.dataFormatVersion = FullDataSourceV2DTO.DATA_FORMAT.V1_NO_ADJACENT_DATA; + repo.save(dtoFormatV1); + + //=======================// // confirm DTO data is // @@ -152,6 +171,21 @@ public class DhFullDataSourceRepoTests } } + // check that we have proper backwards compatibility with V1 + try (FullDataSourceV2 savedDataSource = repo.getByKey(posV1).createUnitTestDataSource()) + { + Assert.assertNotNull("Failed to create DataSource", savedDataSource); + assertArraysAreEqual(originalDataSource.columnGenerationSteps, savedDataSource.columnGenerationSteps); + assertArraysAreEqual(originalDataSource.columnWorldCompressionMode, + savedDataSource.columnWorldCompressionMode); + Assert.assertTrue(originalDataSource.dataPoints.length == savedDataSource.dataPoints.length); + + for (int i = 0; i < FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; i++) + { + assertArraysAreEqual(originalDataSource.dataPoints[i], 
savedDataSource.dataPoints[i]); + } + } + //==============// diff --git a/core/src/test/java/tests/VarintTest.java b/core/src/test/java/tests/VarintTest.java new file mode 100644 index 000000000..8970a5524 --- /dev/null +++ b/core/src/test/java/tests/VarintTest.java @@ -0,0 +1,94 @@ +/* + * This file is part of the Distant Horizons mod + * licensed under the GNU LGPL v3 License. + * + * Copyright (C) 2020 James Seibel + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ */ + +package tests; + +import com.seibel.distanthorizons.api.enums.config.EDhApiDataCompressionMode; +import com.seibel.distanthorizons.core.sql.dto.FullDataSourceV2DTO; +import com.seibel.distanthorizons.core.sql.dto.util.VarintUtil; +import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataInputStream; +import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataOutputStream; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +public class VarintTest +{ + + @Test + public void Test() + { + Assert.assertEquals(0x80, 128); + + + // the varint round trip below relies on zig zag encoding, so test it first + for (int i = -256; i < 256; i++) + { + testZigZagEncoding(i); + } + + for (int i = -256; i < 256; i++) + { + testSingleVarint(i); + } + } + + private static void testZigZagEncoding(int value) + { + int encodedValue = VarintUtil.zigzagEncode(value); + int decodedValue = VarintUtil.zigzagDecode(encodedValue); + Assert.assertEquals(value, decodedValue); + } + + private static void testSingleVarint(int value) + { + // write to stream + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + try (DhDataOutputStream outputStream = new DhDataOutputStream(byteArrayOutputStream, EDhApiDataCompressionMode.UNCOMPRESSED)) + { + int encodedValue = VarintUtil.zigzagEncode(value); + VarintUtil.writeVarint(outputStream, encodedValue); // writeVarint rejects negative input, zig-zag makes the value non-negative + } + catch (IOException e) + { + e.printStackTrace(); + Assert.fail("Fail writing varint ["+value+"], error: ["+e.getMessage()+"]"); + } + + + // read stream + byte[] byteArray = byteArrayOutputStream.toByteArray(); + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + try (DhDataInputStream inputStream = new DhDataInputStream(byteArrayInputStream, EDhApiDataCompressionMode.UNCOMPRESSED)) + { + int encodedValue = 
VarintUtil.readVarint(inputStream); + int decodedValue = VarintUtil.zigzagDecode(encodedValue); + Assert.assertEquals(value, decodedValue); + } + catch (IOException e) + { + e.printStackTrace(); + Assert.fail("Fail reading varint ["+value+"], error: ["+e.getMessage()+"]"); + } + } + +}