Add varint encoding for full data

Closes Merge !93
Thanks Ryan Hitchman!
This commit is contained in:
James Seibel
2025-11-12 07:21:54 -06:00
parent 6eb24ecde1
commit b9746381eb
4 changed files with 416 additions and 10 deletions
@@ -31,6 +31,7 @@ import com.seibel.distanthorizons.core.pooling.PhantomArrayListPool;
import com.seibel.distanthorizons.core.pos.DhSectionPos;
import com.seibel.distanthorizons.core.network.INetworkObject;
import com.seibel.distanthorizons.core.sql.dto.util.FullDataMinMaxPosUtil;
import com.seibel.distanthorizons.core.sql.dto.util.VarintUtil;
import com.seibel.distanthorizons.core.util.BoolUtil;
import com.seibel.distanthorizons.core.util.FullDataPointUtil;
import com.seibel.distanthorizons.core.util.ListUtil;
@@ -109,7 +110,7 @@ public class FullDataSourceV2DTO
FullDataSourceV2DTO dto = FullDataSourceV2DTO.CreateEmptyDataSourceForDecoding();
// populate arrays
writeDataSourceDataArrayToBlob(dataSource.dataPoints, dto.compressedDataByteArray, compressionModeEnum);
writeDataSourceDataArrayToBlobV2(dataSource.dataPoints, dto.compressedDataByteArray, compressionModeEnum);
writeGenerationStepsToBlob(dataSource.columnGenerationSteps, dto.compressedColumnGenStepByteArray, compressionModeEnum);
writeWorldCompressionModeToBlob(dataSource.columnWorldCompressionMode, dto.compressedWorldCompressionModeByteArray, compressionModeEnum);
writeDataMappingToBlob(dataSource.mapping, dto.compressedMappingByteArray, compressionModeEnum);
@@ -199,8 +200,8 @@ public class FullDataSourceV2DTO
{
// format validation //
if (DATA_FORMAT.V1_NO_ADJACENT_DATA != this.dataFormatVersion
&& DATA_FORMAT.V2_LATEST != this.dataFormatVersion)
if (this.dataFormatVersion != DATA_FORMAT.V1_NO_ADJACENT_DATA
&& this.dataFormatVersion != DATA_FORMAT.V2_LATEST)
{
throw new IllegalStateException("Data source population only supports formats: ["+DATA_FORMAT.V1_NO_ADJACENT_DATA +","+DATA_FORMAT.V2_LATEST +"], data format found: ["+this.dataFormatVersion+"].");
}
@@ -234,7 +235,15 @@ public class FullDataSourceV2DTO
{
readBlobToGenerationSteps(this.compressedColumnGenStepByteArray, dataSource.columnGenerationSteps, compressionModeEnum);
readBlobToWorldCompressionMode(this.compressedWorldCompressionModeByteArray, dataSource.columnWorldCompressionMode, compressionModeEnum);
readBlobToDataSourceDataArray(this.compressedDataByteArray, dataSource.dataPoints, compressionModeEnum);
if (this.dataFormatVersion == 1)
{
readBlobToDataSourceDataArrayV1(this.compressedDataByteArray, dataSource.dataPoints, compressionModeEnum);
}
else
{
readBlobToDataSourceDataArrayV2(this.compressedDataByteArray, dataSource.dataPoints, compressionModeEnum);
}
}
else
{
@@ -398,7 +407,7 @@ public class FullDataSourceV2DTO
}
private static void writeDataSourceDataArrayToBlob(
public static void writeDataSourceDataArrayToBlobV1(
LongArrayList[] inputDataArray, ByteArrayList outputByteArray,
EDhApiDataCompressionMode compressionModeEnum) throws IOException
{
@@ -435,7 +444,7 @@ public class FullDataSourceV2DTO
outputByteArray.addElements(0, byteArrayOutputStream.toByteArray());
}
}
private static void readBlobToDataSourceDataArray(
private static void readBlobToDataSourceDataArrayV1(
ByteArrayList inputCompressedDataByteArray, LongArrayList[] outputDataLongArray,
EDhApiDataCompressionMode compressionModeEnum) throws IOException, DataCorruptedException
{
@@ -470,6 +479,206 @@ public class FullDataSourceV2DTO
}
}
/**
 * Serializes every datapoint column into the V2 (varint) blob layout and
 * adds the resulting bytes to {@code outputByteArray}. <br><br>
 *
 * The data is written as five consecutive sections, each one iterating over
 * all {@code FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH} columns in index order: <br>
 * 1. the number of datapoints in each column (varint, null columns are written as 0) <br>
 * 2. {@code (id << 2) | (hasLight ? 2 : 0) | (hasDiscontinuity ? 1 : 0)} for each datapoint (varint) <br>
 * 3. the height of each datapoint (varint) <br>
 * 4. the zigzag encoded bottomY prediction error, only for datapoints flagged as discontinuous (varint) <br>
 * 5. the packed light byte {@code (blockLight << 4) | skyLight}, only for datapoints flagged as lit <br><br>
 *
 * The flags written in section 2 tell the reader which datapoints have an entry
 * in sections 4 and 5, so the flag conditions and the write conditions must always match.
 *
 * @param inputDataArray the columns to serialize, null entries are treated as empty columns
 * @param outputByteArray receives the (optionally compressed) bytes
 * @param compressionModeEnum the compression used by the wrapping {@link DhDataOutputStream}
 * @throws IOException if writing to the stream fails
 * @throws IllegalArgumentException if a datapoint ID doesn't fit in 29 bits,
 *                                  or a height is rejected by {@link VarintUtil#writeVarint}
 */
private static void writeDataSourceDataArrayToBlobV2(
		LongArrayList[] inputDataArray, ByteArrayList outputByteArray,
		EDhApiDataCompressionMode compressionModeEnum) throws IOException
{
	ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
	try (DhDataOutputStream compressedOut = new DhDataOutputStream(byteArrayOutputStream, compressionModeEnum))
	{
		int dataArrayLength = FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH;
		
		// this method would be simpler if we allocated a bunch of temporary arrays,
		// but we're trying to avoid garbage.
		
		
		// 1. column lengths
		for (int xz = 0; xz < dataArrayLength; xz++)
		{
			LongArrayList col = inputDataArray[xz];
			int size = (col != null) ? col.size() : 0;
			VarintUtil.writeVarint(compressedOut, size);
		}
		
		
		// 2. column ids, with "is lit" and "is discontinuous" bits
		int previousBottomY = 0;
		for (int xz = 0; xz < dataArrayLength; xz++)
		{
			LongArrayList col = inputDataArray[xz];
			int size = (col != null) ? col.size() : 0;
			for (int y = 0; y < size; y++)
			{
				long data = col.getLong(y);
				int id = FullDataPointUtil.getId(data);
				int height = FullDataPointUtil.getHeight(data);
				int bottomY = FullDataPointUtil.getBottomY(data);
				
				// the two low bits are used for flags, so the ID must fit in the remaining
				// 29 non-negative bits, otherwise the shift below would silently drop
				// the high bits and a different ID would be stored
				if ((id >>> 29) != 0)
				{
					throw new IllegalArgumentException("Datapoint ID ["+id+"] is too large to encode, IDs must fit in 29 bits.");
				}
				
				// this must be the exact same check used in section 5,
				// otherwise the reader will expect a different number of light bytes than were written
				byte packedLight = (byte) ((FullDataPointUtil.getBlockLight(data) << 4) | FullDataPointUtil.getSkyLight(data));
				boolean hasLight = (packedLight != 0);
				
				// all datapoints are contiguous, with no gaps
				// so having both height and bottomY is redundant. We could store the prediction
				// in an array, but it's much cheaper to just recompute it later.
				int expectedBottomY = previousBottomY - height;
				boolean hasDiscontinuity = (bottomY != expectedBottomY);
				// the prediction carries over between columns (it isn't reset),
				// the reader does the same
				previousBottomY = bottomY;
				
				VarintUtil.writeVarint(compressedOut, (id << 2) | (hasLight ? 2 : 0) | (hasDiscontinuity ? 1 : 0));
			}
		}
		
		
		// 3. heights
		for (int xz = 0; xz < dataArrayLength; xz++)
		{
			LongArrayList col = inputDataArray[xz];
			int size = (col != null) ? col.size() : 0;
			for (int y = 0; y < size; y++)
			{
				long data = col.getLong(y);
				VarintUtil.writeVarint(compressedOut, FullDataPointUtil.getHeight(data));
			}
		}
		
		
		// 4. bottomY (only the mis-predicted ones)
		previousBottomY = 0;
		for (int xz = 0; xz < dataArrayLength; xz++)
		{
			LongArrayList col = inputDataArray[xz];
			int size = (col != null) ? col.size() : 0;
			for (int y = 0; y < size; y++)
			{
				long data = col.getLong(y);
				int height = FullDataPointUtil.getHeight(data);
				int bottomY = FullDataPointUtil.getBottomY(data);
				
				// same prediction as section 2, only the difference is stored
				int expectedBottomY = previousBottomY - height;
				if (bottomY != expectedBottomY)
				{
					// the difference can be negative, so it has to be zigzag encoded first
					VarintUtil.writeVarint(compressedOut, VarintUtil.zigzagEncode(bottomY - expectedBottomY));
				}
				previousBottomY = bottomY;
			}
		}
		
		
		// 5. packed Light (only lit sections)
		for (int xz = 0; xz < dataArrayLength; xz++)
		{
			LongArrayList col = inputDataArray[xz];
			int size = (col != null) ? col.size() : 0;
			for (int y = 0; y < size; y++)
			{
				long data = col.getLong(y);
				int blockLight = FullDataPointUtil.getBlockLight(data);
				int skyLight = FullDataPointUtil.getSkyLight(data);
				
				// block light in the high nibble, sky light in the low nibble
				byte packedLight = (byte) ((blockLight << 4) | skyLight);
				if (packedLight != 0)
				{
					compressedOut.writeByte(packedLight);
				}
			}
		}
		
		
		compressedOut.flush();
		byteArrayOutputStream.close();
		outputByteArray.addElements(0, byteArrayOutputStream.toByteArray());
	}
}
/**
 * Decodes a V2 (varint) blob into the given datapoint columns. <br><br>
 *
 * The blob is read as five consecutive sections, each covering every column in index order: <br>
 * 1. the number of datapoints in each column <br>
 * 2. the ID of each datapoint plus two flag bits (bit 0 = has a bottomY correction, bit 1 = has light) <br>
 * 3. the height of each datapoint <br>
 * 4. the zigzag encoded bottomY correction for each flagged datapoint <br>
 * 5. the packed light byte (block light in the high nibble, sky light in the low nibble) for each lit datapoint <br><br>
 *
 * To avoid allocating temporary arrays the two flags from section 2 are temporarily
 * stored in the datapoint's own bottomY and block light fields until sections 4 and 5
 * replace them with the real values.
 *
 * @param inputCompressedDataByteArray the (optionally compressed) bytes to decode
 * @param outputDataLongArray the columns to populate, each column is resized and overwritten
 * @param compressionModeEnum the compression used by the wrapping {@link DhDataInputStream}
 * @throws IOException if reading from the stream fails or a varint is invalid
 * @throws DataCorruptedException if the stream ends before all sections were read
 */
private static void readBlobToDataSourceDataArrayV2(
		ByteArrayList inputCompressedDataByteArray,
		LongArrayList[] outputDataLongArray, EDhApiDataCompressionMode compressionModeEnum)
		throws IOException, DataCorruptedException
{
	// NOTE(review): elements() presumably returns the whole backing array, which may be longer
	// than size() - confirm trailing bytes can't be read as valid data for any compression mode
	ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(inputCompressedDataByteArray.elements());
	try (DhDataInputStream compressedIn = new DhDataInputStream(byteArrayInputStream, compressionModeEnum))
	{
		// 1. column counts, preallocate
		int numColumns = FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH;
		for (int i = 0; i < numColumns; i++)
		{
			int count = VarintUtil.readVarint(compressedIn);
			ListUtil.clearAndSetSize(outputDataLongArray[i], count);
		}
		
		
		// 2. ids and flags for min_y and light
		for (LongArrayList col : outputDataLongArray)
		{
			for (int i = 0; i < col.size(); i++)
			{
				int encodedId = VarintUtil.readVarint(compressedIn);
				// height is a placeholder (replaced in section 3),
				// bottomY temporarily holds the "has correction" flag (0 or 1) and
				// block light temporarily holds the "has light" flag (0 or 2)
				col.set(i, FullDataPointUtil.encode(encodedId >> 2, 1, encodedId & 1, (byte) (encodedId & 2), (byte) 0));
			}
		}
		
		
		// 3. height
		for (LongArrayList col : outputDataLongArray)
		{
			for (int i = 0; i < col.size(); i++)
			{
				int height = VarintUtil.readVarint(compressedIn);
				long data = col.getLong(i);
				col.set(i, FullDataPointUtil.setHeight(data, height));
			}
		}
		
		
		// 4. bottomY
		// the prediction carries over between columns (it isn't reset), matching the writer
		int previousBottomY = 0;
		for (LongArrayList col : outputDataLongArray)
		{
			for (int i = 0; i < col.size(); i++)
			{
				long data = col.getLong(i);
				
				// a non-zero bottomY here is the flag stored in section 2,
				// meaning a correction was written for this datapoint
				int error = 0;
				if (FullDataPointUtil.getBottomY(data) != 0)
				{
					error = VarintUtil.zigzagDecode(VarintUtil.readVarint(compressedIn));
				}
				
				int bottomY = previousBottomY - FullDataPointUtil.getHeight(data) + error;
				col.set(i, FullDataPointUtil.setBottomY(data, bottomY));
				previousBottomY = bottomY;
			}
		}
		
		
		// 5. lights
		for (LongArrayList col : outputDataLongArray)
		{
			for (int i = 0; i < col.size(); i++)
			{
				long data = col.getLong(i);
				
				// a non-zero block light here is the flag stored in section 2
				boolean hasLight = FullDataPointUtil.getBlockLight(data) != 0;
				byte skyLight = 0;
				byte blockLight = 0;
				if (hasLight)
				{
					byte packedLight = compressedIn.readByte();
					skyLight = (byte) (packedLight & 0xF);
					// the mask is required, bytes are signed so shifting a packed value
					// with a block light of 8 or more would otherwise sign-extend into a negative number
					blockLight = (byte) ((packedLight >> 4) & 0xF);
				}
				
				// always overwrite both values so the temporary flag is cleared for unlit datapoints
				col.set(i, FullDataPointUtil.setSkyLight(
						FullDataPointUtil.setBlockLight(data, blockLight),
						skyLight));
			}
		}
		
		
		if (FullDataPointUtil.RUN_VALIDATION)
		{
			// These points all bypassed validation because of using setters.
			for (LongArrayList col : outputDataLongArray)
			{
				for (int i = 0; i < col.size(); i++)
				{
					FullDataPointUtil.validateDatapoint(col.getLong(i));
				}
			}
		}
	}
	catch (EOFException e)
	{
		throw new DataCorruptedException(e);
	}
}
private static void writeGenerationStepsToBlob(ByteArrayList inputColumnGenStepByteArray, ByteArrayList outputByteArray, EDhApiDataCompressionMode compressionModeEnum) throws IOException
{
@@ -0,0 +1,69 @@
package com.seibel.distanthorizons.core.sql.dto.util;
import com.seibel.distanthorizons.core.sql.dto.FullDataSourceV2DTO;
import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataInputStream;
import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataOutputStream;
import java.io.IOException;
/**
 * Helper methods for reading and writing variable length integers (varints). <br><br>
 *
 * A varint stores a non-negative int using 7 bits per byte, least significant
 * bits first, with the high bit of each byte set when another byte follows.
 * Small values therefore only need a single byte. <br>
 * Signed values should be passed through {@link VarintUtil#zigzagEncode(int)}
 * before writing so small negative numbers also stay small.
 */
public class VarintUtil
{
	/**
	 * zigzagEncode maps 0=>0, -1=>1, 1=>2, -2=>3, 2=>4, etc.
	 * this helps encode small magnitude signed numbers as small varints.
	 * https://lemire.me/blog/2022/11/25/making-all-your-integers-positive-with-zigzag-encoding/
	 * <br><br>
	 * Note: inputs outside the range [-2^30, 2^30 - 1] produce a negative result,
	 * which {@link VarintUtil#writeVarint(DhDataOutputStream, int)} will reject.
	 *
	 * @param n the signed value to encode
	 * @return the zigzag encoded value, can be reversed via {@link VarintUtil#zigzagDecode(int)}
	 */
	public static int zigzagEncode(int n)
	{
		// if n is (byte)-1, this results in:
		// 0b1111_1110 ^ 0b1111_1111 == 0b0000_0001
		return (n << 1) ^ (n >> 31);
	}
	
	/**
	 * Reverses {@link VarintUtil#zigzagEncode(int)}.
	 *
	 * @param n a zigzag encoded value
	 * @return the original signed value
	 */
	public static int zigzagDecode(int n)
	{
		// the lowest bit holds the sign, the remaining bits hold the magnitude
		return (n >>> 1) ^ -(n & 1);
	}
	
	
	
	/**
	 * Writes the given value using 1 to 5 bytes.
	 *
	 * @param out the stream to write to
	 * @param value must not be negative. Signed values should be converted
	 *              via {@link VarintUtil#zigzagEncode(int)} first.
	 * @throws IllegalArgumentException if the value is negative
	 * @throws IOException if writing to the stream fails
	 */
	public static void writeVarint(DhDataOutputStream out, int value) throws IOException
	{
		if (value < 0)
		{
			throw new IllegalArgumentException("varint given ["+value+"], varint only accepts positive values.");
		}
		
		while (value >= 128)
		{
			// low 7 bits plus the "more bytes follow" flag
			out.writeByte(value | 128);
			value >>>= 7; // 128 = 2^7
		}
		out.writeByte(value);
	}
	
	/**
	 * Reads a value written by {@link VarintUtil#writeVarint(DhDataOutputStream, int)}.
	 *
	 * @param in the stream to read from
	 * @return the decoded value, never negative
	 * @throws IOException if reading from the stream fails,
	 *                     the varint is longer than 5 bytes,
	 *                     or the value doesn't fit in a non-negative int
	 */
	public static int readVarint(DhDataInputStream in) throws IOException
	{
		int value = 0;
		int shift = 0;
		byte b;
		do
		{
			if (shift >= 32)
			{
				throw new IOException("invalid varint");
			}
			
			b = in.readByte();
			int payload = b & 127;
			
			// the 5th byte only has room for 3 more bits,
			// writeVarint never produces anything larger so the data must be corrupt.
			// Without this check the extra bits would be silently dropped
			// or the result would become negative.
			if (shift == 28 && payload > 7)
			{
				throw new IOException("invalid varint, value doesn't fit in 31 bits");
			}
			
			value |= payload << shift;
			shift += 7;
		}
		while ((b & 128) != 0);
		
		return value;
	}
	
}
@@ -87,16 +87,23 @@ public class DhFullDataSourceRepoTests
Random seededRandom = new Random(3);
for (int i = 0; i < FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; i++)
for (int arrayIndex = 0; arrayIndex < FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; arrayIndex++)
{
fullDataArray[i] = new LongArrayList(1);
fullDataArray[arrayIndex] = new LongArrayList(1);
// random column heights so we can differentiate
// columns from each other
int columnCount = Math.abs(seededRandom.nextInt() % 31) + 1;
for (int j = 0; j < columnCount; j++)
for (int colIndex = 0; colIndex < columnCount; colIndex++)
{
fullDataArray[i].add(FullDataPointUtil.encode(j, 1, j, LodUtil.MAX_MC_LIGHT, LodUtil.MAX_MC_LIGHT));
long datapoint = FullDataPointUtil.encode(
colIndex, // id
1, // height
colIndex, // relative min Y
(byte)(colIndex % LodUtil.MAX_MC_LIGHT), // block light
(byte)((colIndex + 2) % LodUtil.MAX_MC_LIGHT) // sky light
);
fullDataArray[arrayIndex].add(datapoint);
}
}
@@ -113,6 +120,18 @@ public class DhFullDataSourceRepoTests
repo.save(originalDto);
// also create format-1 encoded version to ensure backwards compatibility
long posV1 = DhSectionPos.encode((byte) 6, 2, 3);
FullDataSourceV2 dataSourceFormatV1 = FullDataSourceV2.createWithData(posV1, dataMapping, fullDataArray, columnGenStep, columnWorldCompressionMode);
FullDataSourceV2DTO dtoFormatV1 = FullDataSourceV2DTO.CreateFromDataSource(dataSourceFormatV1, EDhApiDataCompressionMode.LZMA2);
FullDataSourceV2DTO.writeDataSourceDataArrayToBlobV1(
dataSourceFormatV1.dataPoints,
dtoFormatV1.compressedDataByteArray,
EDhApiDataCompressionMode.LZMA2);
dtoFormatV1.dataFormatVersion = FullDataSourceV2DTO.DATA_FORMAT.V1_NO_ADJACENT_DATA;
repo.save(dtoFormatV1);
//=======================//
// confirm DTO data is //
@@ -152,6 +171,21 @@ public class DhFullDataSourceRepoTests
}
}
// check that we have proper backwards compatibility to V1
try (FullDataSourceV2 savedDataSource = repo.getByKey(posV1).createUnitTestDataSource())
{
Assert.assertNotNull("Failed to create DataSource", savedDataSource);
assertArraysAreEqual(originalDataSource.columnGenerationSteps, savedDataSource.columnGenerationSteps);
assertArraysAreEqual(originalDataSource.columnWorldCompressionMode,
savedDataSource.columnWorldCompressionMode);
Assert.assertTrue(originalDataSource.dataPoints.length == savedDataSource.dataPoints.length);
for (int i = 0; i < FullDataSourceV2.WIDTH * FullDataSourceV2.WIDTH; i++)
{
assertArraysAreEqual(originalDataSource.dataPoints[i], savedDataSource.dataPoints[i]);
}
}
//==============//
+94
View File
@@ -0,0 +1,94 @@
/*
* This file is part of the Distant Horizons mod
* licensed under the GNU LGPL v3 License.
*
* Copyright (C) 2020 James Seibel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package tests;
import com.seibel.distanthorizons.api.enums.config.EDhApiDataCompressionMode;
import com.seibel.distanthorizons.core.sql.dto.FullDataSourceV2DTO;
import com.seibel.distanthorizons.core.sql.dto.util.VarintUtil;
import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataInputStream;
import com.seibel.distanthorizons.core.util.objects.dataStreams.DhDataOutputStream;
import org.junit.Assert;
import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
/**
 * Confirms that {@link VarintUtil} can round-trip values
 * through zigzag encoding and through a varint data stream.
 */
public class VarintTest
{
	@Test
	public void Test()
	{
		Assert.assertEquals(0x80, 128);
		
		
		// signed values are zigzag encoded before being written as varints, so test it first
		for (int i = -256; i < 256; i++)
		{
			testZigZagEncoding(i);
		}
		// zigzag encoding should be reversible for the entire int range
		testZigZagEncoding(Integer.MAX_VALUE);
		testZigZagEncoding(Integer.MIN_VALUE);
		
		
		// covers both single and two byte varints
		for (int i = -256; i < 256; i++)
		{
			testSingleVarint(i);
		}
		// largest magnitude values whose zigzag encoding is still non-negative,
		// these need the full 5 bytes
		testSingleVarint(0x3FFFFFFF);
		testSingleVarint(-0x40000000);
	}
	
	/** Asserts that the value is unchanged after being zigzag encoded and decoded. */
	private static void testZigZagEncoding(int value)
	{
		int encodedValue = VarintUtil.zigzagEncode(value);
		int decodedValue = VarintUtil.zigzagDecode(encodedValue);
		Assert.assertEquals(value, decodedValue);
	}
	
	/** Asserts that the value is unchanged after being written to, and read back from, an uncompressed stream. */
	private static void testSingleVarint(int value)
	{
		// write to stream
		ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
		try (DhDataOutputStream outputStream = new DhDataOutputStream(byteArrayOutputStream, EDhApiDataCompressionMode.UNCOMPRESSED))
		{
			// writeVarint rejects negative numbers, so signed values have to be zigzag encoded first
			int encodedValue = VarintUtil.zigzagEncode(value);
			VarintUtil.writeVarint(outputStream, encodedValue);
		}
		catch (IOException e)
		{
			e.printStackTrace();
			Assert.fail("Fail writing varint ["+value+"], error: ["+e.getMessage()+"]");
		}
		
		
		// read stream
		byte[] byteArray = byteArrayOutputStream.toByteArray();
		ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray);
		try (DhDataInputStream inputStream = new DhDataInputStream(byteArrayInputStream, EDhApiDataCompressionMode.UNCOMPRESSED))
		{
			int encodedValue = VarintUtil.readVarint(inputStream);
			int decodedValue = VarintUtil.zigzagDecode(encodedValue);
			Assert.assertEquals(value, decodedValue);
		}
		catch (IOException e)
		{
			e.printStackTrace();
			Assert.fail("Fail reading varint ["+value+"], error: ["+e.getMessage()+"]");
		}
	}
	
}