Re-Add Z_STD compression for testing

This commit is contained in:
James Seibel
2025-06-28 11:37:06 -05:00
parent 839ea1e778
commit f3947312c1
7 changed files with 191 additions and 93 deletions
@@ -39,8 +39,8 @@ public enum EDhApiDataCompressionMode
/**
* Should only be used internally and for unit testing. <br><br>
*
* Read Speed: 1.64 MS / DTO <br>
* Write Speed: 12.44 MS / DTO <br>
* Read Speed: 6.09 MS / DTO <br>
* Write Speed: 6.01 MS / DTO <br>
* Compression ratio: 1.0 <br>
*/
@DisallowSelectingViaConfigGui
@@ -49,28 +49,29 @@ public enum EDhApiDataCompressionMode
/**
* Extremely fast (often faster than uncompressed), but generally poor compression. <br><br>
*
* Read Speed: 1.85 MS / DTO <br>
* Write Speed: 9.46 MS / DTO <br>
* Compression ratio: 0.3638 <br>
* Read Speed: 3.25 MS / DTO <br>
* Write Speed: 5.99 MS / DTO <br>
* Compression ratio: 0.4513 <br>
*/
LZ4(1),
/*
/**
* Decent speed and good compression. <br><br>
*
* Read Speed: 11.78 MS / DTO <br>
* Write Speed: 16.76 MS / DTO <br>
* Compression ratio: 0.2199 <br>
* Read Speed: 9.31 MS / DTO <br>
* Write Speed: 15.13 MS / DTO <br>
* Compression ratio: 0.2606 <br>
*/
//@Deprecated
//Z_STD(2),
//@DisallowSelectingViaConfigGui
Z_STD(2),
/**
* Extremely slow, but very good compression. <br><br>
*
* Read Speed: 12.25 MS / DTO <br>
* Write Speed: 490.07 MS / DTO <br>
* Compression ratio: 0.1242 <br>
* Read Speed: 13.29 MS / DTO <br>
* Write Speed: 70.95 MS / DTO <br>
* Compression ratio: 0.2068 <br>
*/
LZMA2(3);
@@ -1342,20 +1342,20 @@ public class Config
+ EDhApiDataCompressionMode.UNCOMPRESSED + " \n"
+ "Should only be used for testing, is worse in every way vs ["+EDhApiDataCompressionMode.LZ4+"].\n"
+ "Expected Compression Ratio: 1.0\n"
+ "Estimated average DTO read speed: 1.64 milliseconds\n"
+ "Estimated average DTO write speed: 12.44 milliseconds\n"
+ "Estimated average DTO read speed: 3.25 milliseconds\n"
+ "Estimated average DTO write speed: 5.99 milliseconds\n"
+ "\n"
+ EDhApiDataCompressionMode.LZ4 + " \n"
+ "A good option if you're CPU limited and have plenty of hard drive space.\n"
+ "Expected Compression Ratio: 0.36\n"
+ "Expected Compression Ratio: 0.26\n"
+ "Estimated average DTO read speed: 1.85 ms\n"
+ "Estimated average DTO write speed: 9.46 ms\n"
+ "\n"
+ EDhApiDataCompressionMode.LZMA2 + " \n"
+ "Slow but very good compression.\n"
+ "Expected Compression Ratio: 0.14\n"
+ "Estimated average DTO read speed: 11.89 ms\n"
+ "Estimated average DTO write speed: 192.01 ms\n"
+ "Expected Compression Ratio: 0.2\n"
+ "Estimated average DTO read speed: 13.29 ms\n"
+ "Estimated average DTO write speed: 70.95 ms\n"
+ "")
.build();
@@ -24,6 +24,7 @@ import com.seibel.distanthorizons.core.Initializer;
import com.seibel.distanthorizons.core.util.objects.DataCorruptedException;
import com.seibel.distanthorizons.coreapi.ModInfo;
import net.jpountz.lz4.LZ4FrameInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.tukaani.xz.ResettableArrayCache;
@@ -44,6 +45,7 @@ import java.io.*;
public class DhDataInputStream extends DataInputStream
{
private static final ThreadLocal<ResettableArrayCache> LZMA_RESETTABLE_ARRAY_CACHE_GETTER = ThreadLocal.withInitial(() -> new ResettableArrayCache(new LzmaArrayCache()));
private static final ThreadLocal<ZstdArrayCache> ZSTD_RESETTABLE_ARRAY_CACHE_GETTER = ThreadLocal.withInitial(() -> new ZstdArrayCache());
private static final Logger LOGGER = LogManager.getLogger();
@@ -62,6 +64,8 @@ public class DhDataInputStream extends DataInputStream
return stream;
case LZ4:
return new LZ4FrameInputStream(stream);
case Z_STD:
return new ZstdCompressorInputStream(stream, ZSTD_RESETTABLE_ARRAY_CACHE_GETTER.get());
case LZMA2:
// using an array cache significantly reduces GC pressure
ResettableArrayCache arrayCache = LZMA_RESETTABLE_ARRAY_CACHE_GETTER.get();
@@ -23,6 +23,7 @@ import com.seibel.distanthorizons.api.enums.config.EDhApiDataCompressionMode;
import net.jpountz.lz4.LZ4Factory;
import net.jpountz.lz4.LZ4FrameOutputStream;
import net.jpountz.xxhash.XXHashFactory;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.tukaani.xz.*;
@@ -53,6 +54,9 @@ public class DhDataOutputStream extends DataOutputStream
{
case UNCOMPRESSED:
return stream;
case Z_STD:
return new ZstdCompressorOutputStream(stream, 3, true, true);
case LZ4:
return new LZ4FrameOutputStream(stream,
LZ4FrameOutputStream.BLOCKSIZE.SIZE_64KB, -1L,
@@ -0,0 +1,85 @@
package com.seibel.distanthorizons.core.util.objects.dataStreams;
import com.github.luben.zstd.BufferPool;
import com.seibel.distanthorizons.core.logging.DhLoggerBuilder;
import it.unimi.dsi.fastutil.ints.Int2ReferenceArrayMap;
import org.apache.logging.log4j.Logger;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.IntUnaryOperator;
/**
 * Zstd requires a custom {@link BufferPool} to cache its backend buffers. <br>
 * Released buffers are grouped by the length of their backing array so they can be
 * handed out again the next time a buffer of the same size is requested. <br><br>
 *
 * This class does no locking of its own, so a single instance should only be
 * used by one thread at a time.
 */
public class ZstdArrayCache implements BufferPool
{
	private static final Logger LOGGER = DhLoggerBuilder.getLogger();
	
	/**
	 * In James' testing the byte and int caches only ever had to store 2 and 4 arrays respectively.
	 * With that in mind we could take a few shortcuts, but if that changes then we need to be
	 * notified as it might cause issues with the current logic.
	 */
	public static final int WARN_CACHE_LENGTH_EXCEEDED = 10;
	/** largest number of distinct buffer sizes seen by any cache instance, used to only warn when a new maximum is reached */
	public static final AtomicInteger MAX_BYTE_CACHE_LENGTH_REF = new AtomicInteger(WARN_CACHE_LENGTH_EXCEEDED);
	/** returns whichever is larger: the given value or this cache's current number of distinct buffer sizes */
	public final IntUnaryOperator maxByteCacheSizeUnaryOperator = (x) -> Math.max(this.bufferCache.size(), x);
	
	/**
	 * generally only 2 items long <br>
	 * {@link Int2ReferenceArrayMap} can be used since the cache should only be a few items long.
	 * If the array ends up being longer than that, this design will need to be changed. <br><br>
	 *
	 * Key: backing array length <br>
	 * Value: the released buffers with that length
	 */
	public final Int2ReferenceArrayMap<ArrayList<ByteBuffer>> bufferCache = new Int2ReferenceArrayMap<>();
	
	
	
	//=========//
	// buffers //
	//=========//
	
	/**
	 * @param size the requested buffer size in bytes
	 * @return a previously released buffer whose backing array is exactly {@code size} long if one is cached,
	 *          otherwise a newly allocated heap buffer
	 */
	@Override
	public ByteBuffer get(int size)
	{
		// a plain lookup is used here so a cache miss doesn't add an empty list to the map
		ArrayList<ByteBuffer> cacheList = this.bufferCache.get(size);
		if (cacheList == null || cacheList.isEmpty())
		{
			return ByteBuffer.allocate(size);
		}
		
		// take from the end so the list never has to shift its elements
		ByteBuffer buffer = cacheList.remove(cacheList.size() - 1);
		if (buffer == null)
		{
			// shouldn't normally happen, but the cache is public so it could have been modified externally
			return ByteBuffer.allocate(size);
		}
		
		// reset position/limit so a recycled buffer looks the same as a newly allocated one
		buffer.clear();
		return buffer;
	}
	
	/**
	 * Stores the given buffer so it can be returned by a later {@link #get(int)} call.
	 *
	 * @param buffer the buffer to recycle, must be backed by an accessible array
	 */
	@Override
	public void release(ByteBuffer buffer)
	{
		int size = buffer.array().length;
		this.bufferCache.computeIfAbsent(size, (newSize) -> new ArrayList<>(4)).add(buffer);
		
		int cacheLength = this.bufferCache.size();
		if (cacheLength > WARN_CACHE_LENGTH_EXCEEDED)
		{
			// Only log when this release raised the shared maximum.
			// The comparison uses the value returned by the atomic update instead of reading
			// the counter a second time, so another thread's update can't skew the result.
			int previousMax = MAX_BYTE_CACHE_LENGTH_REF.getAndUpdate(this.maxByteCacheSizeUnaryOperator);
			if (cacheLength > previousMax)
			{
				LOGGER.warn("Zstd buffer cache expected size exceeded. Expected max length ["+WARN_CACHE_LENGTH_EXCEEDED+"], actual length ["+cacheLength+"].");
			}
		}
	}
	
}
@@ -935,9 +935,11 @@
"distanthorizons.config.enum.EDhApiDataCompressionMode.UNCOMPRESSED":
"Uncompressed",
"distanthorizons.config.enum.EDhApiDataCompressionMode.LZ4":
"Fast/Big - LZ4",
"Fastest/Big - LZ4",
"distanthorizons.config.enum.EDhApiDataCompressionMode.Z_STD":
"Fast/Small - Z_STD",
"distanthorizons.config.enum.EDhApiDataCompressionMode.LZMA2":
"Slow/Small - LZMA2",
"Slow/Smallest - LZMA2",
"distanthorizons.config.enum.EDhApiWorldCompressionMode.MERGE_SAME_BLOCKS":
"1. Merge Same Blocks",
+73 -71
View File
@@ -25,11 +25,11 @@ import com.seibel.distanthorizons.core.sql.dto.FullDataSourceV2DTO;
import com.seibel.distanthorizons.core.sql.repo.FullDataSourceV2Repo;
import com.seibel.distanthorizons.coreapi.util.StringUtil;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
import org.junit.Assert;
import org.junit.Test;
import java.io.*;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
/**
* <strong>Note:</strong>
@@ -47,22 +47,14 @@ import java.text.StringCharacterIterator;
*/
public class CompressionTest
{
public static String TEST_DIR = "C:\\DistantHorizonsWorkspace\\distant-horizons\\run\\saves\\Arcapelago\\data";
public static String TEST_DIR = "C:\\DistantHorizonsWorkspace\\distant-horizons\\run\\client\\saves\\Archipelego\\data";
public static String DB_FILE_NAME_PREFIX = "DistantHorizons";
public static String UNCOMPRESSED_DB_FILE_NAME = "DistantHorizons.sqlite";
public static String UNCOMPRESSED_DB_FILE_NAME = "DistantHorizons_uncompressed.sqlite";
/** -1 will test all of them */
public static int MAX_DTO_TEST_COUNT = -1;
//@Test
/** Runs {@code testCompressor} using the {@link EDhApiDataCompressionMode#UNCOMPRESSED} mode. */
public void NoCompression()
{
	final EDhApiDataCompressionMode mode = EDhApiDataCompressionMode.UNCOMPRESSED;
	this.testCompressor("Uncompressed", mode);
}
// collapse the following commented out code when looking at tests
//@Test
@@ -132,17 +124,6 @@ public class CompressionTest
//}
//@Test
//public void Zstd()
//{
// String compressorName = "Zstd";
//
// DhDataInputStream.CreateInputStreamFunc createInputStreamFunc = (inputStream) -> new ZstdInputStream(inputStream);
// DhDataOutputStream.CreateOutputStreamFunc createOutputStreamFunc = (outputStream) -> new ZstdOutputStream(outputStream);
//
// this.testCompressor(compressorName, createInputStreamFunc, createOutputStreamFunc);
//}
////@Test
//public void ZstdDictionary() throws SQLException // isn't any better than normal Zstd
//{
@@ -205,6 +186,13 @@ public class CompressionTest
// this.testCompressor(compressorName, createInputStreamFunc, createOutputStreamFunc);
//}
//@Test
/** Runs {@code testCompressor} using the {@link EDhApiDataCompressionMode#UNCOMPRESSED} mode. */
public void NoCompression()
{
	final EDhApiDataCompressionMode mode = EDhApiDataCompressionMode.UNCOMPRESSED;
	this.testCompressor("Uncompressed", mode);
}
//@Test
public void Lz4() // fast, poor compression
{
@@ -213,11 +201,11 @@ public class CompressionTest
}
//@Test
//public void Zstd() // middle of the road
//{
// String compressorName = "Zstd";
// this.testCompressor(compressorName, EDhApiDataCompressionMode.Z_STD);
//}
/** Runs {@code testCompressor} using the {@link EDhApiDataCompressionMode#Z_STD} mode. */
public void Zstd() // middle of the road
{
	final EDhApiDataCompressionMode mode = EDhApiDataCompressionMode.Z_STD;
	this.testCompressor("Zstd", mode);
}
//@Test
public void LZMA2() // very slow, very good compression though
@@ -254,13 +242,15 @@ public class CompressionTest
long totalCompressedFileSizeInBytes;
FullDataSourceV2Repo uncompressedRepo = null;
FullDataSourceV2Repo compressedRepo = null;
try
{
String uncompressedDatabaseFilePath = TEST_DIR + "/" + UNCOMPRESSED_DB_FILE_NAME;
File uncompressedDatabaseFile = new File(uncompressedDatabaseFilePath);
Assert.assertTrue(uncompressedDatabaseFile.exists());
FullDataSourceV2Repo uncompressedRepo = new FullDataSourceV2Repo("jdbc:sqlite", uncompressedDatabaseFile);
uncompressedRepo = new FullDataSourceV2Repo("jdbc:sqlite", uncompressedDatabaseFile);
String compressedDatabaseFilePath = TEST_DIR + "/output/" + DB_FILE_NAME_PREFIX + "_" + compressorName + ".sqlite";
@@ -268,7 +258,7 @@ public class CompressionTest
compressedDatabaseFile.mkdirs();
compressedDatabaseFile.delete();
Assert.assertTrue(!compressedDatabaseFile.exists());
FullDataSourceV2Repo compressedRepo = new FullDataSourceV2Repo("jdbc:sqlite", uncompressedDatabaseFile);
compressedRepo = new FullDataSourceV2Repo("jdbc:sqlite", compressedDatabaseFile);
@@ -292,47 +282,47 @@ public class CompressionTest
// uncompressed input //
FullDataSourceV2DTO uncompressedDto = uncompressedRepo.getByKey(pos);
Assert.assertEquals(uncompressedDto.compressionModeValue, EDhApiDataCompressionMode.UNCOMPRESSED.value);
FullDataSourceV2 uncompressedDataSource = uncompressedDto.createUnitTestDataSource();
long uncompressedDtoSize = uncompressedRepo.getDataSizeInBytes(pos);
minUncompressedDtoSizeInBytes = Math.min(uncompressedDtoSize, minUncompressedDtoSizeInBytes);
maxUncompressedDtoSizeInBytes = Math.max(uncompressedDtoSize, maxUncompressedDtoSizeInBytes);
avgUncompressedDtoSizeInBytes += uncompressedDtoSize;
// compress file //
long startWriteNanoTime = System.nanoTime();
FullDataSourceV2DTO compressedDto = FullDataSourceV2DTO.CreateFromDataSource(uncompressedDataSource, compressionMode);
compressedRepo.save(compressedDto);
long endWriteNanoTime = System.nanoTime();
totalWriteTimeInNano += (endWriteNanoTime - startWriteNanoTime);
long compressedDtoSize = compressedRepo.getDataSizeInBytes(pos);
minCompressedDtoSizeInBytes = Math.min(compressedDtoSize, minCompressedDtoSizeInBytes);
maxCompressedDtoSizeInBytes = Math.max(compressedDtoSize, maxCompressedDtoSizeInBytes);
avgCompressedDtoSizeInBytes += compressedDtoSize;
// read compressed file //
long startReadNanoTime = System.nanoTime();
compressedDto = compressedRepo.getByKey(pos);
FullDataSourceV2 compressedDataSource = compressedDto.createUnitTestDataSource();
long endReadMsTime = System.nanoTime();
totalReadTimeInNano += (endReadMsTime - startReadNanoTime);
processedDtoCount++;
try (FullDataSourceV2DTO uncompressedDto = uncompressedRepo.getByKey(pos))
{
Assert.assertEquals(uncompressedDto.compressionModeValue, EDhApiDataCompressionMode.UNCOMPRESSED.value);
FullDataSourceV2 uncompressedDataSource = uncompressedDto.createUnitTestDataSource();
long uncompressedDtoSize = uncompressedRepo.getDataSizeInBytes(pos);
minUncompressedDtoSizeInBytes = Math.min(uncompressedDtoSize, minUncompressedDtoSizeInBytes);
maxUncompressedDtoSizeInBytes = Math.max(uncompressedDtoSize, maxUncompressedDtoSizeInBytes);
avgUncompressedDtoSizeInBytes += uncompressedDtoSize;
// compress file //
long startWriteNanoTime = System.nanoTime();
FullDataSourceV2DTO compressedDto = FullDataSourceV2DTO.CreateFromDataSource(uncompressedDataSource, compressionMode);
compressedRepo.save(compressedDto);
long endWriteNanoTime = System.nanoTime();
totalWriteTimeInNano += (endWriteNanoTime - startWriteNanoTime);
long compressedDtoSize = compressedRepo.getDataSizeInBytes(pos);
minCompressedDtoSizeInBytes = Math.min(compressedDtoSize, minCompressedDtoSizeInBytes);
maxCompressedDtoSizeInBytes = Math.max(compressedDtoSize, maxCompressedDtoSizeInBytes);
avgCompressedDtoSizeInBytes += compressedDtoSize;
// read compressed file //
long startReadNanoTime = System.nanoTime();
compressedDto = compressedRepo.getByKey(pos);
FullDataSourceV2 compressedDataSource = compressedDto.createUnitTestDataSource();
long endReadMsTime = System.nanoTime();
totalReadTimeInNano += (endReadMsTime - startReadNanoTime);
processedDtoCount++;
}
}
catch (Exception | Error e)
{
@@ -371,6 +361,18 @@ public class CompressionTest
e.printStackTrace();
Assert.fail(e.getMessage());
}
finally
{
if(uncompressedRepo != null)
{
uncompressedRepo.close();
}
if(compressedRepo != null)
{
compressedRepo.close();
}
}
}