From 524937719933e5b1c0002d3ae7e908e30a8bc380 Mon Sep 17 00:00:00 2001 From: James Seibel Date: Sun, 24 Mar 2024 15:11:42 -0500 Subject: [PATCH] Add data caching to Lzma data compression --- .../dataStreams/DhDataOutputStream.java | 21 ++- .../objects/dataStreams/LzmaArrayCache.java | 135 ++++++++++++++++++ 2 files changed, 150 insertions(+), 6 deletions(-) create mode 100644 core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/LzmaArrayCache.java diff --git a/core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/DhDataOutputStream.java b/core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/DhDataOutputStream.java index fe5665024..3337cb26f 100644 --- a/core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/DhDataOutputStream.java +++ b/core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/DhDataOutputStream.java @@ -22,11 +22,12 @@ package com.seibel.distanthorizons.core.util.objects.dataStreams; import com.github.luben.zstd.RecyclingBufferPool; import com.github.luben.zstd.ZstdOutputStream; import com.seibel.distanthorizons.api.enums.config.EDhApiDataCompressionMode; +import com.seibel.distanthorizons.core.logging.DhLoggerBuilder; import net.jpountz.lz4.LZ4Factory; import net.jpountz.lz4.LZ4FrameOutputStream; import net.jpountz.xxhash.XXHashFactory; -import org.tukaani.xz.LZMA2Options; -import org.tukaani.xz.XZOutputStream; +import org.apache.logging.log4j.Logger; +import org.tukaani.xz.*; import java.io.*; @@ -37,6 +38,10 @@ import java.io.*; */ public class DhDataOutputStream extends DataOutputStream { + private static final ThreadLocal LZMA_RESETTABLE_ARRAY_CACHE_GETTER = ThreadLocal.withInitial(() -> new ResettableArrayCache(new LzmaArrayCache())); + + + public DhDataOutputStream(OutputStream stream, EDhApiDataCompressionMode compressionMode) throws IOException { super(warpStream(new BufferedOutputStream(stream), compressionMode)); @@ -50,16 +55,20 @@ public class DhDataOutputStream extends DataOutputStream case LZ4: return new LZ4FrameOutputStream(stream, LZ4FrameOutputStream.BLOCKSIZE.SIZE_64KB, -1L, - // using native instances reduces GC pressures + // using native instances reduce GC pressure LZ4Factory.nativeInstance().fastCompressor(), XXHashFactory.nativeInstance().hash32(), LZ4FrameOutputStream.FLG.Bits.BLOCK_INDEPENDENCE); case Z_STD: return new ZstdOutputStream(stream, RecyclingBufferPool.INSTANCE); case LZMA2: - // in James' testing preset 4 has the best balance between compression ratio and speed - // 5 is slightly more compressed 0.128 vs 0.139, but is roughly 60% slower - return new XZOutputStream(stream, new LZMA2Options(4)); + // using an array cache significantly reduces GC pressure + ResettableArrayCache arrayCache = LZMA_RESETTABLE_ARRAY_CACHE_GETTER.get(); + arrayCache.reset(); + // Note: if the LZMA2Options are changed the array cache may need to be re-tested. + // the array cache was specifically tested and tuned for LZMA preset 4 + return new XZOutputStream(stream, new LZMA2Options(4), + XZ.CHECK_CRC64, arrayCache); default: throw new IllegalArgumentException("No compressor defined for ["+compressionMode+"]"); diff --git a/core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/LzmaArrayCache.java b/core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/LzmaArrayCache.java new file mode 100644 index 000000000..e932e9936 --- /dev/null +++ b/core/src/main/java/com/seibel/distanthorizons/core/util/objects/dataStreams/LzmaArrayCache.java @@ -0,0 +1,135 @@ +package com.seibel.distanthorizons.core.util.objects.dataStreams; + +import com.seibel.distanthorizons.core.logging.DhLoggerBuilder; +import it.unimi.dsi.fastutil.ints.Int2ReferenceArrayMap; +import it.unimi.dsi.fastutil.ints.IntUnaryOperator; +import org.apache.logging.log4j.Logger; +import org.tukaani.xz.ArrayCache; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * LZMA requires a custom object to cache it's backend arrays. + * + * TODO there's a lot of duplicate code in this class since it has logic for both + * int[]'s and byte[]'s. + */ +public class LzmaArrayCache extends ArrayCache +{ + private static final Logger LOGGER = DhLoggerBuilder.getLogger(); + + /** + * In James' testing the byte and int caches only ever had to store 2 and 4 arrays respectively. + * With the in mind we could take a few shortcuts, but if that changes then we need to be + * notified as it might cause issues with the current logic. + */ + public static final int WARN_CACHE_LENGTH_EXCEEDED = 10; + + public static final AtomicInteger MAX_INT_CACHE_LENGTH_REF = new AtomicInteger(WARN_CACHE_LENGTH_EXCEEDED); + public static final AtomicInteger MAX_BYTE_CACHE_LENGTH_REF = new AtomicInteger(WARN_CACHE_LENGTH_EXCEEDED); + + + public final IntUnaryOperator maxByteCacheSizeUnaryOperator = (x) -> Math.max(this.byteCache.size(), x); + public final IntUnaryOperator maxIntCacheSizeUnaryOperator = (x) -> Math.max(this.intCache.size(), x); + + /** + * generally only 2 items long
+ * {@link Int2ReferenceArrayMap} can be used since the cache should only be a few items long. + * If the array ends up being longer then this design will need to be changed. + */ + public final Int2ReferenceArrayMap> byteCache = new Int2ReferenceArrayMap<>(); + /** generally only 4 items long */ + public final Int2ReferenceArrayMap> intCache = new Int2ReferenceArrayMap<>(); + + + + //=============// + // byte arrays // + //=============// + + @Override + public byte[] getByteArray(int size, boolean fillWithZeros) + { + ArrayList cacheList = this.byteCache.computeIfAbsent(size, (newSize) -> new ArrayList<>(4)); + if (cacheList.size() == 0) + { + return new byte[size]; + } + + byte[] array = cacheList.remove(cacheList.size()-1); + if (array == null) + { + return new byte[size]; + } + // the array needs to be cleared to prevent accidentally sending dirty data + Arrays.fill(array, (byte)0); + return array; + } + + @Override + public void putArray(byte[] array) + { + int size = array.length; + this.byteCache.computeIfAbsent(size, (newSize) -> new ArrayList<>()); + this.byteCache.get(size).add(array); + + + if (this.byteCache.size() > WARN_CACHE_LENGTH_EXCEEDED) + { + int previousMax = MAX_BYTE_CACHE_LENGTH_REF.getAndUpdate(this.maxByteCacheSizeUnaryOperator); + int newMax = MAX_BYTE_CACHE_LENGTH_REF.get(); + if (newMax > previousMax) + { + LOGGER.warn("LZMA byte array cache expected size exceeded. Expected max length ["+WARN_CACHE_LENGTH_EXCEEDED+"], actual length ["+this.byteCache.size()+"]."); + } + } + } + + + + //============// + // int arrays // + //============// + + @Override + public int[] getIntArray(int size, boolean fillWithZeros) + { + ArrayList cacheList = this.intCache.computeIfAbsent(size, (newSize) -> new ArrayList<>(4)); + if (cacheList.size() == 0) + { + return new int[size]; + } + + int[] array = cacheList.remove(cacheList.size()-1); + if (array == null) + { + return new int[size]; + } + // the array needs to be cleared to prevent accidentally sending dirty data + Arrays.fill(array, (byte)0); + return array; + } + + @Override + public void putArray(int[] array) + { + int size = array.length; + this.intCache.computeIfAbsent(size, (newSize) -> new ArrayList<>()); + this.intCache.get(size).add(array); + + + if (this.intCache.size() > WARN_CACHE_LENGTH_EXCEEDED) + { + int previousMax = MAX_INT_CACHE_LENGTH_REF.getAndUpdate(this.maxIntCacheSizeUnaryOperator); + int newMax = MAX_INT_CACHE_LENGTH_REF.get(); + if (newMax > previousMax) + { + LOGGER.warn("LZMA int array cache expected size exceeded. Expected max length ["+WARN_CACHE_LENGTH_EXCEEDED+"], actual length ["+this.intCache.size()+"]."); + } + } + } + + +}