From 2d99da12a9c1189d78517b5d700ba92276bf1640 Mon Sep 17 00:00:00 2001 From: Arthur Heymans Date: Tue, 22 Nov 2022 16:33:29 +0100 Subject: [PATCH] commonlib/bsd: Add zstd support This adds the option to compress ramstage and payloads with zstd. zstd compressed ramstages are typically about 5% bigger than lzma compressed ramstages. The decompressor .text section grows by 20KiB and the decompressor needs 16KiB more heap than the lzma decompressor. To use less heap inside the zstd decompressor, the build time define ZSTD_DECODER_INTERNAL_BUFFER is used. Quote: The build macro `ZSTD_DECODER_INTERNAL_BUFFER` can be set to control the amount of extra memory used during decompression to store literals. This defaults to 64kB. Reducing this value reduces the memory footprint of `ZSTD_DCtx` decompression contexts, but might also result in a small decompression speed cost. TEST=Booted on Lenovo X220 with zstd ramstage; the reduced internal buffer showed no disadvantage compared to a bigger one. TEST=Booted on Lenovo X220. The zstd decompressor is twice as fast as the lzma decompressor. cbmem -t shows: - finished ZSTD decompress (ignore for x86) 79,444 (24,494) - finished LZMA decompress (ignore for x86) 94,971 (45,545) TEST=Booted on QEMU Q35, QEMU aarch64 virt, QEMU riscv RV64 with zstd compressed ramstage.
Change-Id: Ic1b1f53327c598d07bd83d4391e8012d41696a16 Signed-off-by: Arthur Heymans Signed-off-by: Patrick Rudolph Reviewed-on: https://review.coreboot.org/c/coreboot/+/69893 Reviewed-by: Julius Werner Tested-by: build bot (Jenkins) --- Makefile.mk | 7 ++- payloads/Kconfig | 6 +++ src/Kconfig | 13 ++++++ src/commonlib/Makefile.mk | 2 + .../bsd/include/commonlib/bsd/compression.h | 2 +- src/commonlib/bsd/zstd/Makefile.mk | 43 +++++++++++++++++++ .../bsd/zstd/decompress/zstd_decompress.c | 2 +- src/commonlib/bsd/zstd_wrapper.c | 18 ++++++++ .../include/commonlib/timestamp_serialized.h | 4 ++ src/lib/cbfs.c | 27 ++++++++++++ src/lib/selfboot.c | 13 ++++++ tests/lib/cbfs-lookup-test.c | 8 ++++ tests/lib/cbfs-verification-test.c | 6 +++ 13 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 src/commonlib/bsd/zstd/Makefile.mk create mode 100644 src/commonlib/bsd/zstd_wrapper.c diff --git a/Makefile.mk b/Makefile.mk index 75787b32d4..cab60b33a4 100644 --- a/Makefile.mk +++ b/Makefile.mk @@ -455,6 +455,8 @@ cbfs-files-handler= \ CBFS_COMPRESS_FLAG:=none ifeq ($(CONFIG_COMPRESS_RAMSTAGE_LZMA),y) CBFS_COMPRESS_FLAG:=LZMA +else ifeq ($(CONFIG_COMPRESS_RAMSTAGE_ZSTD),y) +CBFS_COMPRESS_FLAG:=ZSTD endif ifeq ($(CONFIG_COMPRESS_RAMSTAGE_LZ4),y) CBFS_COMPRESS_FLAG:=LZ4 @@ -463,9 +465,10 @@ endif CBFS_PAYLOAD_COMPRESS_FLAG:=none ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZMA),y) CBFS_PAYLOAD_COMPRESS_FLAG:=LZMA -endif -ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZ4),y) +else ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZ4),y) CBFS_PAYLOAD_COMPRESS_FLAG:=LZ4 +else ifeq ($(CONFIG_COMPRESSED_PAYLOAD_ZSTD),y) +CBFS_PAYLOAD_COMPRESS_FLAG:=ZSTD endif CBFS_SECONDARY_PAYLOAD_COMPRESS_FLAG:=none diff --git a/payloads/Kconfig b/payloads/Kconfig index be902a1b4b..e30b1ec76c 100644 --- a/payloads/Kconfig +++ b/payloads/Kconfig @@ -86,6 +86,12 @@ config COMPRESSED_PAYLOAD_LZMA In order to reduce the size payloads take up in the ROM chip coreboot can compress them using the LZMA algorithm. 
+config COMPRESSED_PAYLOAD_ZSTD + bool "Use ZSTD compression for payloads" + help + In order to reduce the size payloads take up in the ROM chip + coreboot can compress them using the ZSTD algorithm. + config COMPRESSED_PAYLOAD_LZ4 bool "Use LZ4 compression for payloads" help diff --git a/src/Kconfig b/src/Kconfig index a7a34b26a5..2b5a2241cc 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -202,10 +202,16 @@ config MB_COMPRESS_RAMSTAGE_LZ4 help Select this in a mainboard to use LZ4 compression by default +config MB_COMPRESS_RAMSTAGE_ZSTD + bool + help + Select this in a mainboard to use ZSTD compression by default + choice prompt "Ramstage compression" depends on HAVE_RAMSTAGE && !UNCOMPRESSED_RAMSTAGE default COMPRESS_RAMSTAGE_LZ4 if MB_COMPRESS_RAMSTAGE_LZ4 + default COMPRESS_RAMSTAGE_ZSTD if MB_COMPRESS_RAMSTAGE_ZSTD default COMPRESS_RAMSTAGE_LZMA config COMPRESS_RAMSTAGE_LZMA @@ -226,6 +232,13 @@ config COMPRESS_RAMSTAGE_LZ4 If you're not sure, stick with LZMA. +config COMPRESS_RAMSTAGE_ZSTD + bool "Compress ramstage with ZSTD" + help + Compress ramstage with ZSTD. + ZSTD decompresses faster than LZMA, including on slower CPUs, but + uses more BSS. It is suited for platforms with high speed SPI + interfaces but limited computing power. 
endchoice config COMPRESS_PRERAM_STAGES diff --git a/src/commonlib/Makefile.mk b/src/commonlib/Makefile.mk index 44f63e1ea6..b4bc1b8fae 100644 --- a/src/commonlib/Makefile.mk +++ b/src/commonlib/Makefile.mk @@ -1,6 +1,7 @@ ## SPDX-License-Identifier: GPL-2.0-only subdirs-y += storage +subdirs-y += bsd/zstd bootblock-y += mem_pool.c verstage-y += mem_pool.c @@ -58,6 +59,7 @@ ramstage-y += bsd/lz4_wrapper.c postcar-y += bsd/lz4_wrapper.c all-y += list.c +all-y += bsd/zstd_wrapper.c ramstage-y += sort.c diff --git a/src/commonlib/bsd/include/commonlib/bsd/compression.h b/src/commonlib/bsd/include/commonlib/bsd/compression.h index 1f70fa4c60..b13bc4c8b6 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/compression.h +++ b/src/commonlib/bsd/include/commonlib/bsd/compression.h @@ -19,6 +19,6 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn); size_t ulz4f(const void *src, void *dst); /* Decompresses ZSTD image */ -size_t uzstdfn(const void *src, size_t srcn, void *dst, size_t dstn); +size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn); #endif /* _COMMONLIB_COMPRESSION_H_ */ diff --git a/src/commonlib/bsd/zstd/Makefile.mk b/src/commonlib/bsd/zstd/Makefile.mk new file mode 100644 index 0000000000..d04ad31e98 --- /dev/null +++ b/src/commonlib/bsd/zstd/Makefile.mk @@ -0,0 +1,43 @@ +## SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only + +zstd_support=n +ifeq ($(CONFIG_COMPRESS_RAMSTAGE_ZSTD),y) +zstd_support=y +endif +ifeq ($(CONFIG_COMPRESSED_PAYLOAD_ZSTD),y) +zstd_support=y +endif + +ifeq ($(zstd_support),y) +all-y += decompress/huf_decompress.c +all-y += decompress/zstd_ddict.c +all-y += decompress/zstd_decompress_block.c +all-y += decompress/zstd_decompress.c + +all-y += common/entropy_common.c +all-y += common/fse_decompress.c + +CPPFLAGS_common += -DZSTD_DISABLE_ASM=1 +# Measured on Intel Sandy Bridge i5-2540M at 800Mhz: +# Setting code size reduction time loss +# -DHUF_FORCE_DECOMPRESS_X1=1 6.4 KiB 6% +# 
-DHUF_FORCE_DECOMPRESS_X2=1 5.4 KiB 7% +# -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT=1 8.1 KiB 19% +# -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG=1 7.8 KiB 15% + +CPPFLAGS_common += -DHUF_FORCE_DECOMPRESS_X1=1 +CPPFLAGS_common += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT=1 +CPPFLAGS_common += -DZSTD_STRIP_ERROR_STRINGS=1 + +# Huffman fast decode needs 64bit and LE +CPPFLAGS_x86_32 += -DHUF_DISABLE_FAST_DECODE=1 +# Measured on Intel Sandy Bridge i5-2540M at 800Mhz (x86_64 only): +# Setting code size reduction time loss +# -DHUF_DISABLE_FAST_DECODE=1 2.3 KiB 5% + +CPPFLAGS_common += -DDYNAMIC_BMI2=0 -DSTATIC_BMI2=0 -DZSTD_DECODER_INTERNAL_BUFFER=2048 +endif + +CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd +CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd/common +CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd/decompress diff --git a/src/commonlib/bsd/zstd/decompress/zstd_decompress.c b/src/commonlib/bsd/zstd/decompress/zstd_decompress.c index e24795d15e..e87776a68c 100644 --- a/src/commonlib/bsd/zstd/decompress/zstd_decompress.c +++ b/src/commonlib/bsd/zstd/decompress/zstd_decompress.c @@ -667,7 +667,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); RETURN_ERROR_IF( - (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + (ERR_getErrorCode(res) == ZSTD_error_prefix_unknown) && (moreThan1Frame==1), srcSize_wrong, "At least one frame successfully completed, " diff --git a/src/commonlib/bsd/zstd_wrapper.c b/src/commonlib/bsd/zstd_wrapper.c new file mode 100644 index 0000000000..8905120635 --- /dev/null +++ b/src/commonlib/bsd/zstd_wrapper.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */ + +#include +#include +#include +#include +#define ZSTD_STATIC_LINKING_ONLY 1 +#include +#undef ZSTD_STATIC_LINKING_ONLY +#include + +/* + * Decompress the ZSTD data in src (srcn bytes) into dst (capacity dstn). + * Returns the number of bytes written to dst, or 0 on failure. + */ +size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn) +{ + static ZSTD_DCtx dctx __aligned(8); + size_t ret; + + if (!ZSTD_initStaticDCtx(&dctx, 
sizeof(dctx))) + return 0; + + ret = ZSTD_decompressDCtx(&dctx, dst, dstn, src, srcn); + /* + * zstd reports failure as an error code encoded in a large size_t, but + * callers of this wrapper treat only 0 as failure. Use the inline + * ERR_isError() check and map every error to 0. + */ + if (ERR_isError(ret)) + return 0; + + return ret; +} diff --git a/src/commonlib/include/commonlib/timestamp_serialized.h b/src/commonlib/include/commonlib/timestamp_serialized.h index 2a159f64b8..3999b35069 100644 --- a/src/commonlib/include/commonlib/timestamp_serialized.h +++ b/src/commonlib/include/commonlib/timestamp_serialized.h @@ -37,6 +37,8 @@ enum timestamp_id { TS_ULZMA_END = 16, TS_ULZ4F_START = 17, TS_ULZ4F_END = 18, + TS_UZSTDF_START = 19, + TS_UZSTDF_END = 20, TS_DEVICE_INIT_CHIPS = 30, TS_DEVICE_ENUMERATE = 31, TS_DEVICE_CONFIGURE = 40, @@ -236,6 +238,8 @@ static const struct timestamp_id_to_name { TS_NAME_DEF(TS_ULZMA_END, 0, "finished LZMA decompress (ignore for x86)"), TS_NAME_DEF(TS_ULZ4F_START, TS_ULZ4F_END, "starting LZ4 decompress (ignore for x86)"), TS_NAME_DEF(TS_ULZ4F_END, 0, "finished LZ4 decompress (ignore for x86)"), + TS_NAME_DEF(TS_UZSTDF_START, TS_UZSTDF_END, "starting ZSTD decompress (ignore for x86)"), + TS_NAME_DEF(TS_UZSTDF_END, 0, "finished ZSTD decompress (ignore for x86)"), TS_NAME_DEF(TS_DEVICE_INIT_CHIPS, TS_DEVICE_ENUMERATE, "early chipset initialization"), TS_NAME_DEF(TS_DEVICE_ENUMERATE, TS_DEVICE_CONFIGURE, "device enumeration"), TS_NAME_DEF(TS_DEVICE_CONFIGURE, TS_DEVICE_ENABLE, "device configuration"), diff --git a/src/lib/cbfs.c b/src/lib/cbfs.c index 4ec65a864c..1dd9288bff 100644 --- a/src/lib/cbfs.c +++ b/src/lib/cbfs.c @@ -155,6 +155,15 @@ static inline bool cbfs_lzma_enabled(void) return false; } +static inline bool cbfs_zstd_enabled(void) +{ + if (ENV_PAYLOAD_LOADER && CONFIG(COMPRESSED_PAYLOAD_ZSTD)) + return true; + if (ENV_RAMSTAGE_LOADER && CONFIG(COMPRESS_RAMSTAGE_ZSTD)) + return true; + return false; +} + static bool cbfs_file_hash_mismatch(const void *buffer, size_t size, const union cbfs_mdata *mdata, bool skip_verification) { @@ -267,6 +276,24 @@ static size_t cbfs_load_and_decompress(const struct region_device *rdev, void *b rdev_munmap(rdev, map); + return out_size; + 
case CBFS_COMPRESS_ZSTD: + if (!cbfs_zstd_enabled()) + return 0; + + map = rdev_mmap_full(rdev); + if (map == NULL) + return 0; + + if (!cbfs_file_hash_mismatch(map, in_size, mdata, skip_verification)) { + /* Note: timestamp not useful for memory-mapped media (x86) */ + timestamp_add_now(TS_UZSTDF_START); + out_size = uzstdn(map, in_size, buffer, buffer_size); + timestamp_add_now(TS_UZSTDF_END); + } + + rdev_munmap(rdev, map); + return out_size; default: diff --git a/src/lib/selfboot.c b/src/lib/selfboot.c index e21c493701..5a2321815e 100644 --- a/src/lib/selfboot.c +++ b/src/lib/selfboot.c @@ -87,6 +87,19 @@ static int load_one_segment(uint8_t *dest, return 0; break; } + case CBFS_COMPRESS_ZSTD: { + if (!CONFIG(COMPRESSED_PAYLOAD_ZSTD)) + return 0; + + printk(BIOS_DEBUG, "using ZSTD\n"); + timestamp_add_now(TS_UZSTDF_START); + len = uzstdn(src, len, dest, memsz); + timestamp_add_now(TS_UZSTDF_END); + if (!len) /* Decompression Error. */ + return 0; + break; + } + case CBFS_COMPRESS_NONE: { printk(BIOS_DEBUG, "it's not compressed!\n"); memcpy(dest, src, len); diff --git a/tests/lib/cbfs-lookup-test.c b/tests/lib/cbfs-lookup-test.c index 75628683b6..d6cc5d95a2 100644 --- a/tests/lib/cbfs-lookup-test.c +++ b/tests/lib/cbfs-lookup-test.c @@ -62,6 +62,14 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn) return dstn; } +size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn) +{ + check_expected(srcn); + check_expected(dstn); + memcpy(dst, src, dstn); + return dstn; +} + size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn) { check_expected(srcn); diff --git a/tests/lib/cbfs-verification-test.c b/tests/lib/cbfs-verification-test.c index b1a39bcaac..8abc36c053 100644 --- a/tests/lib/cbfs-verification-test.c +++ b/tests/lib/cbfs-verification-test.c @@ -52,6 +52,12 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn) return 0; } +size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn) +{ + 
fail_msg("Unexpected call to %s", __func__); + return 0; +} + size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn) { fail_msg("Unexpected call to %s", __func__);