commonlib/bsd: Add zstd support

This adds the option to compress ramstage and payloads with zstd.

zstd compressed ramstages are typically +5% bigger than lzma compressed
ramstages. The decompressor .text section grows by 20KiB and the
decompressor needs 16KiB more heap than the lzma decompressor.

To use less heap inside the zstd decompressor the build time define
ZSTD_DECODER_INTERNAL_BUFFER is used.

Quote:
 The build macro `ZSTD_DECODER_INTERNAL_BUFFER` can be set to control
 the amount of extra memory used during decompression to store
 literals. This defaults to 64kB.  Reducing this value reduces the
 memory footprint of `ZSTD_DCtx` decompression contexts, but might
 also result in a small decompression speed cost

TEST=Booted on Lenovo X220 with zstd ramstage. The reduced internal
     buffer showed no disadvantage compared to a bigger one.
TEST=Booted on Lenovo X220. The zstd decompressor is twice as fast
     as the lzma decompressor.
     cbmem -t shows:
   - finished ZSTD decompress (ignore for x86)         79,444 (24,494)
   - finished LZMA decompress (ignore for x86)         94,971 (45,545)

TEST=Booted on QEMU Q35, QEMU aarch64 virt, QEMU riscv RV64 with
     zstd compressed ramstage.

Change-Id: Ic1b1f53327c598d07bd83d4391e8012d41696a16
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/69893
Reviewed-by: Julius Werner <jwerner@chromium.org>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
This commit is contained in:
Arthur Heymans 2022-11-22 16:33:29 +01:00 committed by Matt DeVillier
commit 2d99da12a9
13 changed files with 147 additions and 4 deletions

View file

@ -455,6 +455,8 @@ cbfs-files-handler= \
CBFS_COMPRESS_FLAG:=none
ifeq ($(CONFIG_COMPRESS_RAMSTAGE_LZMA),y)
CBFS_COMPRESS_FLAG:=LZMA
else ifeq ($(CONFIG_COMPRESS_RAMSTAGE_ZSTD),y)
CBFS_COMPRESS_FLAG:=ZSTD
endif
ifeq ($(CONFIG_COMPRESS_RAMSTAGE_LZ4),y)
CBFS_COMPRESS_FLAG:=LZ4
@ -463,9 +465,10 @@ endif
CBFS_PAYLOAD_COMPRESS_FLAG:=none
ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZMA),y)
CBFS_PAYLOAD_COMPRESS_FLAG:=LZMA
endif
ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZ4),y)
else ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZ4),y)
CBFS_PAYLOAD_COMPRESS_FLAG:=LZ4
else ifeq ($(CONFIG_COMPRESSED_PAYLOAD_ZSTD),y)
CBFS_PAYLOAD_COMPRESS_FLAG:=ZSTD
endif
CBFS_SECONDARY_PAYLOAD_COMPRESS_FLAG:=none

View file

@ -86,6 +86,12 @@ config COMPRESSED_PAYLOAD_LZMA
In order to reduce the size payloads take up in the ROM chip
coreboot can compress them using the LZMA algorithm.
config COMPRESSED_PAYLOAD_ZSTD
bool "Use ZSTD compression for payloads"
help
In order to reduce the size payloads take up in the ROM chip
coreboot can compress them using the ZSTD algorithm.
config COMPRESSED_PAYLOAD_LZ4
bool "Use LZ4 compression for payloads"
help

View file

@ -202,10 +202,16 @@ config MB_COMPRESS_RAMSTAGE_LZ4
help
Select this in a mainboard to use LZ4 compression by default
config MB_COMPRESS_RAMSTAGE_ZSTD
bool
help
Select this in a mainboard to use ZSTD compression by default
choice
prompt "Ramstage compression"
depends on HAVE_RAMSTAGE && !UNCOMPRESSED_RAMSTAGE
default COMPRESS_RAMSTAGE_LZ4 if MB_COMPRESS_RAMSTAGE_LZ4
default COMPRESS_RAMSTAGE_ZSTD if MB_COMPRESS_RAMSTAGE_ZSTD
default COMPRESS_RAMSTAGE_LZMA
config COMPRESS_RAMSTAGE_LZMA
@ -226,6 +232,13 @@ config COMPRESS_RAMSTAGE_LZ4
If you're not sure, stick with LZMA.
config COMPRESS_RAMSTAGE_ZSTD
bool "Compress ramstage with ZSTD"
help
Compress ramstage with ZSTD.
ZSTD decompresses faster than LZMA, but the decompressor uses more BSS.
The speed advantage matters most on slower CPUs, so it is suited for
platforms with high speed SPI interfaces, but limited computing power.
endchoice
config COMPRESS_PRERAM_STAGES

View file

@ -1,6 +1,7 @@
## SPDX-License-Identifier: GPL-2.0-only
subdirs-y += storage
subdirs-y += bsd/zstd
bootblock-y += mem_pool.c
verstage-y += mem_pool.c
@ -58,6 +59,7 @@ ramstage-y += bsd/lz4_wrapper.c
postcar-y += bsd/lz4_wrapper.c
all-y += list.c
all-y += bsd/zstd_wrapper.c
ramstage-y += sort.c

View file

@ -19,6 +19,6 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
size_t ulz4f(const void *src, void *dst);
/* Decompresses ZSTD image */
size_t uzstdfn(const void *src, size_t srcn, void *dst, size_t dstn);
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn);
#endif /* _COMMONLIB_COMPRESSION_H_ */

View file

@ -0,0 +1,43 @@
## SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
zstd_support=n
ifeq ($(CONFIG_COMPRESS_RAMSTAGE_ZSTD),y)
zstd_support=y
endif
ifeq ($(CONFIG_COMPRESSED_PAYLOAD_ZSTD),y)
zstd_support=y
endif
ifeq ($(zstd_support),y)
all-y += decompress/huf_decompress.c
all-y += decompress/zstd_ddict.c
all-y += decompress/zstd_decompress_block.c
all-y += decompress/zstd_decompress.c
all-y += common/entropy_common.c
all-y += common/fse_decompress.c
CPPFLAGS_common += -DZSTD_DISABLE_ASM=1
# Measured on Intel Sandy Bridge i5-2540M at 800Mhz:
# Setting code size reduction time loss
# -DHUF_FORCE_DECOMPRESS_X1=1 6.4 KiB 6%
# -DHUF_FORCE_DECOMPRESS_X2=1 5.4 KiB 7%
# -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT=1 8.1 KiB 19%
# -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG=1 7.8 KiB 15%
CPPFLAGS_common += -DHUF_FORCE_DECOMPRESS_X1=1
CPPFLAGS_common += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT=1
CPPFLAGS_common += -DZSTD_STRIP_ERROR_STRINGS=1
# Huffman fast decode needs 64bit and LE
CPPFLAGS_x86_32 += -DHUF_DISABLE_FAST_DECODE=1
# Measured on Intel Sandy Bridge i5-2540M at 800Mhz (x86_64 only):
# Setting code size reduction time loss
# -DHUF_DISABLE_FAST_DECODE=1 2.3 KiB 5%
CPPFLAGS_common += -DDYNAMIC_BMI2=0 -DSTATIC_BMI2=0 -DZSTD_DECODER_INTERNAL_BUFFER=2048
endif
CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd
CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd/common
CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd/decompress

View file

@ -667,7 +667,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
{ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
&src, &srcSize);
RETURN_ERROR_IF(
(ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
(ERR_getErrorCode(res) == ZSTD_error_prefix_unknown)
&& (moreThan1Frame==1),
srcSize_wrong,
"At least one frame successfully completed, "

View file

@ -0,0 +1,18 @@
/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */
#include <assert.h>
#include <commonlib/bsd/compression.h>
#include <stdint.h>
#include <stddef.h>
#define ZSTD_STATIC_LINKING_ONLY 1
#include <zstd.h>
#undef ZSTD_STATIC_LINKING_ONLY
#include <zstd_decompress_internal.h>
/*
 * Decompress the ZSTD image of srcn bytes at src into the dstn-byte buffer at dst.
 *
 * Returns the value reported by ZSTD_decompressDCtx() on success, or 0 on any
 * failure (the static context could not be set up, or zstd reported an error).
 */
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn)
{
	/* The decompression context lives in static storage instead of on the heap. */
	static ZSTD_DCtx dctx __aligned(8);
	size_t ret;

	if (!ZSTD_initStaticDCtx(&dctx, sizeof(dctx)))
		return 0;

	ret = ZSTD_decompressDCtx(&dctx, dst, dstn, src, srcn);

	/*
	 * zstd signals failure through a large non-zero error code, while callers
	 * of this wrapper test for 0, so translate errors to 0 here. ERR_isError()
	 * is the inline helper from zstd's error_private.h.
	 */
	if (ERR_isError(ret))
		return 0;

	return ret;
}

View file

@ -37,6 +37,8 @@ enum timestamp_id {
TS_ULZMA_END = 16,
TS_ULZ4F_START = 17,
TS_ULZ4F_END = 18,
TS_UZSTDF_START = 19,
TS_UZSTDF_END = 20,
TS_DEVICE_INIT_CHIPS = 30,
TS_DEVICE_ENUMERATE = 31,
TS_DEVICE_CONFIGURE = 40,
@ -236,6 +238,8 @@ static const struct timestamp_id_to_name {
TS_NAME_DEF(TS_ULZMA_END, 0, "finished LZMA decompress (ignore for x86)"),
TS_NAME_DEF(TS_ULZ4F_START, TS_ULZ4F_END, "starting LZ4 decompress (ignore for x86)"),
TS_NAME_DEF(TS_ULZ4F_END, 0, "finished LZ4 decompress (ignore for x86)"),
TS_NAME_DEF(TS_UZSTDF_START, TS_UZSTDF_END, "starting ZSTD decompress (ignore for x86)"),
TS_NAME_DEF(TS_UZSTDF_END, 0, "finished ZSTD decompress (ignore for x86)"),
TS_NAME_DEF(TS_DEVICE_INIT_CHIPS, TS_DEVICE_ENUMERATE, "early chipset initialization"),
TS_NAME_DEF(TS_DEVICE_ENUMERATE, TS_DEVICE_CONFIGURE, "device enumeration"),
TS_NAME_DEF(TS_DEVICE_CONFIGURE, TS_DEVICE_ENABLE, "device configuration"),

View file

@ -155,6 +155,15 @@ static inline bool cbfs_lzma_enabled(void)
return false;
}
/*
 * Tell whether ZSTD decompression is available in the current environment:
 * either this is the payload loader and payloads are ZSTD-compressed, or this
 * is the ramstage loader and ramstage is ZSTD-compressed.
 */
static inline bool cbfs_zstd_enabled(void)
{
	const bool payload_uses_zstd =
		ENV_PAYLOAD_LOADER && CONFIG(COMPRESSED_PAYLOAD_ZSTD);
	const bool ramstage_uses_zstd =
		ENV_RAMSTAGE_LOADER && CONFIG(COMPRESS_RAMSTAGE_ZSTD);

	return payload_uses_zstd || ramstage_uses_zstd;
}
static bool cbfs_file_hash_mismatch(const void *buffer, size_t size,
const union cbfs_mdata *mdata, bool skip_verification)
{
@ -267,6 +276,24 @@ static size_t cbfs_load_and_decompress(const struct region_device *rdev, void *b
rdev_munmap(rdev, map);
return out_size;
/*
 * ZSTD-compressed file. Only handled when cbfs_zstd_enabled() reports support
 * for the current loader; otherwise 0 is returned.
 */
case CBFS_COMPRESS_ZSTD:
if (!cbfs_zstd_enabled())
return 0;
/* Map the whole region device; a failed mapping is reported as 0. */
map = rdev_mmap_full(rdev);
if (map == NULL)
return 0;
/*
 * Decompress only when the hash check reports no mismatch. On a mismatch
 * out_size keeps the value assigned before this case (not visible in this
 * hunk - presumably 0, confirm against the full function).
 */
if (!cbfs_file_hash_mismatch(map, in_size, mdata, skip_verification)) {
/* Note: timestamp not useful for memory-mapped media (x86) */
timestamp_add_now(TS_UZSTDF_START);
out_size = uzstdn(map, in_size, buffer, buffer_size);
timestamp_add_now(TS_UZSTDF_END);
}
/* The mapping is released on both paths before returning. */
rdev_munmap(rdev, map);
return out_size;
default:

View file

@ -87,6 +87,19 @@ static int load_one_segment(uint8_t *dest,
return 0;
break;
}
/*
 * ZSTD-compressed segment. Rejected with 0 unless COMPRESSED_PAYLOAD_ZSTD is
 * configured.
 */
case CBFS_COMPRESS_ZSTD: {
if (!CONFIG(COMPRESSED_PAYLOAD_ZSTD))
return 0;
printk(BIOS_DEBUG, "using ZSTD\n");
/* Bracket the decompression with start/end timestamps. */
timestamp_add_now(TS_UZSTDF_START);
/* len is passed in as the source length and overwritten with uzstdn()'s result. */
len = uzstdn(src, len, dest, memsz);
timestamp_add_now(TS_UZSTDF_END);
if (!len) /* Decompression Error. */
return 0;
break;
}
case CBFS_COMPRESS_NONE: {
printk(BIOS_DEBUG, "it's not compressed!\n");
memcpy(dest, src, len);

View file

@ -62,6 +62,14 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
return dstn;
}
/*
 * Test double for the ZSTD decompressor: checks srcn and dstn against the
 * values the test registered, then copies dstn bytes from src to dst
 * unchanged and reports dstn as the output size.
 */
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn)
{
	const size_t out_len = dstn;

	check_expected(srcn);
	check_expected(dstn);

	memcpy(dst, src, out_len);
	return out_len;
}
size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
{
check_expected(srcn);

View file

@ -52,6 +52,12 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
return 0;
}
/*
 * Stub for the ZSTD decompressor in a test that is not expected to reach it:
 * any call is reported through fail_msg() with this function's name, and 0 is
 * returned.
 */
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn)
{
fail_msg("Unexpected call to %s", __func__);
return 0;
}
size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
{
fail_msg("Unexpected call to %s", __func__);