commonlib/bsd: Add zstd support
This adds the option to compress ramstage and payloads with zstd.
zstd compressed ramstages are typically +5% bigger than lzma compressed
ramstages. The decompressor .text section grows by 20KiB and the
decompressor needs 16KiB more heap than the lzma decompressor.
To use less heap inside the zstd decompressor the build time define
ZSTD_DECODER_INTERNAL_BUFFER is used.
Quote:
The build macro `ZSTD_DECODER_INTERNAL_BUFFER` can be set to control
the amount of extra memory used during decompression to store
literals. This defaults to 64kB. Reducing this value reduces the
memory footprint of `ZSTD_DCtx` decompression contexts, but might
also result in a small decompression speed cost
TEST=Booted on Lenovo X220 with zstd ramstage: the reduced internal
buffer showed no disadvantage over a bigger internal buffer.
TEST=Booted on Lenovo X220. The zstd decompressor is twice as fast
as the lzma decompressor.
cbmem -t shows:
- finished ZSTD decompress (ignore for x86) 79,444 (24,494)
- finished LZMA decompress (ignore for x86) 94,971 (45,545)
TEST=Booted on QEMU Q35, QEMU aarch64 virt, QEMU riscv RV64 with
zstd compressed ramstage.
Change-Id: Ic1b1f53327c598d07bd83d4391e8012d41696a16
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/69893
Reviewed-by: Julius Werner <jwerner@chromium.org>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
This commit is contained in:
parent
4ca5e9c8c6
commit
2d99da12a9
13 changed files with 147 additions and 4 deletions
|
|
@ -455,6 +455,8 @@ cbfs-files-handler= \
|
|||
CBFS_COMPRESS_FLAG:=none
|
||||
ifeq ($(CONFIG_COMPRESS_RAMSTAGE_LZMA),y)
|
||||
CBFS_COMPRESS_FLAG:=LZMA
|
||||
else ifeq ($(CONFIG_COMPRESS_RAMSTAGE_ZSTD),y)
|
||||
CBFS_COMPRESS_FLAG:=ZSTD
|
||||
endif
|
||||
ifeq ($(CONFIG_COMPRESS_RAMSTAGE_LZ4),y)
|
||||
CBFS_COMPRESS_FLAG:=LZ4
|
||||
|
|
@ -463,9 +465,10 @@ endif
|
|||
CBFS_PAYLOAD_COMPRESS_FLAG:=none
|
||||
ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZMA),y)
|
||||
CBFS_PAYLOAD_COMPRESS_FLAG:=LZMA
|
||||
endif
|
||||
ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZ4),y)
|
||||
else ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZ4),y)
|
||||
CBFS_PAYLOAD_COMPRESS_FLAG:=LZ4
|
||||
else ifeq ($(CONFIG_COMPRESSED_PAYLOAD_ZSTD),y)
|
||||
CBFS_PAYLOAD_COMPRESS_FLAG:=ZSTD
|
||||
endif
|
||||
|
||||
CBFS_SECONDARY_PAYLOAD_COMPRESS_FLAG:=none
|
||||
|
|
|
|||
|
|
@ -86,6 +86,12 @@ config COMPRESSED_PAYLOAD_LZMA
|
|||
In order to reduce the size payloads take up in the ROM chip
|
||||
coreboot can compress them using the LZMA algorithm.
|
||||
|
||||
config COMPRESSED_PAYLOAD_ZSTD
|
||||
bool "Use ZSTD compression for payloads"
|
||||
help
|
||||
In order to reduce the size payloads take up in the ROM chip
|
||||
coreboot can compress them using the ZSTD algorithm.
|
||||
|
||||
config COMPRESSED_PAYLOAD_LZ4
|
||||
bool "Use LZ4 compression for payloads"
|
||||
help
|
||||
|
|
|
|||
13
src/Kconfig
13
src/Kconfig
|
|
@ -202,10 +202,16 @@ config MB_COMPRESS_RAMSTAGE_LZ4
|
|||
help
|
||||
Select this in a mainboard to use LZ4 compression by default
|
||||
|
||||
config MB_COMPRESS_RAMSTAGE_ZSTD
|
||||
bool
|
||||
help
|
||||
Select this in a mainboard to use ZSTD compression by default
|
||||
|
||||
choice
|
||||
prompt "Ramstage compression"
|
||||
depends on HAVE_RAMSTAGE && !UNCOMPRESSED_RAMSTAGE
|
||||
default COMPRESS_RAMSTAGE_LZ4 if MB_COMPRESS_RAMSTAGE_LZ4
|
||||
default COMPRESS_RAMSTAGE_ZSTD if MB_COMPRESS_RAMSTAGE_ZSTD
|
||||
default COMPRESS_RAMSTAGE_LZMA
|
||||
|
||||
config COMPRESS_RAMSTAGE_LZMA
|
||||
|
|
@ -226,6 +232,13 @@ config COMPRESS_RAMSTAGE_LZ4
|
|||
|
||||
If you're not sure, stick with LZMA.
|
||||
|
||||
config COMPRESS_RAMSTAGE_ZSTD
|
||||
bool "Compress ramstage with ZSTD"
|
||||
help
|
||||
Compress ramstage with ZSTD.
|
||||
This decompresses faster than LZMA but uses more BSS.
|
||||
The speed advantage matters most on slower CPUs, so it is suited for
|
||||
platforms with high speed SPI interfaces, but limited computing power.
|
||||
endchoice
|
||||
|
||||
config COMPRESS_PRERAM_STAGES
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
## SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
subdirs-y += storage
|
||||
subdirs-y += bsd/zstd
|
||||
|
||||
bootblock-y += mem_pool.c
|
||||
verstage-y += mem_pool.c
|
||||
|
|
@ -58,6 +59,7 @@ ramstage-y += bsd/lz4_wrapper.c
|
|||
postcar-y += bsd/lz4_wrapper.c
|
||||
|
||||
all-y += list.c
|
||||
all-y += bsd/zstd_wrapper.c
|
||||
|
||||
ramstage-y += sort.c
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,6 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
|
|||
size_t ulz4f(const void *src, void *dst);
|
||||
|
||||
/* Decompresses ZSTD image */
|
||||
size_t uzstdfn(const void *src, size_t srcn, void *dst, size_t dstn);
|
||||
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn);
|
||||
|
||||
#endif /* _COMMONLIB_COMPRESSION_H_ */
|
||||
|
|
|
|||
43
src/commonlib/bsd/zstd/Makefile.mk
Normal file
43
src/commonlib/bsd/zstd/Makefile.mk
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
## SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
|
||||
|
||||
zstd_support=n
|
||||
ifeq ($(CONFIG_COMPRESS_RAMSTAGE_ZSTD),y)
|
||||
zstd_support=y
|
||||
endif
|
||||
ifeq ($(CONFIG_COMPRESSED_PAYLOAD_ZSTD),y)
|
||||
zstd_support=y
|
||||
endif
|
||||
|
||||
ifeq ($(zstd_support),y)
|
||||
all-y += decompress/huf_decompress.c
|
||||
all-y += decompress/zstd_ddict.c
|
||||
all-y += decompress/zstd_decompress_block.c
|
||||
all-y += decompress/zstd_decompress.c
|
||||
|
||||
all-y += common/entropy_common.c
|
||||
all-y += common/fse_decompress.c
|
||||
|
||||
CPPFLAGS_common += -DZSTD_DISABLE_ASM=1
|
||||
# Measured on Intel Sandy Bridge i5-2540M at 800MHz:
|
||||
# Setting code size reduction time loss
|
||||
# -DHUF_FORCE_DECOMPRESS_X1=1 6.4 KiB 6%
|
||||
# -DHUF_FORCE_DECOMPRESS_X2=1 5.4 KiB 7%
|
||||
# -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT=1 8.1 KiB 19%
|
||||
# -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG=1 7.8 KiB 15%
|
||||
|
||||
CPPFLAGS_common += -DHUF_FORCE_DECOMPRESS_X1=1
|
||||
CPPFLAGS_common += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT=1
|
||||
CPPFLAGS_common += -DZSTD_STRIP_ERROR_STRINGS=1
|
||||
|
||||
# Huffman fast decode needs 64bit and LE
|
||||
CPPFLAGS_x86_32 += -DHUF_DISABLE_FAST_DECODE=1
|
||||
# Measured on Intel Sandy Bridge i5-2540M at 800MHz (x86_64 only):
|
||||
# Setting code size reduction time loss
|
||||
# -DHUF_DISABLE_FAST_DECODE=1 2.3 KiB 5%
|
||||
|
||||
CPPFLAGS_common += -DDYNAMIC_BMI2=0 -DSTATIC_BMI2=0 -DZSTD_DECODER_INTERNAL_BUFFER=2048
|
||||
endif
|
||||
|
||||
CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd
|
||||
CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd/common
|
||||
CPPFLAGS_common += -I$(src)/commonlib/bsd/zstd/decompress
|
||||
|
|
@ -667,7 +667,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
|||
{ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
|
||||
&src, &srcSize);
|
||||
RETURN_ERROR_IF(
|
||||
(ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
|
||||
(ERR_getErrorCode(res) == ZSTD_error_prefix_unknown)
|
||||
&& (moreThan1Frame==1),
|
||||
srcSize_wrong,
|
||||
"At least one frame successfully completed, "
|
||||
|
|
|
|||
18
src/commonlib/bsd/zstd_wrapper.c
Normal file
18
src/commonlib/bsd/zstd_wrapper.c
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */
|
||||
|
||||
#include <assert.h>
|
||||
#include <commonlib/bsd/compression.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#define ZSTD_STATIC_LINKING_ONLY 1
|
||||
#include <zstd.h>
|
||||
#undef ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd_decompress_internal.h>
|
||||
|
||||
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn)
|
||||
{
|
||||
static ZSTD_DCtx dctx __aligned(8);
|
||||
if (!ZSTD_initStaticDCtx(&dctx, sizeof(dctx)))
|
||||
return 0;
|
||||
return ZSTD_decompressDCtx(&dctx, dst, dstn, src, srcn);
|
||||
}
|
||||
|
|
@ -37,6 +37,8 @@ enum timestamp_id {
|
|||
TS_ULZMA_END = 16,
|
||||
TS_ULZ4F_START = 17,
|
||||
TS_ULZ4F_END = 18,
|
||||
TS_UZSTDF_START = 19,
|
||||
TS_UZSTDF_END = 20,
|
||||
TS_DEVICE_INIT_CHIPS = 30,
|
||||
TS_DEVICE_ENUMERATE = 31,
|
||||
TS_DEVICE_CONFIGURE = 40,
|
||||
|
|
@ -236,6 +238,8 @@ static const struct timestamp_id_to_name {
|
|||
TS_NAME_DEF(TS_ULZMA_END, 0, "finished LZMA decompress (ignore for x86)"),
|
||||
TS_NAME_DEF(TS_ULZ4F_START, TS_ULZ4F_END, "starting LZ4 decompress (ignore for x86)"),
|
||||
TS_NAME_DEF(TS_ULZ4F_END, 0, "finished LZ4 decompress (ignore for x86)"),
|
||||
TS_NAME_DEF(TS_UZSTDF_START, TS_UZSTDF_END, "starting ZSTD decompress (ignore for x86)"),
|
||||
TS_NAME_DEF(TS_UZSTDF_END, 0, "finished ZSTD decompress (ignore for x86)"),
|
||||
TS_NAME_DEF(TS_DEVICE_INIT_CHIPS, TS_DEVICE_ENUMERATE, "early chipset initialization"),
|
||||
TS_NAME_DEF(TS_DEVICE_ENUMERATE, TS_DEVICE_CONFIGURE, "device enumeration"),
|
||||
TS_NAME_DEF(TS_DEVICE_CONFIGURE, TS_DEVICE_ENABLE, "device configuration"),
|
||||
|
|
|
|||
|
|
@ -155,6 +155,15 @@ static inline bool cbfs_lzma_enabled(void)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool cbfs_zstd_enabled(void)
|
||||
{
|
||||
if (ENV_PAYLOAD_LOADER && CONFIG(COMPRESSED_PAYLOAD_ZSTD))
|
||||
return true;
|
||||
if (ENV_RAMSTAGE_LOADER && CONFIG(COMPRESS_RAMSTAGE_ZSTD))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool cbfs_file_hash_mismatch(const void *buffer, size_t size,
|
||||
const union cbfs_mdata *mdata, bool skip_verification)
|
||||
{
|
||||
|
|
@ -267,6 +276,24 @@ static size_t cbfs_load_and_decompress(const struct region_device *rdev, void *b
|
|||
|
||||
rdev_munmap(rdev, map);
|
||||
|
||||
return out_size;
|
||||
case CBFS_COMPRESS_ZSTD:
|
||||
if (!cbfs_zstd_enabled())
|
||||
return 0;
|
||||
|
||||
map = rdev_mmap_full(rdev);
|
||||
if (map == NULL)
|
||||
return 0;
|
||||
|
||||
if (!cbfs_file_hash_mismatch(map, in_size, mdata, skip_verification)) {
|
||||
/* Note: timestamp not useful for memory-mapped media (x86) */
|
||||
timestamp_add_now(TS_UZSTDF_START);
|
||||
out_size = uzstdn(map, in_size, buffer, buffer_size);
|
||||
timestamp_add_now(TS_UZSTDF_END);
|
||||
}
|
||||
|
||||
rdev_munmap(rdev, map);
|
||||
|
||||
return out_size;
|
||||
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -87,6 +87,19 @@ static int load_one_segment(uint8_t *dest,
|
|||
return 0;
|
||||
break;
|
||||
}
|
||||
case CBFS_COMPRESS_ZSTD: {
|
||||
if (!CONFIG(COMPRESSED_PAYLOAD_ZSTD))
|
||||
return 0;
|
||||
|
||||
printk(BIOS_DEBUG, "using ZSTD\n");
|
||||
timestamp_add_now(TS_UZSTDF_START);
|
||||
len = uzstdn(src, len, dest, memsz);
|
||||
timestamp_add_now(TS_UZSTDF_END);
|
||||
if (!len) /* Decompression Error. */
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
|
||||
case CBFS_COMPRESS_NONE: {
|
||||
printk(BIOS_DEBUG, "it's not compressed!\n");
|
||||
memcpy(dest, src, len);
|
||||
|
|
|
|||
|
|
@ -62,6 +62,14 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
|
|||
return dstn;
|
||||
}
|
||||
|
||||
/*
 * Test mock for uzstdn(): hands srcn and dstn to check_expected(), then
 * copies dstn bytes from src to dst unmodified and reports dstn as the output
 * size. No actual ZSTD decompression is performed.
 */
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn)
|
||||
{
|
||||
check_expected(srcn);
|
||||
check_expected(dstn);
|
||||
memcpy(dst, src, dstn);
|
||||
return dstn;
|
||||
}
|
||||
|
||||
size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
|
||||
{
|
||||
check_expected(srcn);
|
||||
|
|
|
|||
|
|
@ -52,6 +52,12 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Test stub for uzstdn(): this function is not expected to be called here, so
 * any call is reported through fail_msg() and 0 is returned.
 */
size_t uzstdn(const void *src, size_t srcn, void *dst, size_t dstn)
|
||||
{
|
||||
fail_msg("Unexpected call to %s", __func__);
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
|
||||
{
|
||||
fail_msg("Unexpected call to %s", __func__);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue