coreboot/src/lib/lzmadecode.c
Arthur Heymans 7552eb210c lib/lzmadecode: Allow for 8 byte reads on 64bit
This adds an optimization to lzma decode to also read from the boot
medium in chunks of 8 bytes if that is the general purpose register
length instead of always 4 bytes. It depends on the cache / memory / spi
controller whether this is faster, but it's likely to be either the same
or faster.

TESTED
- google/vilboz: cached boot medium
64bit before - 32bit - 64bit after
load FSP-M: 35,674 - 35,595 - 34,690
load ramstage: 42,134 - 43,378 - 40,882
load FSP-S: 24,954 - 25,496 - 24,368

- foxconn/g41m: uncached boot medium for testing
64bit before - 32bit - 64bit after
load ramstage: 51,164 - 51,872 - 51,894

Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/70175
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Lean Sheng Tan <sheng.tan@9elements.com>
Reviewed-by: Julius Werner <jwerner@chromium.org>
2024-02-21 16:19:05 +00:00

441 lines
11 KiB
C

/*
LzmaDecode.c
LZMA Decoder (optimized for Speed version)
LZMA SDK 4.40 Copyright (c) 1999-2006 Igor Pavlov (2006-05-01)
http://www.7-zip.org/
LZMA SDK is licensed under two licenses:
1) GNU Lesser General Public License (GNU LGPL)
2) Common Public License (CPL)
It means that you can select one of these two licenses and
follow rules of that license.
SPECIAL EXCEPTION:
Igor Pavlov, as the author of this Code, expressly permits you to
statically or dynamically link your Code (or bind by name) to the
interfaces of this file without subjecting your linked Code to the
terms of the CPL or GNU LGPL. Any modifications or additions
to this file, however, are subject to the LGPL or CPL terms.
*/
/*
 * Optional function attribute used on the decoder entry point below.
 * When CONFIG(DECOMPRESS_OFAST) is enabled it expands to the compiler's
 * optimize("Ofast") function attribute; otherwise it expands to nothing.
 */
#if CONFIG(DECOMPRESS_OFAST)
#define __lzma_attribute_Ofast__ __attribute__((optimize("Ofast")))
#else
#define __lzma_attribute_Ofast__
#endif
#include "lzmadecode.h"
#include <types.h>
/*
 * Range decoder helpers.
 *
 * All macros in this group are statement fragments, not expressions with
 * their own scope: they read and write local variables of the function
 * that uses them (Buffer, BufferLim, Range, Code, bound, look_ahead,
 * look_ahead_ptr) and RC_TEST contains a `return`, so they may only be used
 * inside a function that returns an LZMA_RESULT_* value.
 */

/* Range is refilled with one more input byte whenever it drops below
 * kTopValue (1 << 24); see RC_NORMALIZE. */
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
/* Bit probabilities are fixed-point values scaled to kBitModelTotal (1 << 11);
 * kNumMoveBits is the shift used when adapting a probability after each
 * decoded bit (see UpdateBit0 / UpdateBit1). */
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back
* to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim
* is *reached* (not surpassed!), meaning we can't allow that to happen while
* there are still bytes to decode from the algorithm's point of view.
*
* Evaluates to the next input byte:
*  - if look_ahead still holds unread bytes, the next one of those;
*  - else, if Buffer is not sizeof(SizeT)-aligned or at most sizeof(SizeT) bytes
*    remain before BufferLim, a single byte read from Buffer;
*  - else a full SizeT is loaded into look_ahead, Buffer is advanced by
*    sizeof(SizeT) and the first byte of that word is returned.
* Note that after a wide load Buffer is ahead of the bytes actually consumed. */
#define RC_READ_BYTE \
(look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \
: ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \
|| ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \
: ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \
(look_ahead_ptr = 1), look_ahead.raw[0])))
/* Reset the range decoder: Range covers the full 32-bit interval and Code is
 * primed by shifting in five input bytes (each guarded by RC_TEST). */
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
{ \
int i; \
\
for (i = 0; i < 5; i++) { \
RC_TEST; \
Code = (Code << 8) | RC_READ_BYTE; \
} \
}
/* Input exhaustion check: makes the enclosing function return
 * LZMA_RESULT_DATA_ERROR when Buffer has reached BufferLim. */
#define RC_TEST { if (Buffer == BufferLim) return LZMA_RESULT_DATA_ERROR; }
/* Point the decoder at an input buffer of bufferSize bytes and prime it. */
#define RC_INIT(buffer, bufferSize) Buffer = buffer; \
BufferLim = buffer + bufferSize; RC_INIT2
/* If Range has shrunk below kTopValue, scale Range and Code up by 8 bits and
 * pull one more input byte into Code. This expands to a bare `if` statement,
 * so it is valid with or without a trailing semicolon. */
#define RC_NORMALIZE \
if (Range < kTopValue) { \
RC_TEST; \
Range <<= 8; \
Code = (Code << 8) | RC_READ_BYTE; \
}
/* Start decoding one bit with probability *(p): normalizes, computes `bound`
 * and opens an `if (Code < bound)`. The statement that follows is the
 * "bit is 0" branch; an `else` attached to it is the "bit is 1" branch.
 * The branches are expected to call UpdateBit0(p) / UpdateBit1(p). */
#define IfBit0(p) \
RC_NORMALIZE; \
bound = (Range >> kNumBitModelTotalBits) * *(p); \
if (Code < bound)
/* Commit a decoded 0 bit: keep the lower part of the range and move the
 * probability towards kBitModelTotal. */
#define UpdateBit0(p) \
Range = bound; \
*(p) += (kBitModelTotal - *(p)) >> kNumMoveBits
/* Commit a decoded 1 bit: keep the upper part of the range and move the
 * probability towards zero. */
#define UpdateBit1(p) \
Range -= bound; \
Code -= bound; \
*(p) -= (*(p)) >> kNumMoveBits
/* Decode one bit using probability *(p) and append it to the low end of mi
 * (mi = 2 * mi + bit). A0 is executed after a 0 bit, A1 after a 1 bit; both
 * are pasted as statements, so they may contain `break`. */
#define RC_GET_BIT2(p, mi, A0, A1) \
IfBit0(p) { \
UpdateBit0(p); \
mi <<= 1; \
A0; \
} else { \
UpdateBit1(p); \
mi = (mi + mi) + 1; \
A1; \
}
/* RC_GET_BIT2 without any extra per-branch action. */
#define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ;, ;)
/* Decode a numLevels-bit symbol, most significant bit first, walking a binary
 * tree of probabilities rooted at probs[1]. res is used as the running tree
 * index and finally holds the symbol in the range 0 .. (1 << numLevels) - 1. */
#define RangeDecoderBitTreeDecode(probs, numLevels, res) \
{ \
int i = numLevels; \
\
res = 1; \
do { \
CProb *cp = probs + res; \
RC_GET_BIT(cp, res) \
} while (--i != 0); \
res -= (1 << numLevels); \
}
/*
 * Probability model layout.
 *
 * The decoder keeps every adaptive probability in one flat CProb array; the
 * constants below are element offsets into that array (or into a sub-array
 * of it), each defined as the previous offset plus the previous table's size.
 */

/* Tables indexed by posState are sized for up to 1 << kNumPosBitsMax states. */
#define kNumPosBitsMax 4
#define kNumPosStatesMax (1 << kNumPosBitsMax)
/* Match length coding: a "low" and a "mid" 3-bit tree per posState and one
 * shared 8-bit "high" tree. */
#define kLenNumLowBits 3
#define kLenNumLowSymbols (1 << kLenNumLowBits)
#define kLenNumMidBits 3
#define kLenNumMidSymbols (1 << kLenNumMidBits)
#define kLenNumHighBits 8
#define kLenNumHighSymbols (1 << kLenNumHighBits)
/* Offsets inside one length coder: two choice bits followed by the low, mid
 * and high trees. kNumLenProbs is the total size of one length coder. */
#define LenChoice 0
#define LenChoice2 (LenChoice + 1)
#define LenLow (LenChoice2 + 1)
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
/* The decoder's `state` variable takes values below kNumStates. LzmaDecode
 * leaves it below kNumLitStates after emitting a literal and at or above
 * kNumLitStates after a match or repeat. */
#define kNumStates 12
#define kNumLitStates 7
/* Distance coding: a 6-bit position slot is decoded first. Slots below
 * kStartPosModelIndex are the distance itself, slots below kEndPosModelIndex
 * use the SpecPos probabilities for the extra bits, and larger slots use
 * fixed-probability bits followed by kNumAlignBits bits from the Align table. */
#define kStartPosModelIndex 4
#define kEndPosModelIndex 14
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
#define kNumPosSlotBits 6
/* The position slot tree is selected by the match length, clamped to
 * kNumLenToPosStates - 1. */
#define kNumLenToPosStates 4
#define kNumAlignBits 4
#define kAlignTableSize (1 << kNumAlignBits)
/* Added to every decoded length value to obtain the real match length. */
#define kMatchMinLen 2
/* Offsets of the individual tables inside the flat probability array. The
 * literal coder comes last and its size depends on lc + lp (see LzmaDecode). */
#define IsMatch 0
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
#define IsRepG0 (IsRep + kNumStates)
#define IsRepG1 (IsRepG0 + kNumStates)
#define IsRepG2 (IsRepG1 + kNumStates)
#define IsRep0Long (IsRepG2 + kNumStates)
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
#define LenCoder (Align + kAlignTableSize)
#define RepLenCoder (LenCoder + kNumLenProbs)
#define Literal (RepLenCoder + kNumLenProbs)
/* Build-time consistency check: the offset computed here must equal the
 * LZMA_BASE_SIZE from the header. The bare identifier below is intentionally
 * not valid C, so a mismatch stops the build. */
#if Literal != LZMA_BASE_SIZE
StopCompilingDueBUG
#endif
/*
 * Unpack the LZMA properties byte into its lc / lp / pb components.
 *
 * The first byte of propsData encodes (pb * 5 + lp) * 9 + lc. Only that byte
 * is read here.
 *
 * propsRes:  receives lc, lp and pb; left untouched on failure.
 * propsData: start of the properties header.
 * size:      number of bytes available at propsData.
 *
 * Returns LZMA_RESULT_OK on success, or LZMA_RESULT_DATA_ERROR if fewer than
 * LZMA_PROPERTIES_SIZE bytes are available or the byte is out of range
 * (>= 9 * 5 * 5).
 */
int LzmaDecodeProperties(CLzmaProperties *propsRes,
	const unsigned char *propsData, int size)
{
	unsigned int packed;

	if (size < LZMA_PROPERTIES_SIZE)
		return LZMA_RESULT_DATA_ERROR;

	packed = propsData[0];
	if (packed >= 9 * 5 * 5)
		return LZMA_RESULT_DATA_ERROR;

	/* Peel the fields off from the most significant "digit" down. */
	propsRes->pb = packed / (9 * 5);
	packed %= 9 * 5;
	propsRes->lp = packed / 9;
	propsRes->lc = packed % 9;

	return LZMA_RESULT_OK;
}
/* Value stored in `len` when the end-of-stream marker has been decoded. */
#define kLzmaStreamWasFinishedId (-1)

/*
 * Decode an LZMA stream from memory to memory in a single call.
 *
 * vs:               decoder state. vs->Properties (lc, lp, pb) must already
 *                   be filled in. vs->Probs must have room for
 *                   Literal + (LZMA_LIT_SIZE << (lc + lp)) entries; all of
 *                   them are reset here before decoding starts.
 * inStream/inSize:  compressed input, starting with the range coder's five
 *                   initialisation bytes.
 * inSizeProcessed:  receives the number of input bytes consumed. It is set
 *                   to 0 on entry and stays 0 if an error is returned.
 * outStream/outSize: output buffer; at most outSize bytes are written.
 * outSizeProcessed: receives the number of bytes written. It is set to 0 on
 *                   entry and stays 0 if an error is returned.
 *
 * Decoding stops when outSize bytes have been produced or when the
 * end-of-stream marker is decoded, whichever comes first.
 *
 * Returns LZMA_RESULT_OK on success. Returns LZMA_RESULT_DATA_ERROR if the
 * input runs out when another byte is needed (see RC_TEST) or if a match
 * refers to data before the start of the output buffer.
 */
__lzma_attribute_Ofast__
int LzmaDecode(CLzmaDecoderState *vs,
	const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed,
	unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed)
{
	CProb *p = vs->Probs;
	SizeT nowPos = 0;
	Byte previousByte = 0;
	UInt32 posStateMask = (1 << (vs->Properties.pb)) - 1;
	UInt32 literalPosMask = (1 << (vs->Properties.lp)) - 1;
	int lc = vs->Properties.lc;
	int state = 0;
	/* The four most recently used match distances, most recent first. */
	UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
	int len = 0;
	const Byte *Buffer;
	const Byte *BufferLim;
	/*
	 * Word-sized input prefetch used by RC_READ_BYTE. look_ahead_ptr is the
	 * index of the next unread byte in look_ahead.raw; sizeof(SizeT) means
	 * the prefetch buffer is empty.
	 */
	int look_ahead_ptr = sizeof(SizeT);
	union {
		Byte raw[sizeof(SizeT)];
		SizeT dw;
	} look_ahead;
	UInt32 Range;
	UInt32 Code;

	*inSizeProcessed = 0;
	*outSizeProcessed = 0;

	/* Start every probability at one half. */
	{
		UInt32 i;
		UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc
			+ vs->Properties.lp));
		for (i = 0; i < numProbs; i++)
			p[i] = kBitModelTotal >> 1;
	}

	RC_INIT(inStream, inSize);

	while (nowPos < outSize) {
		CProb *prob;
		UInt32 bound;
		int posState = (int)((nowPos)&posStateMask);

		prob = p + IsMatch + (state << kNumPosBitsMax) + posState;
		IfBit0(prob) {
			/* Literal: decode one byte bit by bit. */
			int symbol = 1;
			UpdateBit0(prob);
			prob = p + Literal + (LZMA_LIT_SIZE *
				((((nowPos) & literalPosMask) << lc)
				+ (previousByte >> (8 - lc))));

			if (state >= kNumLitStates) {
				/*
				 * Previous symbol was a match: use the byte at
				 * distance rep0 as context for as long as the
				 * decoded bits agree with it.
				 */
				int matchByte;
				matchByte = outStream[nowPos - rep0];
				do {
					int bit;
					CProb *probLit;
					matchByte <<= 1;
					bit = (matchByte & 0x100);
					probLit = prob + 0x100 + bit + symbol;
					RC_GET_BIT2(probLit, symbol,
						if (bit != 0)
							break,
						if (bit == 0)
							break)
				} while (symbol < 0x100);
			}
			/* Remaining bits (or all of them) without match context. */
			while (symbol < 0x100) {
				CProb *probLit = prob + symbol;
				RC_GET_BIT(probLit, symbol)
			}
			previousByte = (Byte)symbol;

			outStream[nowPos++] = previousByte;
			if (state < 4)
				state = 0;
			else if (state < 10)
				state -= 3;
			else
				state -= 6;
		} else {
			UpdateBit1(prob);
			prob = p + IsRep + state;
			IfBit0(prob) {
				/*
				 * New match: shift the distance history; rep0
				 * itself is decoded further down.
				 */
				UpdateBit0(prob);
				rep3 = rep2;
				rep2 = rep1;
				rep1 = rep0;
				state = state < kNumLitStates ? 0 : 3;
				prob = p + LenCoder;
			} else {
				/* Repeated match: reuse one of rep0..rep3. */
				UpdateBit1(prob);
				prob = p + IsRepG0 + state;
				IfBit0(prob) {
					UpdateBit0(prob);
					prob = p + IsRep0Long
						+ (state << kNumPosBitsMax)
						+ posState;
					IfBit0(prob) {
						/* Short rep: copy exactly one byte at rep0. */
						UpdateBit0(prob);

						if (nowPos == 0)
							return LZMA_RESULT_DATA_ERROR;

						state = state < kNumLitStates
							? 9 : 11;
						previousByte = outStream[nowPos
							- rep0];
						outStream[nowPos++] =
							previousByte;

						continue;
					} else {
						UpdateBit1(prob);
					}
				} else {
					/* Pick rep1, rep2 or rep3 and move it to the front. */
					UInt32 distance;
					UpdateBit1(prob);
					prob = p + IsRepG1 + state;
					IfBit0(prob) {
						UpdateBit0(prob);
						distance = rep1;
					} else {
						UpdateBit1(prob);
						prob = p + IsRepG2 + state;
						IfBit0(prob) {
							UpdateBit0(prob);
							distance = rep2;
						} else {
							UpdateBit1(prob);
							distance = rep3;
							rep3 = rep2;
						}
						rep2 = rep1;
					}
					rep1 = rep0;
					rep0 = distance;
				}
				state = state < kNumLitStates ? 8 : 11;
				prob = p + RepLenCoder;
			}
			/* Decode the length (still without kMatchMinLen) using the
			 * length coder selected above. */
			{
				int numBits, offset;
				CProb *probLen = prob + LenChoice;
				IfBit0(probLen) {
					UpdateBit0(probLen);
					probLen = prob + LenLow
						+ (posState << kLenNumLowBits);
					offset = 0;
					numBits = kLenNumLowBits;
				} else {
					UpdateBit1(probLen);
					probLen = prob + LenChoice2;
					IfBit0(probLen) {
						UpdateBit0(probLen);
						probLen = prob + LenMid
							+ (posState <<
							kLenNumMidBits);
						offset = kLenNumLowSymbols;
						numBits = kLenNumMidBits;
					} else {
						UpdateBit1(probLen);
						probLen = prob + LenHigh;
						offset = kLenNumLowSymbols
							+ kLenNumMidSymbols;
						numBits = kLenNumHighBits;
					}
				}
				RangeDecoderBitTreeDecode(probLen, numBits,
					len);
				len += offset;
			}

			/* state < 4 here only on the new-match path (0 or 3):
			 * decode the distance into rep0. */
			if (state < 4) {
				int posSlot;
				state += kNumLitStates;
				prob = p + PosSlot +
					((len < kNumLenToPosStates ? len :
					kNumLenToPosStates - 1) <<
					kNumPosSlotBits);
				RangeDecoderBitTreeDecode(prob, kNumPosSlotBits,
					posSlot);
				if (posSlot >= kStartPosModelIndex) {
					int numDirectBits = ((posSlot >> 1)
						- 1);
					rep0 = (2 | ((UInt32)posSlot & 1));
					if (posSlot < kEndPosModelIndex) {
						rep0 <<= numDirectBits;
						prob = p + SpecPos + rep0
							- posSlot - 1;
					} else {
						/* High bits are read with a fixed
						 * one-half probability. */
						numDirectBits -= kNumAlignBits;
						do {
							RC_NORMALIZE
							Range >>= 1;
							rep0 <<= 1;
							if (Code >= Range) {
								Code -= Range;
								rep0 |= 1;
							}
						} while (--numDirectBits != 0);
						prob = p + Align;
						rep0 <<= kNumAlignBits;
						numDirectBits = kNumAlignBits;
					}
					/* Low bits come from a bit tree, least
					 * significant bit first. */
					{
						int i = 1;
						int mi = 1;
						do {
							CProb *prob3 = prob
								+ mi;
							RC_GET_BIT2(prob3, mi,
								;, rep0 |= i);
							i <<= 1;
						} while (--numDirectBits != 0);
					}
				} else
					rep0 = posSlot;
				if (++rep0 == (UInt32)(0)) {
					/* it's for stream version: a distance that
					 * wraps to zero is the end-of-stream marker */
					len = kLzmaStreamWasFinishedId;
					break;
				}
			}

			len += kMatchMinLen;
			if (rep0 > nowPos)
				return LZMA_RESULT_DATA_ERROR;

			/* Byte-wise copy: source and destination may overlap. */
			do {
				previousByte = outStream[nowPos - rep0];
				len--;
				outStream[nowPos++] = previousByte;
			} while (len != 0 && nowPos < outSize);
		}
	}
	RC_NORMALIZE;

	/*
	 * Tell static analysis we know len can have a dead assignment.
	 */
	(void)len;

	/*
	 * RC_READ_BYTE advances Buffer by a whole SizeT when it prefetches, so
	 * Buffer can be ahead of the bytes the decoder has really consumed.
	 * Subtract whatever is still unread in look_ahead so the reported count
	 * covers consumed bytes only.
	 */
	*inSizeProcessed = (SizeT)(Buffer - inStream)
		- (SizeT)(sizeof(SizeT) - (SizeT)look_ahead_ptr);
	*outSizeProcessed = nowPos;
	return LZMA_RESULT_OK;
}