From 2408695dd35a0c27dc16e7e173e335aefbb00878 Mon Sep 17 00:00:00 2001 From: Sean Rhodes Date: Wed, 6 Jul 2022 19:55:07 +0100 Subject: [PATCH] soc/intel/apollolake: Add a loader for the IBB Add a loader that will load the IBB into the CSE via the Ring Protocol Buffer. All registers were taken from Intel document number #336561. Change-Id: Ia41e3909f8099d2ea864166e9ea03e10e40a1b68 Signed-off-by: Sean Rhodes Reviewed-on: https://review.coreboot.org/c/coreboot/+/65270 Reviewed-by: Matt DeVillier Tested-by: build bot (Jenkins) --- src/soc/intel/apollolake/Makefile.mk | 1 + src/soc/intel/apollolake/bootflow.md | 52 +++++++ src/soc/intel/apollolake/include/soc/loader.h | 15 ++ src/soc/intel/apollolake/loader.c | 138 ++++++++++++++++++ 4 files changed, 206 insertions(+) create mode 100644 src/soc/intel/apollolake/bootflow.md create mode 100644 src/soc/intel/apollolake/include/soc/loader.h create mode 100644 src/soc/intel/apollolake/loader.c diff --git a/src/soc/intel/apollolake/Makefile.mk b/src/soc/intel/apollolake/Makefile.mk index 3f2aa3bc2f..ea3e05aebf 100644 --- a/src/soc/intel/apollolake/Makefile.mk +++ b/src/soc/intel/apollolake/Makefile.mk @@ -12,6 +12,7 @@ bootblock-y += car.c bootblock-y += heci.c bootblock-y += gspi.c bootblock-y += i2c.c +bootblock-$(CONFIG_IFWI_IBBM_LOAD) += loader.c bootblock-y += lpc.c bootblock-y += mmap_boot.c bootblock-y += pmutil.c diff --git a/src/soc/intel/apollolake/bootflow.md b/src/soc/intel/apollolake/bootflow.md new file mode 100644 index 0000000000..932cc5ed00 --- /dev/null +++ b/src/soc/intel/apollolake/bootflow.md @@ -0,0 +1,52 @@ +IFWI Boot Flow +============== + + ╔═══════════════════════════════════════════════════════════╗ + ║ BIOS Region (512 KiB maximum) ║ + ║ ╔════════════════════════════════════════════╗ ╔════════╗ ║ + ║ ║ IBBM ║ ║ IBBL ║ ║ + ║ ╚════════════════════════════════════════════╝ ╚════════╝ ║ + ╚═══════════════════════════════════════════════════════════╝ + + When we enter the bootblock, the first 128k will 
be copied + into the SRAM. This will contain the IBBL partition (bootblock) + and whatever will fit in the IBB partition. + + We can't touch the bootblock as we are running from it. The RBP + can handle a maximum of 0x9be2 bytes per copy, so the contents + of the SRAM are typically copied in 4 equally sized chunks of + 0x8000. + + ╔════════════════════╗ ╔════════════════════╗ + ║ SRAM (128 KiB) ║ ║ CAR ║ + ║ ╔═════╗ ╔════════╗ ║ ║ ╔════╗ ║ + ║ ║>IBBM║ ║ IBBL ║ ║ ║ ║IBBM║ ║ + ║ ╚═════╝ ╚════════╝ ║ ║ ╚════╝ ║ + ╚════════════════════╝ ╚════════════════════╝ + 🠗 🠕 + 🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖🠖 + + This means that from the bootblock, we copy the IBB in chunks of + 0x8000 (or less if the IBB is smaller than a chunk) into CAR. + + ╔════════════════════╗ + ║ CAR ║ + ║ ╔════════════════╗ ║ + ║ ║ IBBM ║ ║ + ║ ╚════════════════╝ ║ + ╚════════════════════╝ + + We mark it as executable by flushing the L1 cache and then jump + to the verstage in CAR. The first thing we do is set the IBBL to + be uncacheable, as we no longer have a use for it. + + ╔════════════════════╗ + ║ SRAM (128 KiB) ║ + ║ ╔════════════════╗ ║ + ║ ║ ║ ║ + ║ ╚════════════════╝ ║ + ╚════════════════════╝ + + We keep copying the chunks until there are none left, then send + one final acknowledgement to the CSE - so it can know that the + bootflow is complete. 
diff --git a/src/soc/intel/apollolake/include/soc/loader.h b/src/soc/intel/apollolake/include/soc/loader.h new file mode 100644 index 0000000000..cfa4792a0e --- /dev/null +++ b/src/soc/intel/apollolake/include/soc/loader.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _SOC_LOADER_H_ +#define _SOC_LOADER_H_ + +/* HECI 1 offsets MMIO */ +#define HOST2CSE 0x70 +#define CSE2HOST 0x60 + +#define CSE_RBP_LIMIT 0x9be2 /* load_ibb() dies if a single copy would be larger than this */ +#define FIRST_CHUNK 0x8000 +/* + * Copy the IBB from the shared SRAM ring buffer to ibb_dst. + * Returns true when no APL/GLK CSE device is found and false otherwise + * (the definition returns -1 and 0, which convert to true and false). + */ +bool load_ibb(uint32_t ibb_dst, uint32_t ibb_size); + +#endif diff --git a/src/soc/intel/apollolake/loader.c b/src/soc/intel/apollolake/loader.c new file mode 100644 index 0000000000..5e15f2e2d6 --- /dev/null +++ b/src/soc/intel/apollolake/loader.c @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* + * Issue clflush on every 64-byte step of the range that begins at 'start'. + * The range length is ALIGN_UP(size, 4096), so more than 'size' bytes may + * be flushed. 'start' itself is not aligned down before the walk begins. + */ +static void flush_cache(uintptr_t start, uintptr_t size) +{ + uintptr_t end; + uintptr_t addr; + + end = start + (ALIGN_UP(size, 4096)); /* presumably rounds size up to a 4 KiB multiple */ + for (addr = start; addr < end; addr += 64) /* 64: presumably the cache line size - TODO confirm */ + clflush((void *)addr); +} + +/* + * This function is intentionally comment heavy, as the documentation for + * APL and GLK is almost non-existent. 
+ */
+
+/*
+ * Ask the CSE for the IBB, then copy it chunk by chunk from the shared SRAM
+ * ring buffer to ibb_dest, acknowledging every chunk through HOST2CSE.
+ *
+ * @param ibb_dest  Address the IBB is copied to.
+ * @param ibb_size  Size placed in the request to the CSE. The number of bytes
+ *                  actually copied is the size the CSE reports in CSE2HOST.
+ *
+ * @return true if the device at PCH_DEV_CSE is neither the APL nor the GLK
+ *         CSE, false once the copy loop and the final handshake are done.
+ *         Mind the polarity: these are the truth values the previous
+ *         "return -1" / "return 0" produced in this bool function.
+ *
+ * None of the polling loops has a timeout; an unresponsive CSE hangs here.
+ */
+bool load_ibb(uint32_t ibb_dest, uint32_t ibb_size)
+{
+	/* Request word: size shifted up by 4, bits 31:30 set */
+	uint32_t host_to_cse = (ibb_size << 4) | 0xc0000000;
+	uint32_t cse_to_host;
+	uint32_t cse_id;
+	uint32_t ring_index;
+	uint32_t chunk_index = 0;
+	uint32_t chunk_size;
+	uint32_t number_of_chunks;
+	uint32_t ibb_size_left;
+	uint32_t size;
+	uint8_t *dst;
+	uint8_t *src;
+
+	/* Check if the CSE exists; one read of the ID register is enough */
+	cse_id = pci_read_config32(PCH_DEV_CSE, 0);
+	if (cse_id != ((PCI_DID_INTEL_APL_CSE0 << 16) | PCI_VID_INTEL) &&
+	    cse_id != ((PCI_DID_INTEL_GLK_CSE0 << 16) | PCI_VID_INTEL))
+		return true;
+
+	/* Request data from CSE */
+	pci_write_config32(PCH_DEV_CSE, HOST2CSE, host_to_cse);
+
+	/* Wait for response: bits 29:28 read as zero until the CSE answers */
+	do {
+		cse_to_host = pci_read_config32(PCH_DEV_CSE, CSE2HOST);
+		number_of_chunks = (cse_to_host >> 28) & 3;
+	} while (number_of_chunks == 0);
+
+	/*
+	 * As the size of the SRAM isn't divisible by 3, we split
+	 * the data into 4 equally sized chunks of 0x8000.
+	 */
+	if (number_of_chunks == 3)
+		number_of_chunks = 4;
+
+	/* Get the size of the IBB remaining bytes, as reported by the CSE */
+	ibb_size_left = (cse_to_host & 0x0fffc000) >> 4;
+	chunk_size = SHARED_SRAM_SIZE / number_of_chunks;
+
+	/* Loading IBBM */
+	while (ibb_size_left > 0) {
+		/* Ring slots are consumed in order */
+		ring_index = chunk_index % number_of_chunks;
+
+		/*
+		 * A slot is ready when its bit in CSE2HOST differs from the
+		 * bit we last wrote to HOST2CSE. Keep re-reading CSE2HOST
+		 * until that is true for the slot we need; testing a stale
+		 * value after some other slot's bit changed would spin
+		 * forever without ever looking at the register again.
+		 */
+		while (((cse_to_host ^ host_to_cse) & (1u << ring_index)) == 0)
+			cse_to_host = pci_read_config32(PCH_DEV_CSE, CSE2HOST);
+
+		/* Calculate the source and destination address in ring buffer */
+		src = (uint8_t *)(uintptr_t)(SHARED_SRAM_BASE + chunk_size * ring_index);
+		dst = (uint8_t *)(uintptr_t)(ibb_dest + chunk_size * chunk_index);
+
+		/* The last chunk may be shorter than a full ring slot */
+		size = (ibb_size_left < chunk_size) ? ibb_size_left : chunk_size;
+
+		/*
+		 * The RBP can handle a maximum of 0x9be2 for each copy.
+		 * Whilst the above code should account for this, copying
+		 * more will break serial output so we die here so the
+		 * issue is known.
+		 */
+		if (size > CSE_RBP_LIMIT)
+			die("CSE RBP capabilities exceeded!\n");
+
+		/*
+		 * Flush the SRAM slot from the cache before reading it. src
+		 * is never below SHARED_SRAM_BASE, so no range check is
+		 * needed, and uintptr_t avoids truncating the pointer.
+		 */
+		flush_cache((uintptr_t)src, size);
+
+		/* Move data from SRAM into temporary memory */
+		memcpy(dst, src, size);
+		assert(!memcmp(dst, src, size));
+		ibb_size_left -= size;
+
+		/* Send ACK to CSE by toggling this slot's bit */
+		host_to_cse ^= 1u << ring_index;
+		pci_write_config32(PCH_DEV_CSE, HOST2CSE, host_to_cse);
+
+		chunk_index++;
+	}
+
+	/* Check that there are no remaining chunks */
+	do {
+		cse_to_host = pci_read_config32(PCH_DEV_CSE, CSE2HOST);
+	} while ((cse_to_host & BIT(10)) == 0);
+
+	/*
+	 * NOTE(review): bit 9 presumably reports success; nothing is logged
+	 * and false is still returned when it is clear - confirm intent.
+	 */
+	if ((cse_to_host & BIT(9)) != 0)
+		printk(BIOS_DEBUG, "CSE RBP: IBBM loaded to CAR!\n");
+
+	/* Clear MCA in BANK4 caused by the Clflush */
+	clear_mca_bank4();
+
+	return false;
+}