Haswell NRI: Implement 1D margin training

Implement an algorithm that performs a simple 1D margin training. This
algorithm is generic, i.e. it can be used with multiple margin params.
Use this algorithm to train three margin parameters: RdT, WrT and RdV.

This algorithm also does per-bit calibration, but only for RdT and WrT
since Haswell does not have per-rank per-bit RdV (cf. `RX_OFFSET_VDQ`
register). Still, implement support in `change_margin()` for all three
types of per-bit margins (WrTBit, RdTBit, RdVBit) for completeness.

Tested on Asrock Z97 Extreme6 with 2 DIMMs per channel (1R + 2R):
- NRI finishes successfully, board still boots to Arch Linux.
- Both fast training as well as S3 suspend/resume still work.

Change-Id: I382cea8e230aee46a0dc66248f1e678d8a9a0090
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/89314
Reviewed-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
This commit is contained in:
Angel Pons 2025-09-10 16:30:34 +02:00 committed by Matt DeVillier
commit 08f2f3a21b
7 changed files with 496 additions and 8 deletions

View file

@ -18,6 +18,7 @@ romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
romstage-y += testing_io.c
romstage-y += timings_refresh.c
romstage-y += train_1d_margins.c
romstage-y += train_jedec_write_leveling.c
romstage-y += train_read_mpr.c
romstage-y += train_receive_enable.c

View file

@ -200,18 +200,109 @@ static void update_data_offset_train(
}
}
static uint32_t get_max_margin(const enum margin_parameter param)
/*
 * Look up the MCHBAR register offset used to program a per-bit margin.
 *
 * With multicast set, the DDR_DATA_* register is returned and the channel
 * and byte arguments are not used; otherwise the register belonging to the
 * given channel and byte lane is returned. The rank is ignored for RdVBit,
 * as the RX_OFFSET_VDQ macros used here take no rank.
 *
 * Any parameter other than RdTBit, WrTBit or RdVBit ends in die().
 */
static uint16_t get_per_bit_reg(
	const enum margin_parameter param,
	const bool multicast,
	const uint8_t channel,
	const uint8_t rank,
	const uint8_t byte)
{
	if (param == RdTBit)
		return multicast ? DDR_DATA_RX_PER_BIT_RANK(rank)
				 : RX_PER_BIT(channel, rank, byte);

	if (param == WrTBit)
		return multicast ? DDR_DATA_TX_PER_BIT_RANK(rank)
				 : TX_PER_BIT(channel, rank, byte);

	if (param == RdVBit) {
		/** TODO: Broadwell has per-rank RX_OFFSET_VDQ registers **/
		return multicast ? DDR_DATA_RX_OFFSET_VDQ
				 : RX_OFFSET_VDQ(channel, byte);
	}

	die("%s: Invalid margin parameter %d\n", __func__, param);
}
/*
 * Mirror a raw per-bit register value into the software state kept in ctrl.
 *
 * The 32-bit value is treated as NUM_BITS consecutive 4-bit fields, with
 * bit lane 0 in the least significant nibble. Each field is stored as the
 * "center" of the matching per-bit entry for the given channel, rank and
 * byte lane. Parameters other than RdTBit, WrTBit and RdVBit leave ctrl
 * untouched.
 */
static void update_per_bit_margin(
	struct sysinfo *ctrl,
	const enum margin_parameter param,
	const uint8_t channel,
	const uint8_t rank,
	const uint8_t byte,
	const uint32_t value)
{
	for (uint8_t bit = 0; bit < NUM_BITS; bit++) {
		const uint8_t nibble = (value >> (bit * 4)) & 0xf;

		if (param == RdTBit)
			ctrl->rxdqpb[channel][rank][byte][bit].center = nibble;
		else if (param == WrTBit)
			ctrl->txdqpb[channel][rank][byte][bit].center = nibble;
		else if (param == RdVBit)
			ctrl->rxdqvrefpb[channel][rank][byte][bit].center = nibble;
	}
}
/*
 * Program a per-bit margin register and make the hardware pick it up.
 *
 * The raw value is written to the register chosen by get_per_bit_reg(),
 * then download_regfile() is invoked: once for the addressed channel and
 * byte lane, or, when multicast is set, once for every existing channel.
 * If update_ctrl is set, the value is also recorded in ctrl for each
 * affected byte lane.
 *
 * Read parameters (RdTBit, RdVBit) request the read regfile, WrTBit the
 * write regfile.
 */
static void change_per_bit_margin(
	struct sysinfo *ctrl,
	const enum margin_parameter param,
	const bool multicast,
	const uint8_t in_channel,
	const uint8_t rank,
	const uint8_t in_byte,
	const bool update_ctrl,
	const uint32_t value,
	const enum regfile_mode regfile)
{
	const bool is_wr = (param == WrTBit);
	const bool is_rd = (param == RdTBit) || (param == RdVBit);

	mchbar_write32(get_per_bit_reg(param, multicast, in_channel, rank, in_byte), value);

	if (!multicast) {
		/* Single byte lane on a single channel */
		download_regfile(ctrl, in_channel, false, rank, regfile, in_byte, is_rd, is_wr);
		if (update_ctrl)
			update_per_bit_margin(ctrl, param, in_channel, rank, in_byte, value);
		return;
	}

	for (uint8_t ch = 0; ch < NUM_CHANNELS; ch++) {
		if (!does_ch_exist(ctrl, ch))
			continue;

		download_regfile(ctrl, ch, true, rank, regfile, 0, is_rd, is_wr);
		if (!update_ctrl)
			continue;

		for (uint8_t lane = 0; lane < ctrl->lanes; lane++)
			update_per_bit_margin(ctrl, param, ch, rank, lane, value);
	}
}
/*
 * Return the largest offset magnitude that may be applied to a margin
 * parameter: MAX_POSSIBLE_TIME for the timing parameters (RcvEna, RdT, WrT,
 * WrDqsT) and MAX_POSSIBLE_VREF for the voltage parameters (RdV, WrV).
 * Any other parameter is reported through die().
 */
uint32_t get_max_margin_for_param(const enum margin_parameter param)
{
	const bool is_timing = param == RcvEna || param == RdT ||
			       param == WrT || param == WrDqsT;
	const bool is_vref = param == RdV || param == WrV;

	if (is_timing)
		return MAX_POSSIBLE_TIME;

	if (is_vref)
		return MAX_POSSIBLE_VREF;

	die("%s: Invalid margin parameter %u\n", __func__, param);
	/* NOTE(review): presumably unreachable, as die() is not expected to return */
	return MAX_POSSIBLE_TIME;
}
@ -233,7 +324,22 @@ void change_margin(
if (!en_multicast && !does_ch_exist(ctrl, channel))
die("%s: Tried to change margin of empty channel %u\n", __func__, channel);
const uint32_t max_value = get_max_margin(param);
/* Per-bit margins are handled differently */
if (param == WrTBit || param == RdTBit || param == RdVBit) {
change_per_bit_margin(
ctrl,
param,
en_multicast,
channel,
rank,
byte,
update_ctrl,
(uint32_t)value0,
regfile);
return;
}
const uint32_t max_value = get_max_margin_for_param(param);
const int32_t v0 = clamp_s32(-max_value, value0, max_value);
union ddr_data_offset_train_reg ddr_data_offset_train = {

View file

@ -129,6 +129,9 @@ static const struct task_entry cold_boot[] = {
{ train_receive_enable, true, "RCVET", },
{ train_read_mpr, true, "RDMPRT", },
{ train_jedec_write_leveling, true, "JWRL", },
{ train_write_timing_centering, true, "WRTC1D", },
{ train_read_timing_centering, true, "RDTC1D", },
{ train_read_voltage_centering, true, "RDVC1D", },
{ optimise_comp, true, "OPTCOMP", },
{ post_training, true, "POSTTRAIN", },
{ activate_mc, true, "ACTIVATE", },

View file

@ -68,6 +68,29 @@ enum margin_parameter {
WrT,
WrDqsT,
RdV,
WrV,
WrLevel,
WrTBit,
RdTBit,
RdVBit,
INVALID_MARGIN,
};
/*
 * Index into the array of margin results (cf. the results member of
 * struct sysinfo). The stored margins let more advanced training
 * algorithms (optimisation) keep track of how much margin is available.
 */
enum last_margin_param {
	LastRxV = 0,		/* Filled in by RdV 1D margin training */
	LastRxT,		/* Filled in by RdT 1D margin training */
	LastTxV,
	LastTxT,		/* Filled in by WrT 1D margin training */
	LastRcvEna,
	LastWrLevel,
	LastCmdT,
	LastCmdV,
	MAX_RESULT_TYPE,	/* Number of result types; must remain last */
};
enum opt_param {
@ -232,6 +255,7 @@ enum raminit_status {
RAMINIT_STATUS_SUCCESS = 0,
RAMINIT_STATUS_NO_MEMORY_INSTALLED,
RAMINIT_STATUS_UNSUPPORTED_MEMORY,
RAMINIT_STATUS_INVALID_PARAMETER,
RAMINIT_STATUS_MPLL_INIT_FAILURE,
RAMINIT_STATUS_POLL_TIMEOUT,
RAMINIT_STATUS_REUT_ERROR,
@ -239,6 +263,7 @@ enum raminit_status {
RAMINIT_STATUS_RCVEN_FAILURE,
RAMINIT_STATUS_RMPR_FAILURE,
RAMINIT_STATUS_JWRL_FAILURE,
RAMINIT_STATUS_1D_MARGINING_FAILURE,
RAMINIT_STATUS_INVALID_CACHE,
RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
};
@ -261,12 +286,32 @@ struct raminit_dimm_info {
bool valid;
};
/*
 * Margin recorded for one byte lane by a training step. 1D margin training
 * stores ten times the absolute distance between the chosen center and the
 * respective edge of the largest passing range.
 */
struct last_margin {
	uint32_t start;	/* Distance from the center to the start of the range */
	uint32_t end;	/* Distance from the center to the end of the range */
};
/*
 * Per-bit timing entry, used for the rxdqpb and txdqpb arrays in sysinfo.
 * NOTE(review): left/right presumably hold the edges of the passing window;
 * confirm against the code that fills them in.
 */
struct time_margin {
	uint8_t left;
	uint8_t center;	/* 4-bit field taken from the per-bit register value */
	uint8_t right;
};
/*
 * Per-bit voltage reference entry, used for the rxdqvrefpb array in sysinfo.
 * NOTE(review): low/high presumably hold the edges of the passing window;
 * confirm against the code that fills them in.
 */
struct vref_margin {
	uint8_t low;
	uint8_t center;	/* 4-bit field taken from the per-bit register value */
	uint8_t high;
};
/*
 * Raw per-bitlane error status of one channel, as filled in by
 * get_bitlane_errors(). The same data can be accessed as the values read
 * from the REUT error status registers or as one byte per byte lane.
 */
union raw_bitlane_errors {
	struct {
		uint32_t lower;	/* REUT_ch_ERR_DATA_STATUS, offset +0 */
		uint32_t upper;	/* REUT_ch_ERR_DATA_STATUS, offset +4 */
		uint8_t ecc;	/* First byte of REUT_ch_ERR_MISC_STATUS */
	};
	/*
	 * One byte of flags per byte lane; a set bit marks a failing bit lane.
	 * NOTE(review): lining up with the struct view above assumes a
	 * little-endian CPU and NUM_LANES covering the ECC lane; confirm.
	 */
	uint8_t per_byte[NUM_LANES];
};
struct sysinfo {
enum raminit_boot_mode bootmode;
enum generic_stepping stepping;
@ -355,6 +400,9 @@ struct sysinfo {
uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
int8_t rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
struct time_margin rxdqpb[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES][NUM_BITS];
struct time_margin txdqpb[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES][NUM_BITS];
struct vref_margin rxdqvrefpb[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES][NUM_BITS];
uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
@ -365,6 +413,11 @@ struct sysinfo {
uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS];
uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS];
/*
* BIG WARNING: The slotrank comes before the channel!
*/
struct last_margin results[MAX_RESULT_TYPE][NUM_SLOTRANKS][NUM_CHANNELS][NUM_LANES];
union tc_bank_reg tc_bank[NUM_CHANNELS];
union tc_bank_rank_a_reg tc_bankrank_a[NUM_CHANNELS];
union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS];
@ -488,6 +541,9 @@ enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl);
enum raminit_status train_receive_enable(struct sysinfo *ctrl);
enum raminit_status train_read_mpr(struct sysinfo *ctrl);
enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
enum raminit_status train_read_timing_centering(struct sysinfo *ctrl);
enum raminit_status train_write_timing_centering(struct sysinfo *ctrl);
enum raminit_status train_read_voltage_centering(struct sysinfo *ctrl);
enum raminit_status optimise_comp(struct sysinfo *ctrl);
enum raminit_status save_training_values(struct sysinfo *ctrl);
enum raminit_status restore_training_values(struct sysinfo *ctrl);
@ -536,6 +592,7 @@ void reut_issue_mrs_all(
const uint16_t val[NUM_SLOTS]);
enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
union raw_bitlane_errors get_bitlane_errors(const uint8_t channel);
void write_wdb_fixed_pat(
const struct sysinfo *ctrl,
@ -604,6 +661,8 @@ void download_regfile(
bool read_rf_rd,
bool read_rf_wr);
uint32_t get_max_margin_for_param(const enum margin_parameter param);
void change_margin(
struct sysinfo *ctrl,
const enum margin_parameter param,

View file

@ -194,3 +194,12 @@ enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_
return status;
}
union raw_bitlane_errors get_bitlane_errors(const uint8_t channel)
{
return (union raw_bitlane_errors) {
.lower = mchbar_read32(REUT_ch_ERR_DATA_STATUS(channel) + 0),
.upper = mchbar_read32(REUT_ch_ERR_DATA_STATUS(channel) + 4),
.ecc = mchbar_read8(REUT_ch_ERR_MISC_STATUS(channel)),
};
}

View file

@ -0,0 +1,305 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "raminit_native.h"
#include "ranges.h"
#define MAX_BITLANE_LINES (2 * MAX_POSSIBLE_BOTH + 1)
#define MARGIN_MIN_WIDTH 8
#define MARGIN_1D_PLOT RAM_DEBUG
static void print_bitlane(
struct sysinfo *ctrl,
const uint8_t channel,
const int32_t m_start,
const int32_t m_step,
const int32_t m_stop,
const union raw_bitlane_errors bit_failures[NUM_CHANNELS][MAX_BITLANE_LINES])
{
for (int32_t offset = m_start; offset <= m_stop; offset += m_step) {
const uint32_t index = offset + MAX_POSSIBLE_BOTH;
const union raw_bitlane_errors *failures = &bit_failures[channel][index];
printk(MARGIN_1D_PLOT, "\n% 5d\t", offset);
for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
const uint8_t byte_errors = failures->per_byte[byte];
for (uint8_t bit = 0; bit < NUM_BITS; bit++)
printk(MARGIN_1D_PLOT, byte_errors & BIT(bit) ? "#" : ".");
}
}
}
/*
 * Center a margin parameter for every byte lane of one channel and rank,
 * using the largest passing range found by the sweep.
 *
 * For each byte lane, the range edges, width and center are logged, the
 * center is added to the trained value(s) of the parameter (RdV uses half
 * the center, as its sweep runs in half steps), the hardware is updated and
 * the remaining margin on each side is stored in ctrl->results.
 *
 * A range narrower than MARGIN_MIN_WIDTH is reported as an error, but the
 * center is applied regardless.
 *
 * Returns the incoming status, replaced by
 * RAMINIT_STATUS_1D_MARGINING_FAILURE if any byte lane's range was too
 * narrow.
 */
static enum raminit_status apply_dq_offsets(
	struct sysinfo *ctrl,
	const uint8_t channel,
	const uint8_t rank,
	const enum margin_parameter param,
	enum raminit_status status,
	const struct linear_train_data chan_data[])
{
	printk(BIOS_DEBUG, "\nC%u.R%u: Left\tRight\tWidth\tCenter\n", channel, rank);

	for (uint8_t lane = 0; lane < ctrl->lanes; lane++) {
		const struct linear_train_data *const data = &chan_data[lane];
		const int32_t width = range_width(data->largest);
		const int32_t mid = range_center(data->largest);

		printk(BIOS_DEBUG, " B%u: %d\t%d\t%d\t%d",
			lane,
			data->largest.start,
			data->largest.end,
			width,
			mid);

		if (width >= MARGIN_MIN_WIDTH) {
			printk(BIOS_DEBUG, "\n");
		} else {
			printk(BIOS_ERR,
				"\t1D eye too small! channel: %u byte: %u width: %d\n",
				channel, lane, width);
			status = RAMINIT_STATUS_1D_MARGINING_FAILURE;
		}

		/* Margin left on either side of the new center, scaled by ten */
		const struct last_margin margin = {
			.start = ABS(data->largest.start - mid) * 10,
			.end = ABS(data->largest.end - mid) * 10,
		};

		if (param == RdT) {
			ctrl->rxdqsp[channel][rank][lane] += mid;
			ctrl->rxdqsn[channel][rank][lane] += mid;
			update_rxt(ctrl, channel, rank, lane, RXT_RESTORE, 0);
			ctrl->results[LastRxT][rank][channel][lane] = margin;
		} else if (param == WrT) {
			ctrl->tx_dq[channel][rank][lane] += mid;
			update_txt(ctrl, channel, rank, lane, TXT_RESTORE, 0);
			ctrl->results[LastTxT][rank][channel][lane] = margin;
		} else if (param == RdV) {
			/* RdV training uses half steps */
			ctrl->rxvref[channel][rank][lane] += mid / 2;
			update_rxt(ctrl, channel, rank, lane, RXT_RESTORE, 0);
			ctrl->results[LastRxV][rank][channel][lane] = margin;
		}
	}

	return status;
}
/*
 * Return the label printed next to the margin plots for a margin parameter,
 * or NULL if the parameter is not one that 1D margin training handles
 * (RdT, WrT, RdV). Callers use the NULL result to reject other parameters.
 *
 * The return type is a plain pointer to const char: a top-level const on a
 * function's return type has no effect and only triggers compiler warnings
 * about ignored qualifiers.
 */
static const char *get_delay_string(const enum margin_parameter param)
{
	switch (param) {
	case RdT:
		return "RdDqsDelay";
	case WrT:
		return "WrDqsDelay";
	case RdV:
		return "RdVoltage";
	default:
		return NULL;
	}
}
/*
 * Map a margin parameter to its per-bit counterpart (RdT -> RdTBit,
 * WrT -> WrTBit, RdV -> RdVBit). Parameters without a per-bit counterpart
 * yield INVALID_MARGIN.
 */
static enum margin_parameter get_per_bit_margin_param(const enum margin_parameter param)
{
	if (param == RdT)
		return RdTBit;

	if (param == WrT)
		return WrTBit;

	if (param == RdV)
		return RdVBit;

	return INVALID_MARGIN;
}
/*
 * Generic 1D margin training for RdT, WrT or RdV.
 *
 * For every populated rank, the parameter is first put at a known starting
 * point, then swept from -max to +max (max as per get_max_margin_for_param())
 * in steps of one while running an I/O test at each offset. Pass/fail is
 * recorded per byte lane, and per bit lane for the debug plots. Afterwards,
 * apply_dq_offsets() centers each byte lane in its largest passing range.
 *
 * ctrl          - RAM init state, updated with the trained values
 * chanmask_in   - channel mask handed to setup_io_test_basic_va()
 * param         - margin parameter to train: RdT, WrT or RdV
 * reset_per_bit - write 0x88888888 to the matching per-bit parameter of each
 *                 rank before sweeping
 * loopcount     - loop count handed to setup_io_test_basic_va()
 *
 * Returns RAMINIT_STATUS_INVALID_PARAMETER for unsupported parameters,
 * RAMINIT_STATUS_1D_MARGINING_FAILURE if any byte lane ended up with a
 * passing range narrower than MARGIN_MIN_WIDTH, and 0 otherwise.
 */
static enum raminit_status train_1d_margin(
	struct sysinfo *ctrl,
	const uint8_t chanmask_in,
	const enum margin_parameter param,
	const bool reset_per_bit,
	const uint8_t loopcount)
{
	/* A NULL label means the parameter is not supported here */
	const char *const delay_string = get_delay_string(param);
	if (!delay_string) {
		printk(BIOS_ERR, "%s: Invalid margin parameter %u\n", __func__, param);
		return RAMINIT_STATUS_INVALID_PARAMETER;
	}
	setup_io_test_basic_va(ctrl, chanmask_in, loopcount, NSOE);
	const enum margin_parameter param_bit = get_per_bit_margin_param(param);
	if (reset_per_bit && param_bit == INVALID_MARGIN) {
		printk(BIOS_ERR, "%s: Invalid per-bit margin for param %u\n", __func__, param);
		return RAMINIT_STATUS_INVALID_PARAMETER;
	}
	/* Sweep symmetrically around zero, one step at a time */
	const int32_t m_stop = get_max_margin_for_param(param);
	const int32_t m_start = -m_stop;
	const int32_t m_step = 1;
	/* 0 is RAMINIT_STATUS_SUCCESS */
	enum raminit_status status = 0;
	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
		if (!does_rank_exist(ctrl, rank))
			continue;

		/* Hell thinks this should be resetting the currently selected rank */
		if (reset_per_bit) {
			/* 0x88888888 places the value 8 in each of the eight 4-bit per-bit fields */
			change_1d_margin_multicast(
				ctrl,
				param_bit,
				0x88888888,
				rank,
				true,
				REG_FILE_USE_RANK);
		}
		/* Header of the per-byte plot: channel numbers, then byte lane numbers */
		printk(BIOS_DEBUG, "Rank %u\n", rank);
		printk(MARGIN_1D_PLOT, "Channel");
		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
			if (!rank_in_ch(ctrl, rank, channel))
				continue;

			printk(MARGIN_1D_PLOT, "\t%u\t\t", channel);
		}
		printk(MARGIN_1D_PLOT, "\nByte");
		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
			if (!rank_in_ch(ctrl, rank, channel))
				continue;

			printk(MARGIN_1D_PLOT, "\t");
			for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
				printk(MARGIN_1D_PLOT, "%u ", byte);
		}
		/* Channels to test for this rank, as reported by select_reut_ranks() */
		uint8_t chanmask = 0;
		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
			chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
			if (!(BIT(channel) & chanmask))
				continue;

			/* Update rank timing to middle value */
			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
				if (param == RdT) {
					ctrl->rxdqsp[channel][rank][byte] = 32;
					ctrl->rxdqsn[channel][rank][byte] = 32;
					update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
				} else if (param == WrT) {
					/*
					 * Put TxDq in the middle of the strobe and ensure
					 * there is enough room to sweep to the right.
					 */
					uint16_t tx_dq = ctrl->txdqs[channel][rank][byte] + 32;
					if ((tx_dq + m_stop) > 511)
						tx_dq = 511 - m_stop;

					ctrl->tx_dq[channel][rank][byte] = tx_dq;
					update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
				} else if (param == RdV) {
					ctrl->rxvref[channel][rank][byte] = 0;
					update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
				}
			}
			/* Set up REUT error counters to count errors per channel */
			mchbar_write32(REUT_ch_ERR_COUNTER_CTL_x(channel, 0), 0);
		}
		/* Nothing to test on this rank */
		if (!chanmask)
			continue;

		clear_data_offset_train_all(ctrl);
		printk(MARGIN_1D_PLOT, "\n%s", delay_string);
		/* Per-byte pass ranges and per-bit error snapshots gathered by the sweep */
		struct linear_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
		union raw_bitlane_errors bit_errors[NUM_CHANNELS][MAX_BITLANE_LINES] = { 0 };
		for (int32_t offset = m_start; offset <= m_stop; offset += m_step) {
			printk(MARGIN_1D_PLOT, "\n% 5d", offset);
			/* Apply the offset under test, then run the I/O test */
			change_1d_margin_multicast(
				ctrl,
				param,
				offset,
				0,
				false,
				REG_FILE_USE_START);
			run_io_test(ctrl, chanmask, ctrl->dq_pat, 1);
			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
				if (!(chanmask & BIT(channel)))
					continue;

				/* Read out byte group error results and update limit */
				const uint16_t result = get_byte_group_errors(channel);
				/* Offsets can be negative, so bias them to get an array index */
				bit_errors[channel][offset + MAX_POSSIBLE_BOTH] =
					get_bitlane_errors(channel);
				printk(MARGIN_1D_PLOT, "\t");
				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
					/* A set bit in the result marks a failing byte lane */
					const bool pass = !(result & BIT(byte));
					printk(MARGIN_1D_PLOT, pass ? ". " : "# ");
					linear_record_pass(
						&region_data[channel][byte],
						pass,
						offset,
						m_start,
						m_step);
				}
			}
		}
		/* Second plot: the same sweep, but resolved down to individual bit lanes */
		printk(MARGIN_1D_PLOT, "\n\nBit lane information");
		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
			if (!(chanmask & BIT(channel)))
				continue;

			printk(MARGIN_1D_PLOT, "\nChannel %u\nByte ", channel);
			for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
				printk(MARGIN_1D_PLOT, "%u ", byte);

			printk(MARGIN_1D_PLOT, "\nBit ");
			for (uint8_t bit_count = 0; bit_count < ctrl->lanes * 8; bit_count++)
				printk(MARGIN_1D_PLOT, "%u", bit_count % 8);

			printk(MARGIN_1D_PLOT, "\n%s", delay_string);
			print_bitlane(
				ctrl,
				channel,
				m_start,
				m_step,
				m_stop,
				bit_errors);
			printk(MARGIN_1D_PLOT, "\n");
		}
		/* Put the margin offset back to zero before applying the results */
		change_1d_margin_multicast(
			ctrl,
			param,
			0,
			0,
			false,
			REG_FILE_USE_CURRENT);
		/* Center each tested channel; a failure on any channel sticks in status */
		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
			if (!(chanmask & BIT(channel)))
				continue;

			status = apply_dq_offsets(
				ctrl,
				channel,
				rank,
				param,
				status,
				region_data[channel]);
		}
		printk(BIOS_DEBUG, "\n");
	}
	return status;
}
#define DATA_TRAIN_LOOP_COUNT 15
/*
 * 1D read timing (RdT) centering on the channels in ctrl->chanmap, with the
 * per-bit read timing reset beforehand.
 */
enum raminit_status train_read_timing_centering(struct sysinfo *ctrl)
{
	const bool reset_per_bit = true;

	return train_1d_margin(ctrl, ctrl->chanmap, RdT, reset_per_bit, DATA_TRAIN_LOOP_COUNT);
}
/*
 * 1D write timing (WrT) centering on the channels in ctrl->chanmap, with the
 * per-bit write timing reset beforehand.
 */
enum raminit_status train_write_timing_centering(struct sysinfo *ctrl)
{
	const bool reset_per_bit = true;

	return train_1d_margin(ctrl, ctrl->chanmap, WrT, reset_per_bit, DATA_TRAIN_LOOP_COUNT);
}
/* 1D read voltage (RdV) centering on the channels in ctrl->chanmap. */
enum raminit_status train_read_voltage_centering(struct sysinfo *ctrl)
{
	/*
	 * We do not reset per-bit RdV. On Haswell, RX_OFFSET_VDQ is not a
	 * per-rank register: the same register is shared by all ranks, so
	 * this would require combining training results for all ranks
	 * somehow. While Broadwell and newer platforms have per-rank
	 * RX_OFFSET_VDQ registers, we can keep using sense amp offset
	 * training results, for now.
	 */
	const bool reset_per_bit = false;

	return train_1d_margin(ctrl, ctrl->chanmap, RdV, reset_per_bit, DATA_TRAIN_LOOP_COUNT);
}

View file

@ -16,7 +16,9 @@
/* DDR DATA per-channel per-bytelane */
#define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
#define RX_PER_BIT(ch, rank, byte) _DDRIO_C_R_B(0x0010, ch, rank, byte)
#define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
#define TX_PER_BIT(ch, rank, byte) _DDRIO_C_R_B(0x0030, ch, rank, byte)
#define RX_OFFSET_VDQ(ch, byte) _DDRIO_C_R_B(0x004c, ch, 0, byte)
@ -126,8 +128,11 @@
#define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
#define REUT_ch_ERR_DATA_STATUS(ch) _MCMAIN_C(0x40e0, ch)
#define REUT_ch_ERR_MISC_STATUS(ch) _MCMAIN_C(0x40e8, ch)
#define REUT_ch_ERR_COUNTER_CTL_x(ch, byte) _MCMAIN_C_X(0x40f0, ch, byte)
#define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
#define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
#define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)