libpayload: ehci: Cache management is hard, let's go copying...
It turns out that my previous commit to make the EHCI stack cache-aware on ARM devices wasn't quite correct, and the problem is actually much trickier than I thought. After having some fun with more weird transfer problems that appear/disappear based on stack alignment, this is my current worst-case threat model that any cache-managing implementation would need to handle correctly: Some upper layer calls ehci_bulk() with a transfer buffer on its stack. Due to stack alignment, it happens to start just at the top of a cache line, so up to 64 - 4 bytes of ehci_bulk's stack will share that line. ehci_bulk() calls dcache_clean() and initializes the USB transfer. Between that point and the call to dcache_invalidate() at the end of ehci_bulk(), any access to the stack variables in that cache line (even a speculative prefetch) will refetch the line into the cache. Afterwards any other access to a random memory location that just happens to get aliased to the same cache line may evict it again, causing the processor to write out stale data to the transfer buffer and possibly overwrite data that has already been received over USB. In short, any dcache_clean/dcache_invalidate-based implementation that preserves correctness while allowing any arbitrary (non-cache-aligned) memory location as a transfer buffer is presumed to be impossible. Instead, this patch causes all transfer data to be copied to/from a cache-coherent bounce buffer. It will still transfer directly if the supplied buffer is already cache-coherent, which can be used by callers to optimize their transfers (and is true by default on x86). CQ-DEPEND=CL:169170 BUG=chrome-os-partner:21969 TEST=Make sure Snow still boots from the USB 2.0 port. Change-Id: I112908410bdbc8ca028d44f2f5d388c529f8057f Signed-off-by: Julius Werner <jwerner@chromium.org> Reviewed-on: https://chromium-review.googlesource.com/169231 Reviewed-by: Stefan Reinauer <reinauer@chromium.org>
This commit is contained in:
parent
7f54c8c133
commit
702dc50f1d
4 changed files with 86 additions and 23 deletions
|
|
@ -359,10 +359,11 @@ static int ehci_process_async_schedule(
|
|||
return result;
|
||||
}
|
||||
|
||||
static int ehci_bulk (endpoint_t *ep, int size, u8 *data, int finalize)
|
||||
static int ehci_bulk (endpoint_t *ep, int size, u8 *src, int finalize)
|
||||
{
|
||||
int result = 0;
|
||||
int counter = 0;
|
||||
u8 *end = src + size;
|
||||
int remaining = size;
|
||||
int endp = ep->endpoint & 0xf;
|
||||
int pid = (ep->direction==IN)?EHCI_IN:EHCI_OUT;
|
||||
|
||||
|
|
@ -373,30 +374,42 @@ static int ehci_bulk (endpoint_t *ep, int size, u8 *data, int finalize)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (!dma_coherent(src)) {
|
||||
end = EHCI_INST(ep->dev->controller)->dma_buffer + size;
|
||||
if (size > DMA_SIZE) {
|
||||
usb_debug("EHCI bulk transfer too large for DMA buffer: %d\n", size);
|
||||
return -1;
|
||||
}
|
||||
if (pid == EHCI_OUT)
|
||||
memcpy(end - size, src, size);
|
||||
}
|
||||
|
||||
ehci_qh_t *qh = dma_memalign(64, sizeof(ehci_qh_t));
|
||||
qtd_t *head = dma_memalign(64, sizeof(qtd_t));
|
||||
qtd_t *cur = head;
|
||||
dcache_clean_by_mva(data, size);
|
||||
if (!qh || !head)
|
||||
goto oom;
|
||||
while (1) {
|
||||
memset((void *)cur, 0, sizeof(qtd_t));
|
||||
cur->token = QTD_ACTIVE |
|
||||
(pid << QTD_PID_SHIFT) |
|
||||
(0 << QTD_CERR_SHIFT);
|
||||
u32 chunk = fill_td(cur, data + counter, size - counter);
|
||||
counter += chunk;
|
||||
remaining -= fill_td(cur, end - remaining, remaining);
|
||||
|
||||
cur->alt_next_qtd = QTD_TERMINATE;
|
||||
if (counter >= size) {
|
||||
if (remaining <= 0) {
|
||||
cur->next_qtd = virt_to_phys(0) | QTD_TERMINATE;
|
||||
break;
|
||||
} else {
|
||||
qtd_t *next = dma_memalign(64, sizeof(qtd_t));
|
||||
if (!next)
|
||||
goto oom;
|
||||
cur->next_qtd = virt_to_phys(next);
|
||||
cur = next;
|
||||
}
|
||||
}
|
||||
|
||||
/* create QH */
|
||||
ehci_qh_t *qh = dma_memalign(64, sizeof(ehci_qh_t));
|
||||
memset((void *)qh, 0, sizeof(ehci_qh_t));
|
||||
qh->horiz_link_ptr = virt_to_phys(qh) | QH_QH;
|
||||
qh->epchar = ep->dev->address |
|
||||
|
|
@ -416,22 +429,31 @@ static int ehci_bulk (endpoint_t *ep, int size, u8 *data, int finalize)
|
|||
|
||||
result = ehci_process_async_schedule(
|
||||
EHCI_INST(ep->dev->controller), qh, head);
|
||||
dcache_invalidate_by_mva(data, size);
|
||||
if (result >= 0)
|
||||
if (result >= 0) {
|
||||
result = size - result;
|
||||
if (pid == EHCI_IN && end != src + size)
|
||||
memcpy(src, end - size, result);
|
||||
}
|
||||
|
||||
ep->toggle = (cur->token & QTD_TOGGLE_MASK) >> QTD_TOGGLE_SHIFT;
|
||||
|
||||
free_qh_and_tds(qh, head);
|
||||
|
||||
return result;
|
||||
|
||||
oom:
|
||||
usb_debug("Not enough DMA memory for EHCI control structures!\n");
|
||||
free_qh_and_tds(qh, head);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* FIXME: Handle control transfers as 3 QHs, so the 2nd stage can be >0x4000 bytes */
|
||||
static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq,
|
||||
int dalen, u8 *data)
|
||||
static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *setup,
|
||||
int dalen, u8 *src)
|
||||
{
|
||||
u8 *data = src;
|
||||
u8 *devreq = setup;
|
||||
int endp = 0; // this is control. always 0 (for now)
|
||||
int toggle = 0;
|
||||
int mlen = dev->endpoints[0].maxpacketsize;
|
||||
|
|
@ -445,11 +467,26 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq
|
|||
non_hs_ctrl_ep = 1;
|
||||
}
|
||||
|
||||
if (!dma_coherent(setup)) {
|
||||
devreq = EHCI_INST(dev->controller)->dma_buffer;
|
||||
memcpy(devreq, setup, drlen);
|
||||
}
|
||||
if (dalen > 0 && !dma_coherent(src)) {
|
||||
data = EHCI_INST(dev->controller)->dma_buffer + drlen;
|
||||
if (drlen + dalen > DMA_SIZE) {
|
||||
usb_debug("EHCI control transfer too large for DMA buffer: %d\n", drlen + dalen);
|
||||
return -1;
|
||||
}
|
||||
if (dir == OUT)
|
||||
memcpy(data, src, dalen);
|
||||
}
|
||||
|
||||
/* create qTDs */
|
||||
dcache_clean_by_mva(devreq, drlen);
|
||||
qtd_t *head = dma_memalign(64, sizeof(qtd_t));
|
||||
ehci_qh_t *qh = dma_memalign(64, sizeof(ehci_qh_t));
|
||||
qtd_t *cur = head;
|
||||
if (!qh || !head)
|
||||
goto oom;
|
||||
memset((void *)cur, 0, sizeof(qtd_t));
|
||||
cur->token = QTD_ACTIVE |
|
||||
(toggle?QTD_TOGGLE_DATA1:0) |
|
||||
|
|
@ -461,11 +498,12 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq
|
|||
qtd_t *next = dma_memalign(64, sizeof(qtd_t));
|
||||
cur->next_qtd = virt_to_phys(next);
|
||||
cur->alt_next_qtd = QTD_TERMINATE;
|
||||
if (!next)
|
||||
goto oom;
|
||||
|
||||
/* FIXME: We're limited to 16-20K (depending on alignment) for payload for now.
|
||||
* Figure out, how toggle can be set sensibly in this scenario */
|
||||
if (dalen > 0) {
|
||||
dcache_clean_by_mva(data, dalen);
|
||||
toggle ^= 1;
|
||||
cur = next;
|
||||
memset((void *)cur, 0, sizeof(qtd_t));
|
||||
|
|
@ -477,6 +515,8 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq
|
|||
usb_debug("ERROR: couldn't send the entire control payload\n");
|
||||
}
|
||||
next = dma_memalign(64, sizeof(qtd_t));
|
||||
if (!next)
|
||||
goto oom;
|
||||
cur->next_qtd = virt_to_phys(next);
|
||||
cur->alt_next_qtd = QTD_TERMINATE;
|
||||
}
|
||||
|
|
@ -493,7 +533,6 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq
|
|||
cur->alt_next_qtd = QTD_TERMINATE;
|
||||
|
||||
/* create QH */
|
||||
ehci_qh_t *qh = dma_memalign(64, sizeof(ehci_qh_t));
|
||||
memset((void *)qh, 0, sizeof(ehci_qh_t));
|
||||
qh->horiz_link_ptr = virt_to_phys(qh) | QH_QH;
|
||||
qh->epchar = dev->address |
|
||||
|
|
@ -511,15 +550,19 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq
|
|||
|
||||
result = ehci_process_async_schedule(
|
||||
EHCI_INST(dev->controller), qh, head);
|
||||
if (result >= 0)
|
||||
if (result >= 0) {
|
||||
result = dalen - result;
|
||||
|
||||
dcache_invalidate_by_mva(devreq, drlen);
|
||||
if (dalen > 0)
|
||||
dcache_invalidate_by_mva(data, dalen);
|
||||
if (dir == IN && data != src)
|
||||
memcpy(src, data, result);
|
||||
}
|
||||
|
||||
free_qh_and_tds(qh, head);
|
||||
return result;
|
||||
|
||||
oom:
|
||||
usb_debug("Not enough DMA memory for EHCI control structures!\n");
|
||||
free_qh_and_tds(qh, head);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -790,6 +833,12 @@ ehci_init (void *bar)
|
|||
if (!periodic_list)
|
||||
fatal("Not enough memory creating EHCI periodic frame list.\n");
|
||||
|
||||
if (dma_initialized()) {
|
||||
EHCI_INST(controller)->dma_buffer = dma_memalign(4096, DMA_SIZE);
|
||||
if (!EHCI_INST(controller)->dma_buffer)
|
||||
fatal("Not enough DMA memory for EHCI bounce buffer.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert dummy QH in periodic frame list
|
||||
* This helps with broken host controllers
|
||||
|
|
|
|||
|
|
@ -135,13 +135,14 @@ typedef struct ehci {
|
|||
hc_cap_t *capabilities;
|
||||
hc_op_t *operation;
|
||||
ehci_qh_t *dummy_qh;
|
||||
#define DMA_SIZE (64 * 1024)
|
||||
void *dma_buffer;
|
||||
} ehci_t;
|
||||
|
||||
#define PS_TERMINATE 1
|
||||
#define PS_TYPE_QH 1 << 1
|
||||
#define PS_PTR_MASK ~0x1f
|
||||
|
||||
|
||||
#define EHCI_INST(controller) ((ehci_t*)((controller)->instance))
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -115,6 +115,8 @@ void *memalign(size_t align, size_t size);
|
|||
void init_dma_memory(void *start, u32 size);
|
||||
void *dma_malloc(size_t size);
|
||||
void *dma_memalign(size_t align, size_t size);
|
||||
int dma_initialized(void);
|
||||
int dma_coherent(void *ptr);
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ static int minimal_free = 0;
|
|||
void init_dma_memory(void *start, u32 size)
|
||||
{
|
||||
#ifdef CONFIG_LP_DEBUG_MALLOC
|
||||
if (dma != heap) {
|
||||
if (dma_initialized()) {
|
||||
printf("WARNING: %s called twice!\n");
|
||||
return;
|
||||
}
|
||||
|
|
@ -101,6 +101,17 @@ void init_dma_memory(void *start, u32 size)
|
|||
dma->align_regions = NULL;
|
||||
}
|
||||
|
||||
int dma_initialized()
|
||||
{
|
||||
return dma != heap;
|
||||
}
|
||||
|
||||
/* For boards that don't initialize DMA we assume all locations are coherent */
|
||||
int dma_coherent(void *ptr)
|
||||
{
|
||||
return !dma_initialized() || (dma->start <= ptr && dma->end > ptr);
|
||||
}
|
||||
|
||||
static void setup(hdrtype_t volatile *start, int size)
|
||||
{
|
||||
*start = FREE_BLOCK(size);
|
||||
|
|
@ -124,7 +135,7 @@ static void *alloc(int len, struct memory_type *type)
|
|||
|
||||
/* Make sure the region is setup correctly. */
|
||||
if (!HAS_MAGIC(*ptr))
|
||||
setup(ptr, (int)((&_eheap - &_heap) - HDRSIZE));
|
||||
setup(ptr, (int)((type->end - type->start) - HDRSIZE));
|
||||
|
||||
/* Find some free space. */
|
||||
do {
|
||||
|
|
@ -474,6 +485,6 @@ void print_malloc_map(void)
|
|||
if (free_memory && (minimal_free > free_memory))
|
||||
minimal_free = free_memory;
|
||||
printf("Maximum memory consumption: %d bytes\n",
|
||||
(unsigned int)(&_eheap - &_heap) - HDRSIZE - minimal_free);
|
||||
(unsigned int)(heap->end - heap->start) - HDRSIZE - minimal_free);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue