Redo fespi flash algorithm (#384)
* WIP, rewrite of flash algorithm. Just put all the flashing logic into the algorithm, instead of using an intermediate format. This should reduce total data written while flashing by about 9%, and also makes the code much simpler. Change-Id: I807e60c8ab4f9f376cceaecdbbd10a2326be1c79 * New algorithm works. Speeds up Arty flashing another 9%. wrote 2228224 bytes from file /media/sf_tnewsome/SiFive/arty_images/arty.E21TraceFPGAEvaluationConfig.mcs in 86.784538s (25.074 KiB/s) verified 2192012 bytes in 6.693336s (319.816 KiB/s) 8.66user 13.03system 1:33.91elapsed 23%CPU (0avgtext+0avgdata 12272maxresident)k Change-Id: Ie55c5250d667251be141cb32b144bbcf3713fce4 * Fix whitespace. Change-Id: I338d518fa11a108efb530ffe75a2030619457a0b * Don't reserve so much stack space. Also properly check XLEN in riscv_wrapper.S. Change-Id: Ifa0301f3ea80f648fb8a6d6b6c8bf39f386fe4a6
This commit is contained in:
+118
-306
@@ -471,262 +471,13 @@ static int slow_fespi_write_buffer(struct flash_bank *bank,
|
||||
return ERROR_OK;
|
||||
}
|
||||
|
||||
static const uint8_t algorithm_bin[] = {
|
||||
#include "../../../contrib/loaders/flash/fespi/fespi.inc"
|
||||
};
|
||||
#define STEP_EXIT 4
|
||||
#define STEP_TX 8
|
||||
#define STEP_TXWM_WAIT 12
|
||||
#define STEP_WRITE_REG 16
|
||||
#define STEP_WIP_WAIT 20
|
||||
#define STEP_SET_DIR 24
|
||||
#define STEP_NOP 0xff
|
||||
|
||||
struct algorithm_steps {
|
||||
unsigned size;
|
||||
unsigned used;
|
||||
uint8_t **steps;
|
||||
static const uint8_t riscv32_bin[] = {
|
||||
#include "../../../contrib/loaders/flash/fespi/riscv32_fespi.inc"
|
||||
};
|
||||
|
||||
static struct algorithm_steps *as_new(void)
|
||||
{
|
||||
struct algorithm_steps *as = calloc(1, sizeof(struct algorithm_steps));
|
||||
as->size = 8;
|
||||
as->steps = malloc(as->size * sizeof(as->steps[0]));
|
||||
return as;
|
||||
}
|
||||
|
||||
static struct algorithm_steps *as_delete(struct algorithm_steps *as)
|
||||
{
|
||||
for (unsigned step = 0; step < as->used; step++) {
|
||||
free(as->steps[step]);
|
||||
as->steps[step] = NULL;
|
||||
}
|
||||
free(as->steps);
|
||||
free(as);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int as_empty(struct algorithm_steps *as)
|
||||
{
|
||||
for (unsigned s = 0; s < as->used; s++) {
|
||||
if (as->steps[s][0] != STEP_NOP)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Return size of compiled program. */
|
||||
static unsigned as_compile(struct algorithm_steps *as, uint8_t *target,
|
||||
unsigned target_size)
|
||||
{
|
||||
unsigned offset = 0;
|
||||
bool finish_early = false;
|
||||
for (unsigned s = 0; s < as->used && !finish_early; s++) {
|
||||
unsigned bytes_left = target_size - offset;
|
||||
switch (as->steps[s][0]) {
|
||||
case STEP_NOP:
|
||||
break;
|
||||
case STEP_TX:
|
||||
{
|
||||
unsigned size = as->steps[s][1];
|
||||
if (size + 3 > bytes_left) {
|
||||
finish_early = true;
|
||||
break;
|
||||
}
|
||||
memcpy(target + offset, as->steps[s], size + 2);
|
||||
offset += size + 2;
|
||||
break;
|
||||
}
|
||||
case STEP_WRITE_REG:
|
||||
if (4 > bytes_left) {
|
||||
finish_early = true;
|
||||
break;
|
||||
}
|
||||
memcpy(target + offset, as->steps[s], 3);
|
||||
offset += 3;
|
||||
break;
|
||||
case STEP_SET_DIR:
|
||||
if (3 > bytes_left) {
|
||||
finish_early = true;
|
||||
break;
|
||||
}
|
||||
memcpy(target + offset, as->steps[s], 2);
|
||||
offset += 2;
|
||||
break;
|
||||
case STEP_TXWM_WAIT:
|
||||
case STEP_WIP_WAIT:
|
||||
if (2 > bytes_left) {
|
||||
finish_early = true;
|
||||
break;
|
||||
}
|
||||
memcpy(target + offset, as->steps[s], 1);
|
||||
offset += 1;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
if (!finish_early)
|
||||
as->steps[s][0] = STEP_NOP;
|
||||
}
|
||||
assert(offset + 1 <= target_size);
|
||||
target[offset++] = STEP_EXIT;
|
||||
|
||||
LOG_DEBUG("%d-byte program:", offset);
|
||||
for (unsigned i = 0; i < offset;) {
|
||||
char buf[80];
|
||||
for (unsigned x = 0; i < offset && x < 16; x++, i++)
|
||||
sprintf(buf + x*3, "%02x ", target[i]);
|
||||
LOG_DEBUG("%s", buf);
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static void as_add_step(struct algorithm_steps *as, uint8_t *step)
|
||||
{
|
||||
if (as->used == as->size) {
|
||||
as->size *= 2;
|
||||
as->steps = realloc(as->steps, sizeof(as->steps[0]) * as->size);
|
||||
LOG_DEBUG("Increased size to 0x%x", as->size);
|
||||
}
|
||||
as->steps[as->used] = step;
|
||||
as->used++;
|
||||
}
|
||||
|
||||
static void as_add_tx(struct algorithm_steps *as, unsigned count, const uint8_t *data)
|
||||
{
|
||||
LOG_DEBUG("count=%d", count);
|
||||
while (count > 0) {
|
||||
unsigned step_count = MIN(count, 255);
|
||||
uint8_t *step = malloc(step_count + 2);
|
||||
step[0] = STEP_TX;
|
||||
step[1] = step_count;
|
||||
memcpy(step + 2, data, step_count);
|
||||
as_add_step(as, step);
|
||||
data += step_count;
|
||||
count -= step_count;
|
||||
}
|
||||
}
|
||||
|
||||
static void as_add_tx1(struct algorithm_steps *as, uint8_t byte)
|
||||
{
|
||||
uint8_t data[1];
|
||||
data[0] = byte;
|
||||
as_add_tx(as, 1, data);
|
||||
}
|
||||
|
||||
static void as_add_write_reg(struct algorithm_steps *as, uint8_t offset, uint8_t data)
|
||||
{
|
||||
uint8_t *step = malloc(3);
|
||||
step[0] = STEP_WRITE_REG;
|
||||
step[1] = offset;
|
||||
step[2] = data;
|
||||
as_add_step(as, step);
|
||||
}
|
||||
|
||||
static void as_add_txwm_wait(struct algorithm_steps *as)
|
||||
{
|
||||
uint8_t *step = malloc(1);
|
||||
step[0] = STEP_TXWM_WAIT;
|
||||
as_add_step(as, step);
|
||||
}
|
||||
|
||||
static void as_add_wip_wait(struct algorithm_steps *as)
|
||||
{
|
||||
uint8_t *step = malloc(1);
|
||||
step[0] = STEP_WIP_WAIT;
|
||||
as_add_step(as, step);
|
||||
}
|
||||
|
||||
static void as_add_set_dir(struct algorithm_steps *as, bool dir)
|
||||
{
|
||||
uint8_t *step = malloc(2);
|
||||
step[0] = STEP_SET_DIR;
|
||||
step[1] = FESPI_FMT_DIR(dir);
|
||||
as_add_step(as, step);
|
||||
}
|
||||
|
||||
/* This should write something less than or equal to a page.*/
|
||||
static int steps_add_buffer_write(struct algorithm_steps *as,
|
||||
const uint8_t *buffer, uint32_t chip_offset, uint32_t len)
|
||||
{
|
||||
if (chip_offset & 0xFF000000) {
|
||||
LOG_ERROR("FESPI interface does not support greater than 3B addressing, can't write to offset 0x%x",
|
||||
chip_offset);
|
||||
return ERROR_FAIL;
|
||||
}
|
||||
|
||||
as_add_tx1(as, SPIFLASH_WRITE_ENABLE);
|
||||
as_add_txwm_wait(as);
|
||||
as_add_write_reg(as, FESPI_REG_CSMODE, FESPI_CSMODE_HOLD);
|
||||
|
||||
uint8_t setup[] = {
|
||||
SPIFLASH_PAGE_PROGRAM,
|
||||
chip_offset >> 16,
|
||||
chip_offset >> 8,
|
||||
chip_offset,
|
||||
};
|
||||
as_add_tx(as, sizeof(setup), setup);
|
||||
|
||||
as_add_tx(as, len, buffer);
|
||||
as_add_txwm_wait(as);
|
||||
as_add_write_reg(as, FESPI_REG_CSMODE, FESPI_CSMODE_AUTO);
|
||||
|
||||
/* fespi_wip() */
|
||||
as_add_set_dir(as, FESPI_DIR_RX);
|
||||
as_add_write_reg(as, FESPI_REG_CSMODE, FESPI_CSMODE_HOLD);
|
||||
as_add_wip_wait(as);
|
||||
as_add_write_reg(as, FESPI_REG_CSMODE, FESPI_CSMODE_AUTO);
|
||||
as_add_set_dir(as, FESPI_DIR_TX);
|
||||
|
||||
return ERROR_OK;
|
||||
}
|
||||
|
||||
static int steps_execute(struct algorithm_steps *as,
|
||||
struct flash_bank *bank, struct working_area *algorithm_wa,
|
||||
struct working_area *data_wa)
|
||||
{
|
||||
struct target *target = bank->target;
|
||||
struct fespi_flash_bank *fespi_info = bank->driver_priv;
|
||||
uint32_t ctrl_base = fespi_info->ctrl_base;
|
||||
int xlen = riscv_xlen(target);
|
||||
|
||||
struct reg_param reg_params[2];
|
||||
init_reg_param(®_params[0], "a0", xlen, PARAM_OUT);
|
||||
init_reg_param(®_params[1], "a1", xlen, PARAM_OUT);
|
||||
buf_set_u64(reg_params[0].value, 0, xlen, ctrl_base);
|
||||
buf_set_u64(reg_params[1].value, 0, xlen, data_wa->address);
|
||||
|
||||
int retval = ERROR_OK;
|
||||
while (!as_empty(as)) {
|
||||
keep_alive();
|
||||
uint8_t *data_buf = malloc(data_wa->size);
|
||||
unsigned bytes = as_compile(as, data_buf, data_wa->size);
|
||||
retval = target_write_buffer(target, data_wa->address, bytes,
|
||||
data_buf);
|
||||
free(data_buf);
|
||||
if (retval != ERROR_OK) {
|
||||
LOG_ERROR("Failed to write data to " TARGET_ADDR_FMT ": %d",
|
||||
data_wa->address, retval);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
retval = target_run_algorithm(target, 0, NULL, 2, reg_params,
|
||||
algorithm_wa->address, algorithm_wa->address + 4,
|
||||
10000, NULL);
|
||||
if (retval != ERROR_OK) {
|
||||
LOG_ERROR("Failed to execute algorithm at " TARGET_ADDR_FMT ": %d",
|
||||
algorithm_wa->address, retval);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
destroy_reg_param(®_params[1]);
|
||||
destroy_reg_param(®_params[0]);
|
||||
return retval;
|
||||
}
|
||||
static const uint8_t riscv64_bin[] = {
|
||||
#include "../../../contrib/loaders/flash/fespi/riscv64_fespi.inc"
|
||||
};
|
||||
|
||||
static int fespi_write(struct flash_bank *bank, const uint8_t *buffer,
|
||||
uint32_t offset, uint32_t count)
|
||||
@@ -737,8 +488,7 @@ static int fespi_write(struct flash_bank *bank, const uint8_t *buffer,
|
||||
int sector;
|
||||
int retval = ERROR_OK;
|
||||
|
||||
LOG_DEBUG("%s: offset=0x%08" PRIx32 " count=0x%08" PRIx32,
|
||||
__func__, offset, count);
|
||||
LOG_DEBUG("offset=0x%08" PRIx32 " count=0x%08" PRIx32, offset, count);
|
||||
|
||||
if (target->state != TARGET_HALTED) {
|
||||
LOG_ERROR("Target not halted");
|
||||
@@ -763,80 +513,143 @@ static int fespi_write(struct flash_bank *bank, const uint8_t *buffer,
|
||||
}
|
||||
}
|
||||
|
||||
struct working_area *algorithm_wa;
|
||||
if (target_alloc_working_area(target, sizeof(algorithm_bin),
|
||||
&algorithm_wa) != ERROR_OK) {
|
||||
LOG_WARNING("Couldn't allocate %zd-byte working area.",
|
||||
sizeof(algorithm_bin));
|
||||
algorithm_wa = NULL;
|
||||
int xlen = riscv_xlen(target);
|
||||
struct working_area *algorithm_wa = NULL;
|
||||
struct working_area *data_wa = NULL;
|
||||
const uint8_t *bin;
|
||||
size_t bin_size;
|
||||
if (xlen == 32) {
|
||||
bin = riscv32_bin;
|
||||
bin_size = sizeof(riscv32_bin);
|
||||
} else {
|
||||
bin = riscv64_bin;
|
||||
bin_size = sizeof(riscv64_bin);
|
||||
}
|
||||
|
||||
unsigned data_wa_size = 0;
|
||||
if (target_alloc_working_area(target, bin_size, &algorithm_wa) == ERROR_OK) {
|
||||
retval = target_write_buffer(target, algorithm_wa->address,
|
||||
sizeof(algorithm_bin), algorithm_bin);
|
||||
bin_size, bin);
|
||||
if (retval != ERROR_OK) {
|
||||
LOG_ERROR("Failed to write code to " TARGET_ADDR_FMT ": %d",
|
||||
algorithm_wa->address, retval);
|
||||
target_free_working_area(target, algorithm_wa);
|
||||
algorithm_wa = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
struct working_area *data_wa = NULL;
|
||||
unsigned data_wa_size = 2 * count;
|
||||
while (1) {
|
||||
if (data_wa_size < 128) {
|
||||
LOG_WARNING("Couldn't allocate data working area.");
|
||||
target_free_working_area(target, algorithm_wa);
|
||||
algorithm_wa = NULL;
|
||||
}
|
||||
if (target_alloc_working_area_try(target, data_wa_size, &data_wa) ==
|
||||
ERROR_OK) {
|
||||
break;
|
||||
}
|
||||
data_wa_size = MIN(target->working_area_size - algorithm_wa->size, count);
|
||||
while (1) {
|
||||
if (data_wa_size < 128) {
|
||||
LOG_WARNING("Couldn't allocate data working area.");
|
||||
target_free_working_area(target, algorithm_wa);
|
||||
algorithm_wa = NULL;
|
||||
}
|
||||
if (target_alloc_working_area_try(target, data_wa_size, &data_wa) ==
|
||||
ERROR_OK) {
|
||||
break;
|
||||
}
|
||||
|
||||
data_wa_size /= 2;
|
||||
data_wa_size = data_wa_size * 3 / 4;
|
||||
}
|
||||
} else {
|
||||
LOG_WARNING("Couldn't allocate %zd-byte working area.", bin_size);
|
||||
algorithm_wa = NULL;
|
||||
}
|
||||
|
||||
/* If no valid page_size, use reasonable default. */
|
||||
page_size = fespi_info->dev->pagesize ?
|
||||
fespi_info->dev->pagesize : SPIFLASH_DEF_PAGESIZE;
|
||||
|
||||
fespi_txwm_wait(bank);
|
||||
if (algorithm_wa) {
|
||||
struct reg_param reg_params[5];
|
||||
init_reg_param(®_params[0], "a0", xlen, PARAM_IN_OUT);
|
||||
init_reg_param(®_params[1], "a1", xlen, PARAM_OUT);
|
||||
init_reg_param(®_params[2], "a2", xlen, PARAM_OUT);
|
||||
init_reg_param(®_params[3], "a3", xlen, PARAM_OUT);
|
||||
init_reg_param(®_params[4], "a4", xlen, PARAM_OUT);
|
||||
|
||||
/* Disable Hardware accesses*/
|
||||
if (fespi_disable_hw_mode(bank) != ERROR_OK)
|
||||
return ERROR_FAIL;
|
||||
while (count > 0) {
|
||||
cur_count = MIN(count, data_wa_size);
|
||||
buf_set_u64(reg_params[0].value, 0, xlen, fespi_info->ctrl_base);
|
||||
buf_set_u64(reg_params[1].value, 0, xlen, page_size);
|
||||
buf_set_u64(reg_params[2].value, 0, xlen, data_wa->address);
|
||||
buf_set_u64(reg_params[3].value, 0, xlen, offset);
|
||||
buf_set_u64(reg_params[4].value, 0, xlen, cur_count);
|
||||
|
||||
struct algorithm_steps *as = as_new();
|
||||
retval = target_write_buffer(target, data_wa->address, cur_count,
|
||||
buffer);
|
||||
if (retval != ERROR_OK) {
|
||||
LOG_DEBUG("Failed to write %d bytes to " TARGET_ADDR_FMT ": %d",
|
||||
cur_count, data_wa->address, retval);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* poll WIP */
|
||||
retval = fespi_wip(bank, FESPI_PROBE_TIMEOUT);
|
||||
if (retval != ERROR_OK)
|
||||
goto err;
|
||||
LOG_DEBUG("write(ctrl_base=0x%" TARGET_PRIxADDR ", page_size=0x%x, "
|
||||
"address=0x%" TARGET_PRIxADDR ", offset=0x%" PRIx32
|
||||
", count=0x%" PRIx32 "), buffer=%02x %02x %02x %02x %02x %02x ..." PRIx32,
|
||||
fespi_info->ctrl_base, page_size, data_wa->address, offset, cur_count,
|
||||
buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5]);
|
||||
retval = target_run_algorithm(target, 0, NULL, 5, reg_params,
|
||||
algorithm_wa->address, 0, 10000, NULL);
|
||||
if (retval != ERROR_OK) {
|
||||
LOG_ERROR("Failed to execute algorithm at " TARGET_ADDR_FMT ": %d",
|
||||
algorithm_wa->address, retval);
|
||||
goto err;
|
||||
}
|
||||
|
||||
page_offset = offset % page_size;
|
||||
/* central part, aligned words */
|
||||
while (count > 0) {
|
||||
/* clip block at page boundary */
|
||||
if (page_offset + count > page_size)
|
||||
cur_count = page_size - page_offset;
|
||||
else
|
||||
cur_count = count;
|
||||
int algorithm_result = buf_get_u64(reg_params[0].value, 0, xlen);
|
||||
if (algorithm_result != 0) {
|
||||
LOG_ERROR("Algorithm returned error %d", algorithm_result);
|
||||
retval = ERROR_FAIL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (algorithm_wa)
|
||||
retval = steps_add_buffer_write(as, buffer, offset, cur_count);
|
||||
else
|
||||
retval = slow_fespi_write_buffer(bank, buffer, offset, cur_count);
|
||||
page_offset = 0;
|
||||
buffer += cur_count;
|
||||
offset += cur_count;
|
||||
count -= cur_count;
|
||||
}
|
||||
|
||||
target_free_working_area(target, data_wa);
|
||||
target_free_working_area(target, algorithm_wa);
|
||||
|
||||
} else {
|
||||
fespi_txwm_wait(bank);
|
||||
|
||||
/* Disable Hardware accesses*/
|
||||
if (fespi_disable_hw_mode(bank) != ERROR_OK)
|
||||
return ERROR_FAIL;
|
||||
|
||||
/* poll WIP */
|
||||
retval = fespi_wip(bank, FESPI_PROBE_TIMEOUT);
|
||||
if (retval != ERROR_OK)
|
||||
goto err;
|
||||
|
||||
page_offset = 0;
|
||||
buffer += cur_count;
|
||||
offset += cur_count;
|
||||
count -= cur_count;
|
||||
page_offset = offset % page_size;
|
||||
/* central part, aligned words */
|
||||
while (count > 0) {
|
||||
/* clip block at page boundary */
|
||||
if (page_offset + count > page_size)
|
||||
cur_count = page_size - page_offset;
|
||||
else
|
||||
cur_count = count;
|
||||
|
||||
retval = slow_fespi_write_buffer(bank, buffer, offset, cur_count);
|
||||
if (retval != ERROR_OK)
|
||||
goto err;
|
||||
|
||||
page_offset = 0;
|
||||
buffer += cur_count;
|
||||
offset += cur_count;
|
||||
count -= cur_count;
|
||||
}
|
||||
|
||||
/* Switch to HW mode before return to prompt */
|
||||
if (fespi_enable_hw_mode(bank) != ERROR_OK)
|
||||
return ERROR_FAIL;
|
||||
}
|
||||
|
||||
if (algorithm_wa)
|
||||
retval = steps_execute(as, bank, algorithm_wa, data_wa);
|
||||
return ERROR_OK;
|
||||
|
||||
err:
|
||||
if (algorithm_wa) {
|
||||
@@ -844,11 +657,10 @@ err:
|
||||
target_free_working_area(target, algorithm_wa);
|
||||
}
|
||||
|
||||
as_delete(as);
|
||||
|
||||
/* Switch to HW mode before return to prompt */
|
||||
if (fespi_enable_hw_mode(bank) != ERROR_OK)
|
||||
return ERROR_FAIL;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user