Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/audio/module_adapter/module/generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,9 @@ EXPORT_SYMBOL(z_impl_mod_fast_get);
static int free_contents(struct processing_module *mod, struct module_resource *container)
{
struct module_resources *res = &mod->priv.resources;
#if CONFIG_FAST_GET
struct k_mem_domain *mdom;
#endif

switch (container->type) {
case MOD_RES_HEAP:
Expand All @@ -354,7 +357,12 @@ static int free_contents(struct processing_module *mod, struct module_resource *
#endif
#if CONFIG_FAST_GET
case MOD_RES_FAST_GET:
fast_put(res->heap, container->sram_ptr);
#if CONFIG_USERSPACE
mdom = mod->mdom;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The userspace proxy already stores a pointer to the memory domain in a different place. Need to distinguishing the behavior for PROXY/APPLICATION.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@softwarecki is this patch breaking anything for the "thread" model? If not, I'd propose to merge it to fix the "application" model which is currently broken because of this fast-get regression. Once it's all fixed we can carefully check what we can improve

#else
mdom = NULL;
#endif
fast_put(res->heap, mdom, container->sram_ptr);
return 0;
#endif
default:
Expand Down
10 changes: 10 additions & 0 deletions src/audio/src/src_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,16 @@ __cold int src_free(struct processing_module *mod)
assert_can_be_cold();

comp_info(mod->dev, "entry");

#if CONFIG_FAST_GET
struct src_param *a = &cd->param;

if (a->stage1)
mod_fast_put(mod, a->stage1->coefs);
if (a->stage2)
mod_fast_put(mod, a->stage2->coefs);
#endif

mod_free(mod, cd->delay_lines);
mod_free(mod, cd);

Expand Down
4 changes: 1 addition & 3 deletions src/include/module/module/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,8 @@ struct processing_module {
enum module_processing_type proc_type;
#if CONFIG_USERSPACE
struct userspace_context *user_ctx;
#endif /* CONFIG_USERSPACE */
#if CONFIG_SOF_USERSPACE_APPLICATION
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this change is unnecessary, because the proxy already stores the memory domain in its context.

struct k_mem_domain *mdom;
#endif
#endif /* CONFIG_USERSPACE */
#endif /* SOF_MODULE_PRIVATE */
};

Expand Down
5 changes: 3 additions & 2 deletions src/include/sof/lib/fast-get.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <stddef.h>

struct k_heap;
struct k_mem_domain;

/*
* When built for SOF, fast_get() and fast_put() are only needed when DRAM
Expand All @@ -25,13 +26,13 @@ struct k_heap;
(CONFIG_LLEXT_TYPE_ELF_RELOCATABLE || !defined(LL_EXTENSION_BUILD))) || \
!CONFIG_SOF_FULL_ZEPHYR_APPLICATION
const void *fast_get(struct k_heap *heap, const void * const dram_ptr, size_t size);
void fast_put(struct k_heap *heap, const void *sram_ptr);
void fast_put(struct k_heap *heap, struct k_mem_domain *mdom, const void *sram_ptr);
#else
static inline const void *fast_get(struct k_heap *heap, const void * const dram_ptr, size_t size)
{
return dram_ptr;
}
static inline void fast_put(struct k_heap *heap, const void *sram_ptr) {}
static inline void fast_put(struct k_heap *heap, struct k_mem_domain *mdom, const void *sram_ptr) {}
#endif

#endif /* __SOF_LIB_FAST_GET_H__ */
17 changes: 17 additions & 0 deletions src/schedule/zephyr_dp_schedule_application.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,23 @@ static void scheduler_dp_domain_free(struct task_dp_pdata *pdata)
k_mem_domain_remove_partition(mdom, pdata->mpart + SOF_DP_PART_CFG);
k_mem_domain_remove_partition(mdom, pdata->mpart + SOF_DP_PART_CFG_CACHE);

if (mdom->num_partitions) {
/* Violation: all partitions should be freed by now */
unsigned int n = mdom->num_partitions;

LOG_WRN("%u domain partition(s) not freed, force-freeing them all", n);

for (unsigned int i = 0; i < arch_mem_domain_max_partitions_get() && n; i++) {
struct k_mem_partition *dpart = &mdom->partitions[i];

if (dpart->size) {
k_mem_domain_remove_partition(mdom, dpart);
n--;
}
}
}

/* All partitions removed, the domain can be freed now */
pmod->mdom = NULL;
objpool_free(&dp_mdom_head, mdom);
}
Expand Down
10 changes: 5 additions & 5 deletions test/cmocka/src/lib/fast-get/fast-get-tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static void test_simple_fast_get_put(void **state)
assert(ret);
assert(!memcmp(ret, testdata[0], sizeof(testdata[0])));

fast_put(NULL, ret);
fast_put(NULL, NULL, ret);
}

static void test_fast_get_size_missmatch_test(void **state)
Expand All @@ -94,7 +94,7 @@ static void test_fast_get_size_missmatch_test(void **state)
ret[1] = fast_get(NULL, testdata[0], sizeof(testdata[0]) + 1);
assert(!ret[1]);

fast_put(NULL, ret);
fast_put(NULL, NULL, ret);
}

static void test_over_32_fast_gets_and_puts(void **state)
Expand All @@ -111,7 +111,7 @@ static void test_over_32_fast_gets_and_puts(void **state)
assert(!memcmp(copy[i], testdata[i], sizeof(testdata[0])));

for (i = 0; i < ARRAY_SIZE(copy); i++)
fast_put(NULL, copy[i]);
fast_put(NULL, NULL, copy[i]);
}

static void test_fast_get_refcounting(void **state)
Expand All @@ -133,13 +133,13 @@ static void test_fast_get_refcounting(void **state)
assert(!memcmp(copy[0][i], testdata[i], sizeof(testdata[0])));

for (i = 0; i < ARRAY_SIZE(copy[0]); i++)
fast_put(NULL, copy[0][i]);
fast_put(NULL, NULL, copy[0][i]);

for (i = 0; i < ARRAY_SIZE(copy[0]); i++)
assert(!memcmp(copy[1][i], testdata[i], sizeof(testdata[0])));

for (i = 0; i < ARRAY_SIZE(copy[0]); i++)
fast_put(NULL, copy[1][i]);
fast_put(NULL, NULL, copy[1][i]);
}

int main(void)
Expand Down
10 changes: 5 additions & 5 deletions test/ztest/unit/fast-get/test_fast_get_ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ ZTEST(fast_get_suite, test_simple_fast_get_put)
zassert_mem_equal(ret, testdata[0], sizeof(testdata[0]),
"Returned data should match original data");

fast_put(NULL, ret);
fast_put(NULL, NULL, ret);
}

/**
Expand All @@ -94,7 +94,7 @@ ZTEST(fast_get_suite, test_fast_get_size_missmatch_test)
ret[1] = fast_get(NULL, testdata[0], sizeof(testdata[0]) + 1);
zassert_is_null(ret[1], "fast_get with different size should return NULL");

fast_put(NULL, ret[0]);
fast_put(NULL, NULL, ret[0]);
}

/**
Expand All @@ -116,7 +116,7 @@ ZTEST(fast_get_suite, test_over_32_fast_gets_and_puts)
"Data at index %d should match original", i);

for (i = 0; i < ARRAY_SIZE(copy); i++)
fast_put(NULL, copy[i]);
fast_put(NULL, NULL, copy[i]);
}

/**
Expand Down Expand Up @@ -147,7 +147,7 @@ ZTEST(fast_get_suite, test_fast_get_refcounting)

/* Release first set of references */
for (i = 0; i < ARRAY_SIZE(copy[0]); i++)
fast_put(NULL, copy[0][i]);
fast_put(NULL, NULL, copy[0][i]);

/* Data should still be valid through second set of references */
for (i = 0; i < ARRAY_SIZE(copy[0]); i++)
Expand All @@ -156,7 +156,7 @@ ZTEST(fast_get_suite, test_fast_get_refcounting)

/* Release second set of references */
for (i = 0; i < ARRAY_SIZE(copy[0]); i++)
fast_put(NULL, copy[1][i]);
fast_put(NULL, NULL, copy[1][i]);
}

/**
Expand Down
49 changes: 27 additions & 22 deletions zephyr/lib/fast-get.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ struct sof_fast_get_entry {
const void *dram_ptr;
void *sram_ptr;
#if CONFIG_USERSPACE
struct k_thread *thread;
struct k_mem_domain *mdom;
#endif
size_t size;
unsigned int refcount;
Expand Down Expand Up @@ -103,7 +103,7 @@ static struct sof_fast_get_entry *fast_get_find_entry(struct sof_fast_get_data *
#endif

#if CONFIG_USERSPACE
static bool fast_get_domain_exists(struct k_thread *thread, void *start, size_t size)
static bool fast_get_partition_exists(struct k_thread *thread, void *start, size_t size)
{
struct k_mem_domain *domain = thread->mem_domain_info.mem_domain;

Expand All @@ -117,7 +117,7 @@ static bool fast_get_domain_exists(struct k_thread *thread, void *start, size_t
return false;
}

static int fast_get_access_grant(k_tid_t thread, void *addr, size_t size)
static int fast_get_access_grant(struct k_mem_domain *mdom, void *addr, size_t size)
{
struct k_mem_partition part = {
.start = (uintptr_t)addr,
Expand All @@ -126,7 +126,7 @@ static int fast_get_access_grant(k_tid_t thread, void *addr, size_t size)
};

LOG_DBG("add %#zx @ %p", part.size, addr);
return k_mem_domain_add_partition(thread->mem_domain_info.mem_domain, &part);
return k_mem_domain_add_partition(mdom, &part);
}
#endif /* CONFIG_USERSPACE */

Expand Down Expand Up @@ -183,15 +183,21 @@ const void *fast_get(struct k_heap *heap, const void *dram_ptr, size_t size)
ret = entry->sram_ptr;

#if CONFIG_USERSPACE
/* We only get there for large buffers */
if (k_current_get()->mem_domain_info.mem_domain->num_partitions > 1) {
struct k_mem_domain *mdom = k_current_get()->mem_domain_info.mem_domain;

/*
* We only get there for large buffers, since small buffers with
* enabled userspace don't create fast-get entries
Copy link
Contributor

@wjablon1 wjablon1 Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify, I think this part is well understood (you can deduce this info just by examining fast_get function itself). No need to further explain it.
The mystery part that I was referring to in my previous comment is this:

mdom->num_partitions > 1

I am not sure if this comment is even related. I am guessing the related comment is below:

/* A userspace thread makes the request */

Am I right? If yes, maybe you could just change it to:

/* if num_partitions > 0, the request comes form a userspace thread */

Or you can combine both comments like this:

/* We only get there for large buffers.
/* Check if the request comes from a userspace thread by examining num_partitions
*/

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wjablon1 yes, that test is related to the "userspace thread" comment. @softwarecki wanted to replace that test with a K_USER check, then it will be clearer

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, if we already have a plan for this, forget about my comment and remove unnecessary clarification

*/
if (mdom->num_partitions > 1) {
/* A userspace thread makes the request */
if (k_current_get() != entry->thread &&
!fast_get_domain_exists(k_current_get(), ret,
ALIGN_UP(size, CONFIG_MM_DRV_PAGE_SIZE))) {
LOG_DBG("grant access to thread %p first was %p", k_current_get(),
entry->thread);
int err = fast_get_access_grant(k_current_get(), ret, size);
if (mdom != entry->mdom &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand the purpose of this check. The entry allocation together with the mdom assignment happens in a different part of the code.

If the same module instance calls fast_get again for the same address (unlikely), then this condition will indeed trigger. But what about other instances of the same module? Do they always rely on fast_get_partition_exists on every call?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe just checking for "partition exists" would be enough, but this would be an optimisation, we can optimise it later.

!fast_get_partition_exists(k_current_get(), ret,
ALIGN_UP(size, CONFIG_MM_DRV_PAGE_SIZE))) {
LOG_DBG("grant access to domain %p first was %p", mdom,
entry->mdom);

int err = fast_get_access_grant(mdom, ret, size);

if (err < 0) {
LOG_ERR("failed to grant additional access err=%d", err);
Expand Down Expand Up @@ -228,10 +234,10 @@ const void *fast_get(struct k_heap *heap, const void *dram_ptr, size_t size)
dcache_writeback_region((__sparse_force void __sparse_cache *)entry->sram_ptr, size);

#if CONFIG_USERSPACE
entry->thread = k_current_get();
entry->mdom = k_current_get()->mem_domain_info.mem_domain;
if (size > FAST_GET_MAX_COPY_SIZE) {
/* Otherwise we've allocated on thread's heap, so it already has access */
int err = fast_get_access_grant(entry->thread, ret, size);
int err = fast_get_access_grant(entry->mdom, ret, size);

if (err < 0) {
LOG_ERR("failed to grant access err=%d", err);
Expand Down Expand Up @@ -265,7 +271,7 @@ static struct sof_fast_get_entry *fast_put_find_entry(struct sof_fast_get_data *
return NULL;
}

void fast_put(struct k_heap *heap, const void *sram_ptr)
void fast_put(struct k_heap *heap, struct k_mem_domain *mdom, const void *sram_ptr)
{
struct sof_fast_get_data *data = &fast_get_data;
struct sof_fast_get_entry *entry;
Expand All @@ -281,7 +287,7 @@ void fast_put(struct k_heap *heap, const void *sram_ptr)
entry->refcount--;

if (!entry->refcount) {
LOG_DBG("freeing buffer %p", entry->sram_ptr);
LOG_DBG("freeing buffer %p", sram_ptr);
sof_heap_free(heap, entry->sram_ptr);
}

Expand All @@ -294,17 +300,16 @@ void fast_put(struct k_heap *heap, const void *sram_ptr)
* Order matters: free buffer first (needs partition for cache access),
* then remove partition.
*/
if (entry->size > FAST_GET_MAX_COPY_SIZE && entry->thread) {
if (entry->size > FAST_GET_MAX_COPY_SIZE && entry->mdom && mdom) {
struct k_mem_partition part = {
.start = (uintptr_t)entry->sram_ptr,
.start = (uintptr_t)sram_ptr,
.size = ALIGN_UP(entry->size, CONFIG_MM_DRV_PAGE_SIZE),
.attr = K_MEM_PARTITION_P_RO_U_RO | XTENSA_MMU_CACHED_WB,
};
struct k_mem_domain *domain = k_current_get()->mem_domain_info.mem_domain;

LOG_DBG("removing partition %p size %#zx from thread %p",
(void *)part.start, part.size, k_current_get());
int err = k_mem_domain_remove_partition(domain, &part);
LOG_DBG("removing partition %p size %#zx memory domain %p",
(void *)part.start, part.size, mdom);
int err = k_mem_domain_remove_partition(mdom, &part);

if (err)
LOG_WRN("partition removal failed: %d", err);
Expand Down
Loading