The "Holy Bible" for embedded engineers
Shared memory programming enables multiple cores or processes to communicate through common memory regions. In embedded systems it underpins efficient inter-core communication, data sharing, and parallel processing, but it only works when data consistency is preserved and race conditions are avoided. The examples below use GCC's __sync atomic builtins and ARM barrier instructions; adapt them to your own toolchain and architecture.
// Basic shared memory structure
typedef struct {
    volatile uint32_t flag;
    uint32_t data[100];
    volatile uint32_t read_index;
    volatile uint32_t write_index;
    uint32_t buffer_size;
} __attribute__((aligned(64))) shared_buffer_t;
// Shared memory region definition
typedef struct {
    shared_buffer_t* buffer;
    uint32_t core_id;
    bool is_initialized;
} shared_memory_context_t;

// Initialize shared memory
shared_memory_context_t* create_shared_memory_context(uint32_t core_id) {
    shared_memory_context_t* context = malloc(sizeof(shared_memory_context_t));
    if (!context) return NULL;
    // Point at the shared region (in practice a platform-specific address
    // fixed by the linker script or memory map)
    context->buffer = (shared_buffer_t*)SHARED_MEMORY_BASE_ADDRESS;
    context->core_id = core_id;
    context->is_initialized = false;
    return context;
}
void initialize_shared_buffer(shared_buffer_t* buffer) {
    if (!buffer) return;
    buffer->flag = 0;
    buffer->read_index = 0;
    buffer->write_index = 0;
    buffer->buffer_size = 100;
    // Clear data buffer
    memset(buffer->data, 0, sizeof(buffer->data));
}
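A minimal startup sketch tying the two helpers together; it assumes core 0 owns one-time initialization, that full_memory_barrier() from the barrier section further below is available, and the function names here are illustrative:

// Hypothetical startup code: core 0 initializes the buffer once, then
// publishes it via the flag field; other cores spin until the flag is set.
void core0_startup(void) {
    shared_memory_context_t* ctx = create_shared_memory_context(0);
    if (!ctx) return;
    initialize_shared_buffer(ctx->buffer);
    full_memory_barrier();      // Make the initialization visible to other cores
    ctx->buffer->flag = 1;      // Signal "buffer ready"
    ctx->is_initialized = true;
}

void coreN_startup(uint32_t core_id) {
    shared_memory_context_t* ctx = create_shared_memory_context(core_id);
    if (!ctx) return;
    while (ctx->buffer->flag == 0) { /* spin until core 0 publishes */ }
    ctx->is_initialized = true;
}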
// Memory region mapping for shared access
typedef struct {
    void* virtual_address;
    uint32_t physical_address;
    size_t size;
    uint32_t permissions;
    bool is_shared;
} memory_region_t;

// Platform-specific mapping primitives (prototypes so the code compiles top-down)
void* map_physical_memory(uint32_t physical_addr, size_t size, uint32_t permissions);
void unmap_physical_memory(void* virtual_addr, size_t size);

memory_region_t* map_shared_memory_region(uint32_t physical_addr,
                                          size_t size,
                                          uint32_t permissions) {
    memory_region_t* region = malloc(sizeof(memory_region_t));
    if (!region) return NULL;
    // Map physical memory to a virtual address
    region->physical_address = physical_addr;
    region->virtual_address = map_physical_memory(physical_addr, size, permissions);
    region->size = size;
    region->permissions = permissions;
    region->is_shared = true;
    return region;
}

void* map_physical_memory(uint32_t physical_addr, size_t size, uint32_t permissions) {
    // Platform-specific memory mapping: on an MMU-based system this would
    // program page tables; on a flat-memory MCU the identity mapping is enough
    (void)size;
    (void)permissions;
    return (void*)physical_addr; // Simplified for demonstration
}

void unmap_shared_memory_region(memory_region_t* region) {
    if (region && region->virtual_address) {
        unmap_physical_memory(region->virtual_address, region->size);
        free(region);
    }
}
// Shared memory mutex
typedef struct {
    volatile uint32_t lock;
    uint32_t owner_core;
    uint32_t lock_count;
} __attribute__((aligned(64))) shared_mutex_t;

// Initialize shared mutex
void init_shared_mutex(shared_mutex_t* mutex) {
    mutex->lock = 0;
    mutex->owner_core = 0xFFFFFFFF; // No owner
    mutex->lock_count = 0;
}
// Acquire shared mutex (non-blocking try-acquire, recursive for the owner;
// without the recursion path, lock_count could never exceed 1)
bool acquire_shared_mutex(shared_mutex_t* mutex, uint32_t core_id) {
    // Recursive acquisition by the core that already owns the lock
    if (mutex->lock && mutex->owner_core == core_id) {
        mutex->lock_count++;
        return true;
    }
    // Try to acquire the lock using compare-and-swap (full barrier on success)
    if (__sync_bool_compare_and_swap(&mutex->lock, 0, 1)) {
        mutex->owner_core = core_id;
        mutex->lock_count = 1;
        return true;
    }
    return false;
}

// Release shared mutex
void release_shared_mutex(shared_mutex_t* mutex, uint32_t core_id) {
    if (mutex->owner_core == core_id) {
        mutex->lock_count--;
        if (mutex->lock_count == 0) {
            mutex->owner_core = 0xFFFFFFFF;
            __sync_lock_release(&mutex->lock); // Release barrier, then lock = 0
        }
    }
}
// Example usage with shared buffer
bool write_to_shared_buffer(shared_buffer_t* buffer,
                            shared_mutex_t* mutex,
                            uint32_t data,
                            uint32_t core_id) {
    if (!acquire_shared_mutex(mutex, core_id)) {
        return false;
    }
    // Write data to shared buffer
    if (buffer->write_index < buffer->buffer_size) {
        buffer->data[buffer->write_index] = data;
        buffer->write_index++;
        release_shared_mutex(mutex, core_id);
        return true;
    }
    release_shared_mutex(mutex, core_id);
    return false; // Buffer full
}
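The structure also carries a read_index that the write path never touches. A matching read helper, added here as a sketch under the same locking discipline, might look like:

// Read the next unread element from the shared buffer (sketch)
bool read_from_shared_buffer(shared_buffer_t* buffer,
                             shared_mutex_t* mutex,
                             uint32_t* data,
                             uint32_t core_id) {
    if (!acquire_shared_mutex(mutex, core_id)) {
        return false;
    }
    if (buffer->read_index < buffer->write_index) {
        *data = buffer->data[buffer->read_index];
        buffer->read_index++;
        release_shared_mutex(mutex, core_id);
        return true;
    }
    release_shared_mutex(mutex, core_id);
    return false; // Nothing unread
}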
// Shared semaphore structure
typedef struct {
    volatile uint32_t count;
    volatile uint32_t max_count;
    uint32_t waiting_cores;
} __attribute__((aligned(64))) shared_semaphore_t;

// Initialize shared semaphore
void init_shared_semaphore(shared_semaphore_t* sem, uint32_t initial_count, uint32_t max_count) {
    sem->count = initial_count;
    sem->max_count = max_count;
    sem->waiting_cores = 0;
}
// Wait on shared semaphore (non-blocking; core_id is reserved for
// implementations that track waiting cores)
bool wait_shared_semaphore(shared_semaphore_t* sem, uint32_t core_id) {
    (void)core_id;
    uint32_t expected;
    uint32_t desired;
    do {
        expected = sem->count;
        if (expected == 0) {
            return false; // Semaphore not available
        }
        desired = expected - 1;
    } while (!__sync_bool_compare_and_swap(&sem->count, expected, desired));
    return true;
}

// Signal shared semaphore
bool signal_shared_semaphore(shared_semaphore_t* sem) {
    uint32_t expected;
    uint32_t desired;
    do {
        expected = sem->count;
        if (expected >= sem->max_count) {
            return false; // Semaphore at maximum
        }
        desired = expected + 1;
    } while (!__sync_bool_compare_and_swap(&sem->count, expected, desired));
    return true;
}
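As a usage sketch, the semaphore can count filled slots in the shared buffer: the producer signals after publishing, the consumer waits before reading. This assumes a single producer and a single consumer so that the indices themselves need no lock:

// Hypothetical usage: semaphore counts filled slots (one producer, one consumer)
void producer_core(shared_semaphore_t* items, shared_buffer_t* buf, uint32_t value) {
    if (buf->write_index < buf->buffer_size) {
        buf->data[buf->write_index++] = value;
        write_memory_barrier();            // Publish the data before the count
        signal_shared_semaphore(items);    // One more item available
    }
}

void consumer_core(shared_semaphore_t* items, shared_buffer_t* buf,
                   uint32_t core_id, uint32_t* out) {
    if (wait_shared_semaphore(items, core_id)) { // Non-blocking: fails if empty
        read_memory_barrier();
        *out = buf->data[buf->read_index++];
    }
}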
// Lock-free queue structure (safe only for a single producer and a single
// consumer; the barrier helpers used below are defined in the memory-barrier
// section further down)
typedef struct {
    volatile uint32_t head;
    volatile uint32_t tail;
    uint32_t size;
    uint32_t* data;
} __attribute__((aligned(64))) lock_free_queue_t;

lock_free_queue_t* create_lock_free_queue(uint32_t size) {
    lock_free_queue_t* queue = malloc(sizeof(lock_free_queue_t));
    if (!queue) return NULL;
    queue->data = aligned_alloc(64, size * sizeof(uint32_t));
    if (!queue->data) {
        free(queue);
        return NULL;
    }
    queue->head = 0;
    queue->tail = 0;
    queue->size = size;
    return queue;
}

// Enqueue operation (producer core only)
bool lock_free_enqueue(lock_free_queue_t* queue, uint32_t value) {
    uint32_t tail = queue->tail;
    uint32_t next_tail = (tail + 1) % queue->size;
    if (next_tail == queue->head) {
        return false; // Queue full
    }
    queue->data[tail] = value;
    write_memory_barrier(); // Publish the data before moving tail
    queue->tail = next_tail;
    return true;
}

// Dequeue operation (consumer core only)
bool lock_free_dequeue(lock_free_queue_t* queue, uint32_t* value) {
    uint32_t head = queue->head;
    if (head == queue->tail) {
        return false; // Queue empty
    }
    read_memory_barrier(); // Read the data only after seeing the new tail
    *value = queue->data[head];
    queue->head = (head + 1) % queue->size;
    return true;
}
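A short usage sketch: because the queue has no locks, correctness depends on exactly one core enqueuing and exactly one core dequeuing. The data-source and consumer functions below are hypothetical placeholders:

// Core 0: producer loop (illustrative)
void producer_task(lock_free_queue_t* q) {
    uint32_t sample = read_sensor();        // Hypothetical data source
    while (!lock_free_enqueue(q, sample)) {
        // Queue full: back off or drop, depending on the application
    }
}

// Core 1: consumer loop (illustrative)
void consumer_task(lock_free_queue_t* q) {
    uint32_t sample;
    if (lock_free_dequeue(q, &sample)) {
        process_sample(sample);             // Hypothetical consumer
    }
}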
// Lock-free stack structure
typedef struct node {
    uint32_t data;
    struct node* next;
} lock_free_stack_node_t;

typedef struct {
    lock_free_stack_node_t* volatile top; // Volatile pointer, not pointer-to-volatile
} lock_free_stack_t;

lock_free_stack_t* create_lock_free_stack(void) {
    lock_free_stack_t* stack = malloc(sizeof(lock_free_stack_t));
    if (stack) {
        stack->top = NULL;
    }
    return stack;
}

// Push operation
bool lock_free_push(lock_free_stack_t* stack, uint32_t value) {
    lock_free_stack_node_t* new_node = malloc(sizeof(lock_free_stack_node_t));
    if (!new_node) return false;
    new_node->data = value;
    lock_free_stack_node_t* old_top;
    do {
        old_top = stack->top;
        new_node->next = old_top;
    } while (!__sync_bool_compare_and_swap(&stack->top, old_top, new_node));
    return true;
}

// Pop operation
// Caveat: this textbook version suffers from the ABA problem, and freeing a
// node while another core may still be dereferencing it is a use-after-free.
// Production lock-free stacks use tagged pointers, hazard pointers, or a
// per-stack node pool instead of malloc/free.
bool lock_free_pop(lock_free_stack_t* stack, uint32_t* value) {
    lock_free_stack_node_t* old_top;
    lock_free_stack_node_t* new_top;
    do {
        old_top = stack->top;
        if (!old_top) {
            return false; // Stack empty
        }
        new_top = old_top->next;
    } while (!__sync_bool_compare_and_swap(&stack->top, old_top, new_top));
    *value = old_top->data;
    free(old_top);
    return true;
}
// Memory barrier functions (ARM syntax; adjust for your architecture)
void full_memory_barrier(void) {
    __sync_synchronize(); // Full memory barrier
}

void read_memory_barrier(void) {
    __asm volatile ("dmb ishld" : : : "memory"); // Load barrier (ARMv8; plain "dmb" on Cortex-M)
}

void write_memory_barrier(void) {
    __asm volatile ("dmb ishst" : : : "memory"); // Store barrier, inner shareable domain
}

// Compiler barrier: stops compiler reordering only, emits no instruction
void compiler_barrier(void) {
    __asm volatile ("" : : : "memory");
}
// Memory barrier usage in shared memory: a sequence lock (seqlock).
// The writer makes the sequence odd while a write is in progress; readers
// retry until they see the same even sequence before and after the read,
// which guarantees they never return a torn value.
typedef struct {
    volatile uint32_t data;
    volatile uint32_t sequence;
} __attribute__((aligned(64))) barrier_protected_data_t;

void write_with_barrier(barrier_protected_data_t* shared_data, uint32_t new_data) {
    shared_data->sequence++;        // Odd: write in progress
    write_memory_barrier();
    shared_data->data = new_data;
    write_memory_barrier();         // Data must be visible before the final sequence
    shared_data->sequence++;        // Even: write complete
}

uint32_t read_with_barrier(barrier_protected_data_t* shared_data) {
    uint32_t seq_before, seq_after, data;
    do {
        seq_before = shared_data->sequence;
        read_memory_barrier();      // Read the data only after the sequence
        data = shared_data->data;
        read_memory_barrier();      // Re-read the sequence only after the data
        seq_after = shared_data->sequence;
    } while (seq_before != seq_after || (seq_before & 1)); // Torn or mid-write: retry
    return data;
}
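Usage sketch: one core calls write_with_barrier, any number of cores call read_with_barrier. The global variable and handler function below are illustrative:

// Illustrative use across two cores
barrier_protected_data_t g_status __attribute__((aligned(64)));

void core0_publish_status(uint32_t status) {
    write_with_barrier(&g_status, status);              // Writer core only
}

void core1_poll_status(void) {
    uint32_t status = read_with_barrier(&g_status);     // Consistent snapshot
    handle_status(status);                              // Hypothetical handler
}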
// Atomic operations with memory barriers
// (the __sync builtins below already imply strong ordering; the explicit
// barrier call documents the intent and costs little)
uint32_t atomic_exchange_with_barrier(volatile uint32_t* ptr, uint32_t value) {
    uint32_t result = __sync_lock_test_and_set(ptr, value); // Acquire semantics
    full_memory_barrier();
    return result;
}

uint32_t atomic_compare_exchange_with_barrier(volatile uint32_t* ptr,
                                              uint32_t expected,
                                              uint32_t desired) {
    uint32_t result = __sync_val_compare_and_swap(ptr, expected, desired);
    full_memory_barrier();
    return result; // Returns the previous value; equal to expected on success
}
// Example: Atomic counter with barriers
typedef struct {
    volatile uint32_t counter;
    volatile uint32_t version;
} __attribute__((aligned(64))) atomic_counter_t;

void atomic_increment_with_barrier(atomic_counter_t* counter) {
    uint32_t old_value;
    uint32_t new_value;
    do {
        old_value = counter->counter;
        new_value = old_value + 1;
        // CAS returns the previous value; it succeeded if that equals old_value
    } while (atomic_compare_exchange_with_barrier(&counter->counter,
                                                  old_value, new_value) != old_value);
    counter->version++;
    write_memory_barrier();
}
// Cache line aligned shared data
#define CACHE_LINE_SIZE 64

typedef struct {
    uint32_t core1_data;
    char padding1[CACHE_LINE_SIZE - sizeof(uint32_t)];
    uint32_t core2_data;
    char padding2[CACHE_LINE_SIZE - sizeof(uint32_t)];
    uint32_t core3_data;
    char padding3[CACHE_LINE_SIZE - sizeof(uint32_t)];
    uint32_t core4_data;
    char padding4[CACHE_LINE_SIZE - sizeof(uint32_t)];
} __attribute__((aligned(CACHE_LINE_SIZE))) cache_aligned_shared_data_t;

// Prevent false sharing: each hot variable owns a full cache line
typedef struct {
    uint32_t frequently_accessed;
    char padding[CACHE_LINE_SIZE - sizeof(uint32_t)];
} __attribute__((aligned(CACHE_LINE_SIZE))) isolated_shared_data_t;
// Cache-aware shared memory allocation
void* allocate_cache_aligned_shared_memory(size_t size) {
    size_t aligned_size = ((size + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) * CACHE_LINE_SIZE;
    return aligned_alloc(CACHE_LINE_SIZE, aligned_size);
}

// Cache operations for shared memory.
// Note: __builtin___clear_cache synchronizes the *instruction* cache after
// code generation; it stands in here only as a portable placeholder. Real
// data-cache clean/invalidate needs platform-specific operations (see below).
void flush_cache_for_shared_write(void* address, size_t size) {
    __builtin___clear_cache((char*)address, (char*)address + size);
}

void invalidate_cache_for_shared_read(void* address, size_t size) {
    __builtin___clear_cache((char*)address, (char*)address + size);
}
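On a Cortex-M7 with CMSIS, for example, the data-cache equivalents would be roughly the following (a sketch, assuming the CMSIS core headers for your device are available):

#include "core_cm7.h"  // CMSIS core header, assuming a Cortex-M7 target

// Before another bus master reads data we wrote: clean (write back) the D-cache
void flush_dcache_for_shared_write(void* address, size_t size) {
    SCB_CleanDCache_by_Addr((uint32_t*)address, (int32_t)size);
}

// Before reading data another bus master wrote: invalidate the D-cache lines
void invalidate_dcache_for_shared_read(void* address, size_t size) {
    SCB_InvalidateDCache_by_Addr((uint32_t*)address, (int32_t)size);
}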
// Shared memory with cache management
typedef struct {
    void* data;
    size_t size;
    bool needs_flush;
    bool needs_invalidate;
} cache_managed_shared_memory_t;

cache_managed_shared_memory_t* create_cache_managed_shared_memory(size_t size) {
    cache_managed_shared_memory_t* shared_mem = malloc(sizeof(cache_managed_shared_memory_t));
    if (!shared_mem) return NULL;
    shared_mem->data = allocate_cache_aligned_shared_memory(size);
    if (!shared_mem->data) { // Don't hand back a struct with a NULL buffer
        free(shared_mem);
        return NULL;
    }
    shared_mem->size = size;
    shared_mem->needs_flush = true;
    shared_mem->needs_invalidate = true;
    return shared_mem;
}

void prepare_shared_memory_for_write(cache_managed_shared_memory_t* shared_mem) {
    if (shared_mem->needs_flush) {
        flush_cache_for_shared_write(shared_mem->data, shared_mem->size);
    }
}

void prepare_shared_memory_for_read(cache_managed_shared_memory_t* shared_mem) {
    if (shared_mem->needs_invalidate) {
        invalidate_cache_for_shared_read(shared_mem->data, shared_mem->size);
    }
}
// Inter-core communication structure
#define MAX_CORES 4 // Platform-specific core count (example value)

typedef struct {
    volatile uint32_t message;
    volatile uint32_t sender_core;
    volatile uint32_t receiver_core;
    volatile uint32_t sequence;
} __attribute__((aligned(64))) inter_core_message_t;

typedef struct {
    inter_core_message_t messages[MAX_CORES];
    volatile uint32_t message_count;
} inter_core_communication_t;

// Send message to another core (convention: 0 means "no message", so a
// payload of 0 itself cannot be sent)
bool send_message_to_core(inter_core_communication_t* comm,
                          uint32_t target_core,
                          uint32_t message,
                          uint32_t sender_core) {
    if (target_core >= MAX_CORES || message == 0) return false;
    inter_core_message_t* msg = &comm->messages[target_core];
    msg->message = message;
    msg->sender_core = sender_core;
    msg->receiver_core = target_core;
    msg->sequence++;
    write_memory_barrier(); // Make the message visible before the interrupt fires
    // Trigger interrupt on target core (platform-specific)
    trigger_core_interrupt(target_core);
    return true;
}

// Receive message from another core
bool receive_message_from_core(inter_core_communication_t* comm,
                               uint32_t core_id,
                               uint32_t* message,
                               uint32_t* sender_core) {
    inter_core_message_t* msg = &comm->messages[core_id];
    if (msg->receiver_core == core_id && msg->message != 0) {
        *message = msg->message;
        *sender_core = msg->sender_core;
        msg->message = 0; // Clear the slot for the next message
        return true;
    }
    return false;
}
// Ring buffer for inter-core communication (single producer, single consumer)
typedef struct {
    volatile uint32_t head;
    volatile uint32_t tail;
    volatile uint32_t size;
    volatile uint32_t* buffer;
} __attribute__((aligned(64))) shared_ring_buffer_t;

shared_ring_buffer_t* create_shared_ring_buffer(uint32_t size) {
    shared_ring_buffer_t* ring_buffer = malloc(sizeof(shared_ring_buffer_t));
    if (!ring_buffer) return NULL;
    ring_buffer->buffer = aligned_alloc(64, size * sizeof(uint32_t));
    if (!ring_buffer->buffer) {
        free(ring_buffer);
        return NULL;
    }
    ring_buffer->head = 0;
    ring_buffer->tail = 0;
    ring_buffer->size = size;
    return ring_buffer;
}

// Producer function (called by one core only)
bool ring_buffer_produce(shared_ring_buffer_t* ring_buffer, uint32_t data) {
    uint32_t next_head = (ring_buffer->head + 1) % ring_buffer->size;
    if (next_head == ring_buffer->tail) {
        return false; // Buffer full
    }
    ring_buffer->buffer[ring_buffer->head] = data;
    write_memory_barrier(); // Publish the data before moving head
    ring_buffer->head = next_head;
    return true;
}

// Consumer function (called by another core only)
bool ring_buffer_consume(shared_ring_buffer_t* ring_buffer, uint32_t* data) {
    if (ring_buffer->head == ring_buffer->tail) {
        return false; // Buffer empty
    }
    read_memory_barrier(); // Read the data only after seeing the new head
    *data = ring_buffer->buffer[ring_buffer->tail];
    ring_buffer->tail = (ring_buffer->tail + 1) % ring_buffer->size;
    return true;
}
// Real-time shared memory with deadlines
typedef struct {
    volatile uint32_t data;
    volatile uint32_t timestamp;
    volatile uint32_t deadline;
    volatile uint32_t core_id;
} __attribute__((aligned(64))) real_time_shared_data_t;

// Real-time shared memory access
// (tick comparisons use signed subtraction so they survive counter wraparound)
bool real_time_shared_write(real_time_shared_data_t* shared_data,
                            uint32_t data,
                            uint32_t deadline,
                            uint32_t core_id) {
    uint32_t current_time = get_system_tick_count();
    if ((int32_t)(current_time - deadline) > 0) {
        return false; // Deadline missed
    }
    shared_data->data = data;
    shared_data->timestamp = current_time;
    shared_data->deadline = deadline;
    shared_data->core_id = core_id;
    return true;
}

bool real_time_shared_read(real_time_shared_data_t* shared_data,
                           uint32_t* data,
                           uint32_t* timestamp) {
    uint32_t current_time = get_system_tick_count();
    if ((int32_t)(current_time - shared_data->deadline) > 0) {
        return false; // Data expired
    }
    *data = shared_data->data;
    *timestamp = shared_data->timestamp;
    return true;
}
// Priority-based shared memory access
typedef enum {
    PRIORITY_LOW = 0,
    PRIORITY_MEDIUM = 1,
    PRIORITY_HIGH = 2,
    PRIORITY_CRITICAL = 3
} shared_memory_priority_t;

typedef struct {
    volatile uint32_t data;
    volatile shared_memory_priority_t priority;
    volatile uint32_t access_count;
} __attribute__((aligned(64))) priority_shared_data_t;

// Note: the priority check and the update below are not one atomic step, so
// two cores can race between them; in production, run this under a shared
// mutex or use a CAS on the priority field.
bool priority_shared_write(priority_shared_data_t* shared_data,
                           uint32_t data,
                           shared_memory_priority_t priority) {
    // Check if we can preempt the current access
    if (priority > shared_data->priority) {
        shared_data->data = data;
        shared_data->priority = priority;
        shared_data->access_count++;
        return true;
    }
    return false; // Lower-priority access blocked
}
// WRONG: Race condition in shared memory access
void incorrect_shared_access(volatile uint32_t* shared_counter) {
    uint32_t value = *shared_counter; // Read
    value++;                          // Modify
    *shared_counter = value;          // Write
    // Another core can update the counter between the read and the write,
    // and its update is then silently lost
}

// CORRECT: Atomic operation
void correct_shared_access(volatile uint32_t* shared_counter) {
    __sync_fetch_and_add(shared_counter, 1); // Atomic increment
}
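The __sync builtins used throughout this section predate C11; on toolchains that ship <stdatomic.h>, the same increment can be written portably. A sketch of the modern equivalent:

#include <stdatomic.h>

// C11 equivalent of the __sync-based atomic increment
void correct_shared_access_c11(atomic_uint* shared_counter) {
    atomic_fetch_add_explicit(shared_counter, 1, memory_order_seq_cst);
}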
// WRONG: False sharing between cores
typedef struct {
    uint32_t core1_counter; // Likely shares a cache line with core2_counter,
    uint32_t core2_counter; // so each core's write invalidates the other's cache
} incorrect_shared_counters_t;

// CORRECT: Cache line separation
typedef struct {
    uint32_t core1_counter;
    char padding1[60]; // Pad out to the next cache line
    uint32_t core2_counter;
    char padding2[60];
} __attribute__((aligned(64))) correct_shared_counters_t;

// WRONG: Missing memory barriers
void incorrect_shared_write(volatile uint32_t* data, volatile uint32_t* flag) {
    *data = 0x12345678;
    *flag = 1; // May become visible before the data write
}

// CORRECT: With memory barriers
void correct_shared_write(volatile uint32_t* data, volatile uint32_t* flag) {
    *data = 0x12345678;
    write_memory_barrier(); // Ensure the data write is visible first
    *flag = 1;
}
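With C11 atomics the same data/flag handshake is expressed with release/acquire ordering, which makes the required barrier explicit in the types (a sketch; the reader-side function is an addition for symmetry):

#include <stdatomic.h>
#include <stdbool.h>

// Writer: a release store guarantees *data is visible before the flag
void correct_shared_write_c11(uint32_t* data, atomic_uint* flag) {
    *data = 0x12345678;
    atomic_store_explicit(flag, 1, memory_order_release);
}

// Reader: an acquire load guarantees *data is read only after seeing the flag
bool correct_shared_read_c11(const uint32_t* data, atomic_uint* flag, uint32_t* out) {
    if (atomic_load_explicit(flag, memory_order_acquire) == 1) {
        *out = *data;
        return true;
    }
    return false;
}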
// Producer-consumer pattern with shared memory
typedef struct {
    volatile uint32_t* buffer;
    volatile uint32_t head;
    volatile uint32_t tail;
    volatile uint32_t size;
    shared_mutex_t* mutex;
} producer_consumer_shared_t;

producer_consumer_shared_t* create_producer_consumer_shared(uint32_t buffer_size) {
    producer_consumer_shared_t* pc = malloc(sizeof(producer_consumer_shared_t));
    if (!pc) return NULL;
    pc->buffer = aligned_alloc(64, buffer_size * sizeof(uint32_t));
    pc->head = 0;
    pc->tail = 0;
    pc->size = buffer_size;
    pc->mutex = malloc(sizeof(shared_mutex_t));
    if (pc->buffer && pc->mutex) {
        init_shared_mutex(pc->mutex);
        return pc;
    }
    // Cleanup on failure (free(NULL) is a no-op; cast drops the volatile qualifier)
    free((void*)pc->buffer);
    free(pc->mutex);
    free(pc);
    return NULL;
}

bool producer_put(producer_consumer_shared_t* pc, uint32_t data, uint32_t core_id) {
    if (!acquire_shared_mutex(pc->mutex, core_id)) {
        return false;
    }
    uint32_t next_head = (pc->head + 1) % pc->size;
    if (next_head == pc->tail) {
        release_shared_mutex(pc->mutex, core_id);
        return false; // Buffer full
    }
    pc->buffer[pc->head] = data;
    pc->head = next_head;
    release_shared_mutex(pc->mutex, core_id);
    return true;
}

bool consumer_get(producer_consumer_shared_t* pc, uint32_t* data, uint32_t core_id) {
    if (!acquire_shared_mutex(pc->mutex, core_id)) {
        return false;
    }
    if (pc->head == pc->tail) {
        release_shared_mutex(pc->mutex, core_id);
        return false; // Buffer empty
    }
    *data = pc->buffer[pc->tail];
    pc->tail = (pc->tail + 1) % pc->size;
    release_shared_mutex(pc->mutex, core_id);
    return true;
}
// Validate shared memory access
bool validate_shared_memory_access(void* address, size_t size) {
    // Check cache-line alignment of the address
    if ((uintptr_t)address % 64 != 0) {
        return false;
    }
    // Check that the size is a whole number of cache lines
    if (size % 64 != 0) {
        return false;
    }
    // Check that the address lies inside the shared memory region
    // (SHARED_MEMORY_BASE and SHARED_MEMORY_END are platform-defined)
    if ((uintptr_t)address < SHARED_MEMORY_BASE ||
        (uintptr_t)address >= SHARED_MEMORY_END) {
        return false;
    }
    return true;
}

// Safe shared memory allocation
// (note: for the region check to pass, the allocator must carve memory out
// of the shared region itself rather than the ordinary heap)
void* safe_shared_memory_alloc(size_t size) {
    size_t aligned_size = ((size + 63) / 64) * 64; // Round up to 64 bytes
    void* ptr = aligned_alloc(64, aligned_size);
    if (!ptr) return NULL;
    if (!validate_shared_memory_access(ptr, aligned_size)) {
        free(ptr);
        return NULL;
    }
    return ptr;
}
// Monitor shared memory usage
typedef struct {
    uint32_t access_count;
    uint32_t conflict_count;
    uint32_t last_access_time;
    uint32_t last_access_core;
} shared_memory_monitor_t;

shared_memory_monitor_t* create_shared_memory_monitor(void) {
    shared_memory_monitor_t* monitor = malloc(sizeof(shared_memory_monitor_t));
    if (monitor) {
        monitor->access_count = 0;
        monitor->conflict_count = 0;
        monitor->last_access_time = 0;
        monitor->last_access_core = 0;
    }
    return monitor;
}

void update_shared_memory_monitor(shared_memory_monitor_t* monitor,
                                  uint32_t core_id,
                                  bool conflict) {
    monitor->access_count++;
    monitor->last_access_time = get_system_tick_count();
    monitor->last_access_core = core_id;
    if (conflict) {
        monitor->conflict_count++;
    }
}
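A usage sketch wiring the monitor into the mutex path, counting a conflict each time an acquire attempt fails (the wrapper name is illustrative):

// Record every acquire attempt, flagging failures as conflicts
bool monitored_acquire(shared_mutex_t* mutex,
                       shared_memory_monitor_t* monitor,
                       uint32_t core_id) {
    bool acquired = acquire_shared_mutex(mutex, core_id);
    update_shared_memory_monitor(monitor, core_id, !acquired);
    return acquired;
}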