// Memory copy functions (内存拷贝函数)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <xmmintrin.h> // SSE intrinsics
#include <emmintrin.h> // SSE2 intrinsics: __m128i and the 128-bit integer load/store

#define SIZE_1K 1024
#define SIZE_1M (1024 * 1024)

/**
 * Allocate `size` bytes on a 16-byte boundary using _mm_malloc().
 *
 * @param size  Number of bytes requested.
 * @return Pointer to the allocation, or NULL on failure (a diagnostic is
 *         written with perror() in that case). Memory obtained from
 *         _mm_malloc() must be released with _mm_free(), not free().
 */
void *aligned_malloc(size_t size)
{
    void *ptr = _mm_malloc(size, 16);
    if (!ptr) {
        perror("aligned_malloc failed");
    }
    return ptr;
}

/**
 * Copy `size` bytes from `src` to `dest` in 128-bit chunks.
 *
 * Unaligned load/store intrinsics are used, so neither pointer has to be
 * 16-byte aligned. The bytes left over after the last full 16-byte chunk
 * are copied with memcpy(). As with memcpy(), the regions must not overlap.
 *
 * @param dest  Destination buffer of at least `size` bytes.
 * @param src   Source buffer of at least `size` bytes.
 * @param size  Number of bytes to copy; 0 copies nothing.
 * @return `dest`.
 */
void *simd_memcpy(void *dest, const void *src, size_t size)
{
    unsigned char *d = dest;
    const unsigned char *s = src;
    const size_t vec = sizeof(__m128i);
    /* Largest multiple of the vector width that fits in `size`. */
    const size_t bulk = size - (size % vec);
    size_t off = 0;

    for (; off < bulk; off += vec) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)(s + off));
        _mm_storeu_si128((__m128i *)(d + off), chunk);
    }

    /* Tail shorter than one vector. */
    if (off < size) {
        memcpy(d + off, s + off, size - off);
    }
    return dest;
}

/*
 * Read the benchmark clock into *ts. Returns 0 on success, -1 on failure.
 * CLOCK_MONOTONIC is used when <time.h> exposes it; otherwise (e.g. a strict
 * ISO C11 build) the C11 timespec_get() wall clock is the fallback.
 */
static int bench_clock_now(struct timespec *ts)
{
#ifdef CLOCK_MONOTONIC
    return clock_gettime(CLOCK_MONOTONIC, ts);
#else
    return timespec_get(ts, TIME_UTC) == TIME_UTC ? 0 : -1;
#endif
}

/**
 * Time `iterations` calls of a memcpy-compatible function on two freshly
 * allocated buffers of `size` bytes and print the elapsed time to stdout.
 *
 * @param func        Copy routine to benchmark; must not be NULL.
 * @param size        Number of bytes copied per call; must be non-zero.
 * @param iterations  Number of calls to time (values <= 0 time an empty loop).
 * @param funcName    Label printed in the report; NULL prints "(unnamed)".
 *
 * Invalid arguments are reported on stderr and the function returns.
 * An allocation failure terminates the process with EXIT_FAILURE.
 * If the clock cannot be read, the failure is reported and no timing line
 * is printed.
 */
void test_memcpy(void *(*func)(void *, const void *, size_t), size_t size,
                 int iterations, const char *funcName)
{
    if (!func || size == 0) {
        fprintf(stderr, "Invalid arguments: function is NULL or size is non-positive.\n");
        return;
    }

    const char *label = funcName ? funcName : "(unnamed)";
    char *src = malloc(size);
    char *dst = malloc(size);
    if (!src || !dst) {
        perror("malloc failed");
        free(src);
        free(dst);
        exit(EXIT_FAILURE);
    }

    /* Give the source defined contents so the copy never reads
     * uninitialised memory and the result can be checked afterwards. */
    memset(src, 0xA5, size);

    struct timespec start;
    struct timespec end;
    int clock_ok = (bench_clock_now(&start) == 0);

    if (clock_ok) {
        for (int i = 0; i < iterations; ++i) {
            func(dst, src, size);
        }
        clock_ok = (bench_clock_now(&end) == 0);
    }

    if (!clock_ok) {
        perror("clock_gettime failed");
    } else {
        /* Reading dst back validates the routine and keeps the copies
         * observable. */
        if (iterations > 0 && memcmp(dst, src, size) != 0) {
            fprintf(stderr, "%s: destination does not match source after copy\n", label);
        }

        double elapsed = (double)(end.tv_sec - start.tv_sec)
                       + (double)(end.tv_nsec - start.tv_nsec) / 1e9;
        printf("Function: %s, Size: %zu bytes, Iterations: %d, Time: %.6f s\n",
               label, size, iterations, elapsed);
    }

    free(src);
    free(dst);
}

/* Define MEMCPY_BENCH_NO_MAIN to build the routines above without the
 * benchmark driver, e.g. to link them into a test harness. */
#ifndef MEMCPY_BENCH_NO_MAIN

/*
 * Thin wrapper so the compiler builtin can be passed as a function pointer;
 * NOTE(review): some compilers refuse to take the address of a builtin
 * directly - confirm against the toolchains in use.
 */
static void *call_builtin_memcpy(void *dest, const void *src, size_t size)
{
    return __builtin_memcpy(dest, src, size);
}

int main(void)
{
    const int iterations = 2048 * 2048 * 3;

    /* Standard library memcpy. */
    test_memcpy(memcpy, SIZE_1K, iterations, "memcpy");

    /* Compiler built-in memcpy. */
    test_memcpy(call_builtin_memcpy, SIZE_1K, iterations, "__builtin_memcpy");

    /* SSE2 copy loop; it uses unaligned loads/stores, so the malloc'ed
     * buffers in test_memcpy need no special alignment. */
    test_memcpy(simd_memcpy, SIZE_1K, iterations, "simd_memcpy");

    return 0;
}

#endif /* MEMCPY_BENCH_NO_MAIN */