/* Mirror of https://github.com/ioacademy-jikim/debugging
   (synced 2025-06-08 16:36:21 +00:00; 835 lines, 27 KiB, C). */

/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because they do not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

/* Number of increments each of the two processes applies to every
   shared counter. */
#define NNN 3456987

/* Nonzero iff the low three bits of the address _ptr are all zero,
   i.e. the pointer is 8-byte aligned. */
#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))


/* Atomically add n to the 8-bit quantity at *p, using the native
   atomic primitive of the architecture selected by the VGA_* macro.

   p  address of the byte to update.  Several variants (ppc, MIPS,
      tilegx) operate on the aligned word or doubleword at p rather
      than on the single byte, so p must be suitably aligned; main()
      asserts 8-alignment before calling.
   n  amount to add.

   The load-linked/store-conditional variants retry until the
   conditional store reports success.  The function is noinline so
   that each call executes exactly the instruction sequence below. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* The asm reads p and n back out of memory via %esi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax" "\n\t"
      "movl 4(%%esi),%%ebx" "\n\t"
      "lock; addb %%bl,(%%eax)" "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 64-bit slots addressed via %rsi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addb %%bl,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller.  The add is done on the
      whole word at p, with n shifted into the top byte. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stwcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  The add is done on the
      whole doubleword at p, with n shifted into the top byte. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  n is added unshifted to
      the whole doubleword at p, so a carry out of the low byte
      propagates into the bytes that follow p. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strexb status }; a status of 0 ends the loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov r5, %0" "\n\t"
         "ldr r9, [r5, #0]" "\n\t" // p
         "ldr r10, [r5, #4]" "\n\t" // n
         "ldrexb r8, [r9]" "\n\t"
         "add r8, r8, r10" "\n\t"
         "strexb r4, r8, [r9]" "\n\t"
         "str r4, [r5, #8]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* block = { p, n, stxrb status }; a status of 0 ends the loop. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov x5, %0" "\n\t"
         "ldr x9, [x5, #0]" "\n\t" // p
         "ldr x10, [x5, #8]" "\n\t" // n
         "ldxrb w8, [x9]" "\n\t"
         "add x8, x8, x10" "\n\t"
         "stxrb w4, w8, [x9]" "\n\t"
         "str x4, [x5, #16]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Build the updated word in 'dummy' (old word, with the byte
      selected by icm/stcm mask 1 replaced by byte + n), then
      compare-and-swap it into place, retrying on failure. */
   int dummy;
   __asm__ __volatile__(
      " l 0,%0\n\t"
      "0: st 0,%1\n\t"
      " icm 1,1,%1\n\t"
      " ar 1,%2\n\t"
      " stcm 1,1,%1\n\t"
      " l 1,%1\n\t"
      " cs 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block = { p, n, sc result }; a result of 1 ends the loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "lw $t1, 0($t0)" "\n\t" // p
         "lw $t2, 4($t0)" "\n\t" // n
         "andi $t2, $t2, 0xFF" "\n\t" // n = n and 0xFF
         "li $t4, 0xFF" "\n\t"
         "nor $t4, $t4, $zero" "\n\t" // $t4 = 0xFFFFFF00
         "ll $t3, 0($t1)" "\n\t" // $t3 = old value
         "and $t4, $t4, $t3" "\n\t" // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF" "\n\t" // $t3 = $t3 and 0xFF
         "or $t3, $t3, $t4" "\n\t" // $t3 = $t3 or $t4
         "sc $t3, 0($t1)" "\n\t"
         "sw $t3, 8($t0)" "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* n is pre-shifted into the top byte of the word at p. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "lw $t1, 0($t0)" "\n\t" // p
         "lw $t2, 4($t0)" "\n\t" // n
         "ll $t3, 0($t1)" "\n\t"
         "addu $t3, $t3, $t2" "\n\t"
         "sc $t3, 0($t1)" "\n\t"
         "sw $t3, 8($t0)" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unknown MIPS endianness"
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block = { p, n, sc result }; a result of 1 ends the loop. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "ld $t1, 0($t0)" "\n\t" // p
         "ld $t2, 8($t0)" "\n\t" // n
         "andi $t2, $t2, 0xFF" "\n\t" // n = n and 0xFF
         "li $s0, 0xFF" "\n\t"
         "nor $s0, $s0, $zero" "\n\t" // $s0 = ~0xFF
         "ll $t3, 0($t1)" "\n\t" // $t3 = old value
         "and $s0, $s0, $t3" "\n\t" // $s0 = $t3 and ~0xFF
         "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF" "\n\t" // $t3 = $t3 and 0xFF
         "or $t3, $t3, $s0" "\n\t" // $t3 = $t3 or $s0
         "sc $t3, 0($t1)" "\n\t"
         "sw $t3, 16($t0)" "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* n is pre-shifted into the top byte of the doubleword at p. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "ld $t1, 0($t0)" "\n\t" // p
         "ld $t2, 8($t0)" "\n\t" // n
         "lld $t3, 0($t1)" "\n\t"
         "daddu $t3, $t3, $t2" "\n\t"
         "scd $t3, 0($t1)" "\n\t"
         "sd $t3, 16($t0)" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unknown MIPS endianness"
#endif
#elif defined(VGA_tilegx)
   /* Compare-and-exchange loop on the 32-bit word at p4.
      NOTE(review): the shift count is the byte offset (p & 3), not a
      bit count, and p4 is rounded up rather than down; both are
      harmless only while p is 4-aligned (offset 0), as main()
      guarantees -- confirm before using with an unaligned p. */
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int mask = (0xff) << ((int)p & 3);
   unsigned int add = (n & 0xff) << ((int)p & 3);
   unsigned int x, new;

   while(1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if ( __insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}


/* Atomically add n to the 16-bit quantity at *p, using the native
   atomic primitive of the architecture selected by the VGA_* macro.

   p  address of the halfword to update.  Several variants (ppc, MIPS,
      tilegx) operate on the aligned word or doubleword at p rather
      than on the halfword alone, so p must be suitably aligned;
      main() asserts 8-alignment before calling.
   n  amount to add.

   The load-linked/store-conditional variants retry until the
   conditional store reports success.  The function is noinline so
   that each call executes exactly the instruction sequence below. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* The asm reads p and n back out of memory via %esi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax" "\n\t"
      "movl 4(%%esi),%%ebx" "\n\t"
      "lock; addw %%bx,(%%eax)" "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 64-bit slots addressed via %rsi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addw %%bx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller (which in fact supplies an
      8-aligned p).  The add is done on the whole word at p, with n
      shifted into the top halfword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stwcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  The add is done on the
      whole doubleword at p, with n shifted into the top halfword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  n is added unshifted to
      the whole doubleword at p, so a carry out of the low halfword
      propagates into the bytes that follow it. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strexh status }; a status of 0 ends the loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov r5, %0" "\n\t"
         "ldr r9, [r5, #0]" "\n\t" // p
         "ldr r10, [r5, #4]" "\n\t" // n
         "ldrexh r8, [r9]" "\n\t"
         "add r8, r8, r10" "\n\t"
         "strexh r4, r8, [r9]" "\n\t"
         "str r4, [r5, #8]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* block = { p, n, stxrh status }; a status of 0 ends the loop. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov x5, %0" "\n\t"
         "ldr x9, [x5, #0]" "\n\t" // p
         "ldr x10, [x5, #8]" "\n\t" // n
         "ldxrh w8, [x9]" "\n\t"
         "add x8, x8, x10" "\n\t"
         "stxrh w4, w8, [x9]" "\n\t"
         "str x4, [x5, #16]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Build the updated word in 'dummy' (old word, with the two bytes
      selected by icm/stcm mask 3 replaced by halfword + n), then
      compare-and-swap it into place, retrying on failure. */
   int dummy;
   __asm__ __volatile__(
      " l 0,%0\n\t"
      "0: st 0,%1\n\t"
      " icm 1,3,%1\n\t"
      " ar 1,%2\n\t"
      " stcm 1,3,%1\n\t"
      " l 1,%1\n\t"
      " cs 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block = { p, n, sc result }; a result of 1 ends the loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "lw $t1, 0($t0)" "\n\t" // p
         "lw $t2, 4($t0)" "\n\t" // n
         "andi $t2, $t2, 0xFFFF" "\n\t" // n = n and 0xFFFF
         "li $t4, 0xFFFF" "\n\t"
         "nor $t4, $t4, $zero" "\n\t" // $t4 = 0xFFFF0000
         "ll $t3, 0($t1)" "\n\t" // $t3 = old value
         "and $t4, $t4, $t3" "\n\t" // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF" "\n\t" // $t3 = $t3 and 0xFFFF
         "or $t3, $t3, $t4" "\n\t" // $t3 = $t3 or $t4
         "sc $t3, 0($t1)" "\n\t"
         "sw $t3, 8($t0)" "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* n is pre-shifted into the top halfword of the word at p. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "lw $t1, 0($t0)" "\n\t" // p
         "lw $t2, 4($t0)" "\n\t" // n
         "ll $t3, 0($t1)" "\n\t"
         "addu $t3, $t3, $t2" "\n\t"
         "sc $t3, 0($t1)" "\n\t"
         "sw $t3, 8($t0)" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unknown MIPS endianness"
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block = { p, n, sc result }; a result of 1 ends the loop. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "ld $t1, 0($t0)" "\n\t" // p
         "ld $t2, 8($t0)" "\n\t" // n
         "andi $t2, $t2, 0xFFFF" "\n\t" // n = n and 0xFFFF
         "li $s0, 0xFFFF" "\n\t"
         "nor $s0, $s0, $zero" "\n\t" // $s0 = ~0xFFFF
         "ll $t3, 0($t1)" "\n\t" // $t3 = old value
         "and $s0, $s0, $t3" "\n\t" // $s0 = $t3 and ~0xFFFF
         "addu $t3, $t3, $t2" "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF" "\n\t" // $t3 = $t3 and 0xFFFF
         "or $t3, $t3, $s0" "\n\t" // $t3 = $t3 or $s0
         "sc $t3, 0($t1)" "\n\t"
         "sw $t3, 16($t0)" "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* n is pre-shifted into the top halfword of the doubleword at p. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "ld $t1, 0($t0)" "\n\t" // p
         "ld $t2, 8($t0)" "\n\t" // n
         "lld $t3, 0($t1)" "\n\t"
         "daddu $t3, $t3, $t2" "\n\t"
         "scd $t3, 0($t1)" "\n\t"
         "sd $t3, 16($t0)" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unknown MIPS endianness"
#endif
#elif defined(VGA_tilegx)
   /* Compare-and-exchange loop on the 32-bit word at p4.
      NOTE(review): the shift count is the byte offset (p & 3), not a
      bit count, and p4 is rounded up rather than down; both are
      harmless only while p is 4-aligned (offset 0), as main()
      guarantees -- confirm before using with an unaligned p. */
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int mask = (0xffff) << ((int)p & 3);
   unsigned int add = (n & 0xffff) << ((int)p & 3);
   unsigned int x, new;

   while(1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if ( __insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}

/* Atomically add n to the 32-bit quantity at *p, using the native
   atomic primitive of the architecture selected by the VGA_* macro.

   p  address of the word to update.  The ppc64 variants operate on
      the aligned doubleword at p, so p must be 8-aligned there;
      main() asserts 8-alignment before calling.
   n  amount to add.

   The load-linked/store-conditional variants retry until the
   conditional store reports success.  The function is noinline so
   that each call executes exactly the instruction sequence below. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* The asm reads p and n back out of memory via %esi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax" "\n\t"
      "movl 4(%%esi),%%ebx" "\n\t"
      "lock; addl %%ebx,(%%eax)" "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 64-bit slots addressed via %rsi. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addl %%ebx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Native word-sized lwarx/stwcx. loop; no shifting needed. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stwcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  The add is done on the
      whole doubleword at p, with n shifted into the top word. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  n is added unshifted to
      the whole doubleword at p, so a carry out of the low word
      propagates into the four bytes that follow it. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strex status }; a status of 0 ends the loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov r5, %0" "\n\t"
         "ldr r9, [r5, #0]" "\n\t" // p
         "ldr r10, [r5, #4]" "\n\t" // n
         "ldrex r8, [r9]" "\n\t"
         "add r8, r8, r10" "\n\t"
         "strex r4, r8, [r9]" "\n\t"
         "str r4, [r5, #8]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* block = { p, n, stxr status }; a status of 0 ends the loop. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov x5, %0" "\n\t"
         "ldr x9, [x5, #0]" "\n\t" // p
         "ldr x10, [x5, #8]" "\n\t" // n
         "ldxr w8, [x9]" "\n\t"
         "add x8, x8, x10" "\n\t"
         "stxr w4, w8, [x9]" "\n\t"
         "str x4, [x5, #16]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Load the word, add n to a copy, compare-and-swap it back in;
      retry on failure. */
   __asm__ __volatile__(
      " l 0,%0\n\t"
      "0: lr 1,0\n\t"
      " ar 1,%1\n\t"
      " cs 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* block = { p, n, sc result }; a result of 1 ends the loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "lw $t1, 0($t0)" "\n\t" // p
         "lw $t2, 4($t0)" "\n\t" // n
         "ll $t3, 0($t1)" "\n\t"
         "addu $t3, $t3, $t2" "\n\t"
         "sc $t3, 0($t1)" "\n\t"
         "sw $t3, 8($t0)" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   /* block = { p, n, sc result }; a result of 1 ends the loop. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "ld $t1, 0($t0)" "\n\t" // p
         "ld $t2, 8($t0)" "\n\t" // n
         "ll $t3, 0($t1)" "\n\t"
         "addu $t3, $t3, $t2" "\n\t"
         "sc $t3, 0($t1)" "\n\t"
         "sd $t3, 16($t0)" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
   __insn_fetchadd4(p, n);
#else
# error "Unsupported arch"
#endif
}

/* Atomically add n to the 64-bit quantity at *p, using the native
   atomic primitive of the architecture selected by the VGA_* macro.

   p  address of the doubleword to update; main() asserts that it is
      8-aligned before calling.
   n  amount to add.

   On x86, ppc32 and mips32 this function deliberately does nothing,
   which is why main() accepts a final 64-bit total of 0 as well as
   the real sum.  The load-linked/store-conditional variants retry
   until the conditional store reports success.  The function is
   noinline so that each call executes exactly the instruction
   sequence below. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* Native doubleword-sized ldarx/stdcx. loop; no shifting needed. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strexd status }, each slot 64 bits wide.  Only
      the 32 bits at offset 16 of the status slot are overwritten, so
      on success (status 0) the slot reads 0xFFFFFFFF00000000. */
   unsigned long long int block[3]
      = { (unsigned long long int)(unsigned long)p,
          (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov r5, %0" "\n\t"
         "ldr r8, [r5, #0]" "\n\t" // p
         "ldrd r2, r3, [r5, #8]" "\n\t" // n
         "ldrexd r0, r1, [r8]" "\n\t"
         "adds r2, r2, r0" "\n\t"
         "adc r3, r3, r1" "\n\t"
         "strexd r1, r2, r3, [r8]" "\n\t"
         "str r1, [r5, #16]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   /* block = { p, n, stxr status }; a status of 0 ends the loop. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov x5, %0" "\n\t"
         "ldr x9, [x5, #0]" "\n\t" // p
         "ldr x10, [x5, #8]" "\n\t" // n
         "ldxr x8, [x9]" "\n\t"
         "add x8, x8, x10" "\n\t"
         "stxr w4, x8, [x9]" "\n\t"
         "str x4, [x5, #16]" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Load the doubleword, add n to a copy, compare-and-swap it back
      in; retry on failure. */
   __asm__ __volatile__(
      " lg 0,%0\n\t"
      "0: lgr 1,0\n\t"
      " agr 1,%1\n\t"
      " csg 0,1,%0\n\t"
      " jl 0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   /* block = { p, n, scd result }; a result of 1 ends the loop. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0" "\n\t"
         "ld $t1, 0($t0)" "\n\t" // p
         "ld $t2, 8($t0)" "\n\t" // n
         "lld $t3, 0($t1)" "\n\t"
         "daddu $t3, $t3, $t2" "\n\t"
         "scd $t3, 0($t1)" "\n\t"
         "sd $t3, 16($t0)" "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
   __insn_fetchadd(p, n);
#else
# error "Unsupported arch"
#endif
}

int main ( int argc, char** argv )
|
|
{
|
|
int i, status;
|
|
char* page;
|
|
char* p8;
|
|
short* p16;
|
|
int* p32;
|
|
long long int* p64;
|
|
pid_t child, p2;
|
|
|
|
printf("parent, pre-fork\n");
|
|
|
|
page = mmap( 0, sysconf(_SC_PAGESIZE),
|
|
PROT_READ|PROT_WRITE,
|
|
MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
|
|
if (page == MAP_FAILED) {
|
|
perror("mmap failed");
|
|
exit(1);
|
|
}
|
|
|
|
p8 = (char*)(page+0);
|
|
p16 = (short*)(page+256);
|
|
p32 = (int*)(page+512);
|
|
p64 = (long long int*)(page+768);
|
|
|
|
assert( IS_8_ALIGNED(p8) );
|
|
assert( IS_8_ALIGNED(p16) );
|
|
assert( IS_8_ALIGNED(p32) );
|
|
assert( IS_8_ALIGNED(p64) );
|
|
|
|
memset(page, 0, 1024);
|
|
|
|
*p8 = 0;
|
|
*p16 = 0;
|
|
*p32 = 0;
|
|
*p64 = 0;
|
|
|
|
child = fork();
|
|
if (child == -1) {
|
|
perror("fork() failed\n");
|
|
return 1;
|
|
}
|
|
|
|
if (child == 0) {
|
|
/* --- CHILD --- */
|
|
printf("child\n");
|
|
for (i = 0; i < NNN; i++) {
|
|
atomic_add_8bit(p8, 1);
|
|
atomic_add_16bit(p16, 1);
|
|
atomic_add_32bit(p32, 1);
|
|
atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
|
|
}
|
|
return 1;
|
|
/* NOTREACHED */
|
|
|
|
}
|
|
|
|
/* --- PARENT --- */
|
|
|
|
printf("parent\n");
|
|
|
|
for (i = 0; i < NNN; i++) {
|
|
atomic_add_8bit(p8, 1);
|
|
atomic_add_16bit(p16, 1);
|
|
atomic_add_32bit(p32, 1);
|
|
atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
|
|
}
|
|
|
|
p2 = waitpid(child, &status, 0);
|
|
assert(p2 == child);
|
|
|
|
/* assert that child finished normally */
|
|
assert(WIFEXITED(status));
|
|
|
|
printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n",
|
|
(int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
|
|
|
|
if (-74 == (int)(*(signed char*)p8)
|
|
&& 32694 == (int)(*p16)
|
|
&& 6913974 == *p32
|
|
&& (0LL == *p64 || 682858642110LL == *p64)) {
|
|
printf("PASS\n");
|
|
} else {
|
|
printf("FAIL -- see source code for expected values\n");
|
|
}
|
|
|
|
printf("parent exits\n");
|
|
|
|
return 0;
|
|
}
|