
- added atomic ops & mem. barriers support for (see the usage sketch below):
  - arm
  - armv6 (untested, but it compiles ok)
  - alpha (untested, but it compiles ok)
- fastlock: minor fixes
- Makefile.defs: support for mips64 and armv6; various minor fixes
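A minimal usage sketch of the API these headers provide (illustration only, not part of the commit; the counter and functions are hypothetical). Note the warning in the headers below: the plain atomic ops include no memory barriers, so membar*() or the mb_atomic_* variants must be used where ordering matters.

#include "atomic_ops.h"      /* pulls in the arch-specific atomic/atomic_*.h */

static atomic_t refcnt;      /* hypothetical shared reference counter */

static void obj_ref(void)
{
	atomic_inc(&refcnt);                  /* atomic, but implies no memory barrier */
}

static int obj_unref(void)
{
	membar_write();                       /* make prior writes visible before the release */
	return atomic_dec_and_test(&refcnt);  /* non-zero when the count reaches 0 */
}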

Andrei Pelinescu-Onciul, 19 years ago
commit 0db44da77f
5 changed files with 757 additions and 17 deletions
  1. Makefile.defs (+105 -10)
  2. atomic/atomic_alpha.h (+319 -0)
  3. atomic/atomic_arm.h (+315 -0)
  4. atomic_ops.h (+11 -1)
  5. fastlock.h (+7 -6)

Makefile.defs (+105 -10)

@@ -48,6 +48,8 @@
 #  2006-03-30  64 bit mode compile by default on sparc64 (-m64), added
 #              CC_GCC_LIKE_ASM and SPARC64_MODE (andrei)
 #              sparc <= v8 support (andrei)
+#  2006-03-31  armv6 & mips64 support added
+#              mips and arm set to NOSMP by default (andrei)


 # check if already included/exported
@@ -64,7 +66,7 @@ MAIN_NAME=ser
 VERSION = 0
 PATCHLEVEL = 10
 SUBLEVEL =   99
-EXTRAVERSION = -dev34
+EXTRAVERSION = -dev35

 SER_VER = $(shell expr $(VERSION) \* 1000000 + $(PATCHLEVEL) \* 1000 + \
 			$(SUBLEVEL) )
@@ -78,7 +80,8 @@ else
 endif

 ARCH := $(shell $(GETARCH) |sed -e s/i.86/i386/ -e s/sun4u/sparc64/  \
-			-e s/armv4l/arm/ -e "s/Power Macintosh/ppc/" \
+			-e s/armv[3-5].*/arm/  -e s/armv6.*/arm6/ \
+			-e "s/Power Macintosh/ppc/" \
 			-e "s/cobalt/mips2/" \
 			-e "s/cobalt/mips2/" \
 			-e s/amd64/x86_64/ )
 			-e s/amd64/x86_64/ )
 # fix sparc -> sparc64
 # fix sparc -> sparc64
@@ -447,13 +450,16 @@ endif
 endif

 ifeq ($(ARCH), sparc)
-	# smp no supported on sparc32
-	DEFS+= -DNOSMP # FIXME
 	use_fast_lock=yes
 endif

 ifeq ($(ARCH), arm)
 	use_fast_lock=yes
+	DEFS+=-DNOSMP # very unlikely to have an smp arm
+endif
+
+ifeq ($(ARCH), arm6)
+	use_fast_lock=yes
 endif

 ifeq ($(ARCH), ppc)
@@ -467,6 +473,8 @@ endif
 ifeq ($(ARCH), mips)
 # mips1 arch. (e.g. R3000) - no hardware locking support
 	use_fast_lock=no
+	DEFS+=-DMIPS_HAS_LLSC # likely
+	DEFS+=-DNOSMP # very likely
 endif

 ifeq ($(ARCH), mips2)
@@ -474,6 +482,11 @@ ifeq ($(ARCH), mips2)
 	use_fast_lock=yes
 endif

+ifeq ($(ARCH), mips64)
+# mips2 arch and newer (mips3=R4000, mips4=R5000 a.s.o)
+	use_fast_lock=yes
+endif
+
 ifeq ($(ARCH), alpha)
 	use_fast_lock=yes
 endif
@@ -754,20 +767,20 @@ ifeq	($(ARCH), arm)
 ifeq		($(CC_NAME), gcc)
 				DEFS+=-DCC_GCC_LIKE_ASM
 				#common stuff
-				CFLAGS=-O9 -funroll-loops  -Wcast-align $(PROFILE) \
+				CFLAGS=-O9 -funroll-loops $(PROFILE) \
 					-Wall
 			#if gcc 4.x+
 ifeq			($(CC_SHORTVER), 4.x)
-					CFLAGS+=-mcpu=strongarm1100 -minline-all-stringops \
-							-ftree-vectorize
+					CFLAGS+= -ftree-vectorize
+					# not supported on arm: -minline-all-stringops 
 else
 			#if gcc 3.4+
 ifeq			($(CC_SHORTVER), 3.4)
-					CFLAGS+= -mcpu=strongarm1100
+					CFLAGS+=
 else
 			#if gcc 3.0
 ifeq			($(CC_SHORTVER), 3.0)
-					CFLAGS+= -mcpu=strongarm1100
+					CFLAGS+= 
 							#-mcpu=athlon
 else
 ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
@@ -791,6 +804,48 @@ $(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
 endif		#CC_NAME, gcc
 endif	#ARCH, arm

+	#if armv6 cpu 
+ifeq	($(ARCH), arm6)
+		# if gcc 
+ifeq		($(CC_NAME), gcc)
+				DEFS+=-DCC_GCC_LIKE_ASM
+				#common stuff
+				CFLAGS=-march=armv6 -O9 -funroll-loops \
+						$(PROFILE) -Wall   
+			#if gcc 4.x+
+ifeq			($(CC_SHORTVER), 4.x)
+					CFLAGS+= -ftree-vectorize
+else
+			#if gcc 3.4+
+ifeq			($(CC_SHORTVER), 3.4)
+					CFLAGS+=
+else
+			#if gcc 3.0
+ifeq			($(CC_SHORTVER), 3.0)
+					CFLAGS+= 
+							#-mcpu=athlon
+else
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
+					for better results)
+					
+					CFLAGS+=
+else
+				#really old version
+$(warning			You are using an old and unsupported gcc \
+					 version ($(CC_SHORTVER)), compile at your own risk!)
+	
+endif			# CC_SHORTVER, 2.9x
+endif			# CC_SHORTVER, 3.0
+endif			# CC_SHORTVER, 3.4
+endif			# CC_SHORTVER, 4.0
+	
+else		# CC_NAME, gcc
+				#other compilers
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
+endif		#CC_NAME, gcc
+endif	#ARCH, arm6
+
 	#if  mips (R3000)
 ifeq	($(ARCH), mips)
 		# if gcc
@@ -873,6 +928,45 @@ $(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
 endif		#CC_NAME, gcc
 endif	#ARCH, mips2

+#if  >=mips64
+ifeq	($(ARCH), mips64)
+		# if gcc 
+ifeq		($(CC_NAME), gcc)
+				DEFS+=-DCC_GCC_LIKE_ASM
+				#common stuff
+				CFLAGS= -mips64 -O9 -funroll-loops $(PROFILE) \
+					-Wall 
+			#if gcc 4.0+
+ifeq			($(CC_SHORTVER), 4.x)
+					CFLAGS+=-minline-all-stringops -ftree-vectorize
+else
+			#if gcc 3.4+
+ifeq			($(CC_SHORTVER), 3.4)
+					CFLAGS+=
+else
+			#if gcc 3.0
+ifeq			($(CC_SHORTVER), 3.0)
+					CFLAGS+=
+else
+ifeq			($(CC_SHORTVER), 2.9x) #older gcc version (2.9[1-5])
+$(warning 			Old gcc detected ($(CC_SHORTVER)), use  gcc 3.0.x \
+					for better results)
+					CFLAGS+=
+else
+				#really old version
+$(warning			You are using an old and unsupported gcc \
+					 version ($(CC_SHORTVER)), compile at your own risk!)
+	
+endif			# CC_SHORTVER, 2.9x
+endif			# CC_SHORTVER, 3.0
+endif			# CC_SHORTVER, 3.4
+endif			# CC_SHORTVER, 4.x
+	
+else		# CC_NAME, gcc
+				#other compilers
+$(error 			Unsupported compiler ($(CC):$(CC_NAME)), try gcc)
+endif		#CC_NAME, gcc
+endif	#ARCH, mips64

 #if  alpha
 ifeq	($(ARCH), alpha)
@@ -883,7 +977,8 @@ ifeq		($(CC_NAME), gcc)
 				CFLAGS= -O9 -funroll-loops $(PROFILE)  -Wall
 			#if gcc 4.0+
 ifeq			($(CC_SHORTVER), 4.x)
-					CFLAGS+=-minline-all-stringops
+					CFLAGS+= 
+					# not supported: -minline-all-stringops
 else
 			#if gcc 3.4+
 ifeq			($(CC_SHORTVER), 3.4)

atomic/atomic_alpha.h (+319 -0)

@@ -0,0 +1,319 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2006 iptelorg GmbH
+ *
+ * This file is part of ser, a free SIP server.
+ *
+ * ser is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version
+ *
+ * For a license to use the ser software under conditions
+ * other than those described here, or to purchase support for this
+ * software, please contact iptel.org by e-mail at the following addresses:
+ *    [email protected]
+ *
+ * ser is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ *  atomic operations and memory barriers (alpha specific)
+ *  WARNING: atomic ops do not include memory barriers
+ *  see atomic_ops.h for more details 
+ *
+ *  Config defines:  - NOSMP 
+ *                   - __CPU_alpha
+ */
+/* 
+ * History:
+ * --------
+ *  2006-03-31  created by andrei
+ */
+
+
+#ifndef _atomic_alpha_h
+#define _atomic_alpha_h
+
+#define HAVE_ASM_INLINE_ATOMIC_OPS
+#define HAVE_ASM_INLINE_MEMBAR
+
+#warning alpha atomic code was not tested, please report problems to \
+		[email protected] or [email protected]
+
+#ifdef NOSMP
+#define membar() asm volatile ("" : : : "memory") /* gcc barrier: don't cache values across it */
+#define membar_read()  membar()
+#define membar_write() membar()
+#else
+
+#define membar()		asm volatile ("    mb \n\t" : : : "memory" ) 
+#define membar_read()	membar()
+#define membar_write()	asm volatile ("    wmb \n\t" : : : "memory" )
+
+#endif /* NOSMP */
+
+
+
+/* main asm block
+ * if the store fails, jump _forward_ (optimization: backward jumps are
+ *  always predicted taken on alpha) */
+#define ATOMIC_ASM_OP00_int(op) \
+			"1:   ldl_l %0, %2 \n\t" \
+			"     " op "\n\t" \
+			"     stl_c %0, %2 \n\t" \
+			"     beq %0, 2f \n\t" \
+			".subsection 2 \n\t" \
+			"2:   br 1b \n\t" \
+			".previous \n\t"
+
+/* as above, but output in %1 instead of %0 (%0 is not clobbered) */
+#define ATOMIC_ASM_OP01_int(op) \
+			"1:   ldl_l %0, %3 \n\t" \
+			"     " op "\n\t" \
+			"     stl_c %1, %3 \n\t" \
+			"     beq %1, 2f \n\t" \
+			".subsection 2 \n\t" \
+			"2:   br 1b \n\t" \
+			".previous \n\t"
+
+#define ATOMIC_ASM_OP00_long(op) \
+			"1:   ldq_l %0, %2 \n\t" \
+			"     " op "\n\t" \
+			"     stq_c %0, %2 \n\t" \
+			"     beq %0, 2f \n\t" \
+			".subsection 2 \n\t" \
+			"2:   br 1b \n\t" \
+			".previous \n\t"
+
+/* as above, but output in %1 instead of %0 (%0 is not clobbered) */
+#define ATOMIC_ASM_OP01_long(op) \
+			"1:   ldq_l %0, %3 \n\t" \
+			"     " op "\n\t" \
+			"     stq_c %1, %3 \n\t" \
+			"     beq %1, 2f \n\t" \
+			".subsection 2 \n\t" \
+			"2:   br 1b \n\t" \
+			".previous \n\t"
+
+
+
+/* input in %0, output in %0 */
+#define ATOMIC_FUNC_DECL0_0(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var) \
+	{ \
+		P_TYPE ret; \
+		asm volatile( \
+			ATOMIC_ASM_OP00_##P_TYPE(OP) : "=&r"(ret), "=m"(*var) : "m"(*var) \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* input in %0, and %1 (param), output in %1,  %0 goes in ret */
+#define ATOMIC_FUNC_DECL01_1(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
+														P_TYPE v ) \
+	{ \
+		P_TYPE ret; \
+		asm volatile( \
+			ATOMIC_ASM_OP01_##P_TYPE(OP) \
+			: "=&r"(ret), "+r"(v), "=m"(*var)  : "m"(*var) \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* input in %0, output in %1, %0 goes in ret */
+#define ATOMIC_FUNC_DECL0_1(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var) \
+	{ \
+		P_TYPE ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP01_##P_TYPE(OP) \
+			: "=&r"(ret), "=&r"(tmp), "=m"(*var)  : "m"(*var) \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* input in %0 and %3 (param), output in %0 */
+#define ATOMIC_FUNC_DECL03_0(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
+														P_TYPE v) \
+	{ \
+		P_TYPE ret; \
+		asm volatile( \
+			ATOMIC_ASM_OP00_##P_TYPE(OP) \
+			: "=&r"(ret), "=m"(*var)  : "m"(*var), "r"(v) \
+			); \
+		return RET_EXPR; \
+	}
+
+
+ATOMIC_FUNC_DECL0_0(inc, "addl %0, 1, %0", int, void, /* no return */ )
+ATOMIC_FUNC_DECL0_0(dec, "subl %0, 1, %0", int, void, /* no return */ )
+ATOMIC_FUNC_DECL03_0(and, "and %0, %3, %0", int, void, /* no return */ )
+ATOMIC_FUNC_DECL03_0(or,  "bis %0, %3, %0", int, void, /* no return */ )
+ATOMIC_FUNC_DECL0_1(inc_and_test, "addl %0, 1, %1", int, int, (ret+1)==0 )
+ATOMIC_FUNC_DECL0_1(dec_and_test, "subl %0, 1, %1", int, int, (ret-1)==0 )
+ATOMIC_FUNC_DECL01_1(get_and_set, "" /* nothing needed */, int, int, ret )
+
+ATOMIC_FUNC_DECL0_0(inc, "addq %0, 1, %0", long, void, /* no return */ )
+ATOMIC_FUNC_DECL0_0(dec, "subq %0, 1, %0", long, void, /* no return */ )
+ATOMIC_FUNC_DECL03_0(and, "and %0, %3, %0", long, void, /* no return */ )
+ATOMIC_FUNC_DECL03_0(or,  "bis %0, %3, %0", long, void, /* no return */ )
+ATOMIC_FUNC_DECL0_1(inc_and_test, "addq %0, 1, %1", long, long, (ret+1)==0 )
+ATOMIC_FUNC_DECL0_1(dec_and_test, "subq %0, 1, %1", long, long, (ret-1)==0 )
+ATOMIC_FUNC_DECL01_1(get_and_set, "" /* nothing needed */, long, long, ret )
+
+
+#define atomic_inc(var) atomic_inc_int(&(var)->val)
+#define atomic_dec(var) atomic_dec_int(&(var)->val)
+#define atomic_and(var, mask) atomic_and_int(&(var)->val, (mask))
+#define atomic_or(var, mask)  atomic_or_int(&(var)->val, (mask))
+#define atomic_dec_and_test(var) atomic_dec_and_test_int(&(var)->val)
+#define atomic_inc_and_test(var) atomic_inc_and_test_int(&(var)->val)
+#define atomic_get_and_set(var, i) atomic_get_and_set_int(&(var)->val, i)
+
+
+/* with integrated membar */
+
+#define mb_atomic_set_int(v, i) \
+	do{ \
+		membar(); \
+		atomic_set_int(v, i); \
+	}while(0)
+
+
+
+inline static int mb_atomic_get_int(volatile int* v)
+{
+	membar();
+	return atomic_get_int(v);
+}
+
+
+#define mb_atomic_inc_int(v) \
+	do{ \
+		membar(); \
+		atomic_inc_int(v); \
+	}while(0)
+
+#define mb_atomic_dec_int(v) \
+	do{ \
+		membar(); \
+		atomic_dec_int(v); \
+	}while(0)
+
+#define mb_atomic_or_int(v, m) \
+	do{ \
+		membar(); \
+		atomic_or_int(v, m); \
+	}while(0)
+
+#define mb_atomic_and_int(v, m) \
+	do{ \
+		membar(); \
+		atomic_and_int(v, m); \
+	}while(0)
+
+inline static int mb_atomic_inc_and_test_int(volatile int* v)
+{
+	membar();
+	return atomic_inc_and_test_int(v);
+}
+
+inline static int mb_atomic_dec_and_test_int(volatile int* v)
+{
+	membar();
+	return atomic_dec_and_test_int(v);
+}
+
+
+inline static int mb_atomic_get_and_set_int(volatile int* v, int i)
+{
+	membar();
+	return atomic_get_and_set_int(v, i);
+}
+
+
+
+#define mb_atomic_set_long(v, i) \
+	do{ \
+		membar(); \
+		atomic_set_long(v, i); \
+	}while(0)
+
+
+
+inline static long mb_atomic_get_long(volatile long* v)
+{
+	membar();
+	return atomic_get_long(v);
+}
+
+
+#define mb_atomic_inc_long(v) \
+	do{ \
+		membar(); \
+		atomic_inc_long(v); \
+	}while(0)
+
+
+#define mb_atomic_dec_long(v) \
+	do{ \
+		membar(); \
+		atomic_dec_long(v); \
+	}while(0)
+
+#define mb_atomic_or_long(v, m) \
+	do{ \
+		membar(); \
+		atomic_or_long(v, m); \
+	}while(0)
+
+#define mb_atomic_and_long(v, m) \
+	do{ \
+		membar(); \
+		atomic_and_long(v, m); \
+	}while(0)
+
+inline static long mb_atomic_inc_and_test_long(volatile long* v)
+{
+	membar();
+	return atomic_inc_and_test_long(v);
+}
+
+inline static long mb_atomic_dec_and_test_long(volatile long* v)
+{
+	membar();
+	return atomic_dec_and_test_long(v);
+}
+
+
+inline static long mb_atomic_get_and_set_long(volatile long* v, long l)
+{
+	membar();
+	return atomic_get_and_set_long(v, l);
+}
+
+
+#define mb_atomic_inc(var) mb_atomic_inc_int(&(var)->val)
+#define mb_atomic_dec(var) mb_atomic_dec_int(&(var)->val)
+#define mb_atomic_and(var, mask) mb_atomic_and_int(&(var)->val, (mask))
+#define mb_atomic_or(var, mask)  mb_atomic_or_int(&(var)->val, (mask))
+#define mb_atomic_dec_and_test(var) mb_atomic_dec_and_test_int(&(var)->val)
+#define mb_atomic_inc_and_test(var) mb_atomic_inc_and_test_int(&(var)->val)
+#define mb_atomic_get(var)	mb_atomic_get_int(&(var)->val)
+#define mb_atomic_set(var, i)	mb_atomic_set_int(&(var)->val, i)
+#define mb_atomic_get_and_set(var, i) mb_atomic_get_and_set_int(&(var)->val, i)
+
+#endif
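
For readability, this is roughly what ATOMIC_FUNC_DECL0_0(inc, "addl %0, 1, %0", int, void, ...) above expands to (hand-expanded illustration, not part of the commit): a load-locked/store-conditional loop whose retry branch is moved out of line, as the macro comment explains.

inline static void atomic_inc_int(volatile int *var)
{
	int ret;
	asm volatile(
		"1:   ldl_l %0, %2 \n\t"    /* load-locked *var into ret */
		"     addl %0, 1, %0 \n\t"  /* the OP: increment */
		"     stl_c %0, %2 \n\t"    /* store-conditional; ret becomes 0 on failure */
		"     beq %0, 2f \n\t"      /* failed store: branch forward to the retry stub */
		".subsection 2 \n\t"
		"2:   br 1b \n\t"           /* out-of-line retry, back to the load-locked */
		".previous \n\t"
		: "=&r"(ret), "=m"(*var) : "m"(*var)
	);
	return;                         /* RET_EXPR is empty for the void variants */
}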

atomic/atomic_arm.h (+315 -0)

@@ -0,0 +1,315 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2006 iptelorg GmbH
+ *
+ * This file is part of ser, a free SIP server.
+ *
+ * ser is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version
+ *
+ * For a license to use the ser software under conditions
+ * other than those described here, or to purchase support for this
+ * software, please contact iptel.org by e-mail at the following addresses:
+ *    [email protected]
+ *
+ * ser is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ *  atomic ops and memory barriers for arm (>= v3)
+ *  see atomic_ops.h for more details 
+ *
+ * Config defines: - NOSMP
+ *                 - __CPU_arm
+ *                 - __CPU_arm6    - armv6 support (supports atomic ops
+ *                                    via ldrex/strex)
+ */ 
+/* 
+ * History:
+ * --------
+ *  2006-03-31  created by andrei
+ */
+
+
+#ifndef _atomic_arm_h
+#define _atomic_arm_h
+
+
+
+#warning "arm atomic operations support not tested"
+
+#ifdef NOSMP
+#define HAVE_ASM_INLINE_MEMBAR
+#define membar() asm volatile ("" : : : "memory") /* gcc barrier: don't cache values across it */
+#define membar_read()  membar()
+#define membar_write() membar()
+#else /* SMP */
+#warning SMP not supported for arm atomic ops, try compiling with -DNOSMP
+/* fall back to default lock based barriers (don't define HAVE_ASM...) */
+#endif /* NOSMP */
+
+
+#ifdef __CPU_arm6
+
+
+#define HAVE_ASM_INLINE_ATOMIC_OPS
+
+/* hack to get some membars */
+#ifndef NOSMP
+#include "atomic_unknown.h"
+#endif
+
+/* main asm block 
+ *  use %0 as input and write the output in %1*/
+#define ATOMIC_ASM_OP(op) \
+			"1:   ldrex %0, [%3] \n\t" \
+			"     " op "\n\t" \
+			"     strex %0, %1, [%3] \n\t" \
+			"     cmp %0, #0 \n\t" \
+			"     bne 1b \n\t"
+
+/* same as above but writes %4 instead of %1, and %0 will contain 
+ * the prev. val*/
+#define ATOMIC_ASM_OP2(op) \
+			"1:   ldrex %0, [%3] \n\t" \
+			"     " op "\n\t" \
+			"     strex %1, %4, [%3] \n\t" \
+			"     cmp %1, #0 \n\t" \
+			"     bne 1b \n\t"
+
+/* no extra param, %0 contains *var, %1 should contain the result */
+#define ATOMIC_FUNC_DECL(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var) \
+	{ \
+		P_TYPE ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=&r"(tmp), "=&r"(ret), "=m"(*var) : "r"(var)  : "cc" \
+			); \
+		return RET_EXPR; \
+	}
+
+/* one extra param in %4 */
+#define ATOMIC_FUNC_DECL1(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
+														P_TYPE v) \
+	{ \
+		P_TYPE ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP(OP) \
+			: "=&r"(tmp), "=&r"(ret), "=m"(*var) : "r"(var), "r"(v) : "cc" \
+			); \
+		return RET_EXPR; \
+	}
+
+
+/* as above, but %4 should contain the result, and %0 is returned*/
+#define ATOMIC_FUNC_DECL2(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
+	inline static RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
+														P_TYPE v) \
+	{ \
+		P_TYPE ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP2(OP) \
+			: "=&r"(ret), "=&r"(tmp), "=m"(*var) : "r"(var), "r"(v) : "cc" \
+			); \
+		return RET_EXPR; \
+	}
+
+
+
+ATOMIC_FUNC_DECL(inc,      "add  %1, %0, #1", int, void, /* no return */ )
+ATOMIC_FUNC_DECL(dec,      "sub  %1, %0, #1", int, void, /* no return */ )
+ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", int, void, /* no return */ )
+ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", int, void, /* no return */ )
+ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", int, int, ret )
+ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", int, int, ret )
+ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , int, int,  ret)
+
+ATOMIC_FUNC_DECL(inc,      "add  %1, %0, #1", long, void, /* no return */ )
+ATOMIC_FUNC_DECL(dec,      "sub  %1, %0, #1", long, void, /* no return */ )
+ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", long, void, /* no return */ )
+ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", long, void, /* no return */ )
+ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", long, long, ret )
+ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", long, long, ret )
+ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , long, long,  ret)
+
+#define atomic_inc(var) atomic_inc_int(&(var)->val)
+#define atomic_dec(var) atomic_dec_int(&(var)->val)
+#define atomic_and(var, mask) atomic_and_int(&(var)->val, (mask))
+#define atomic_or(var, mask)  atomic_or_int(&(var)->val, (mask))
+#define atomic_dec_and_test(var) atomic_dec_and_test_int(&(var)->val)
+#define atomic_inc_and_test(var) atomic_inc_and_test_int(&(var)->val)
+#define atomic_get_and_set(var, i) atomic_get_and_set_int(&(var)->val, i)
+
+
+/* with integrated membar */
+
+#define mb_atomic_set_int(v, i) \
+	do{ \
+		membar(); \
+		atomic_set_int(v, i); \
+	}while(0)
+
+
+
+inline static int mb_atomic_get_int(volatile int* v)
+{
+	membar();
+	return atomic_get_int(v);
+}
+
+
+#define mb_atomic_inc_int(v) \
+	do{ \
+		membar(); \
+		atomic_inc_int(v); \
+	}while(0)
+
+#define mb_atomic_dec_int(v) \
+	do{ \
+		membar(); \
+		atomic_dec_int(v); \
+	}while(0)
+
+#define mb_atomic_or_int(v, m) \
+	do{ \
+		membar(); \
+		atomic_or_int(v, m); \
+	}while(0)
+
+#define mb_atomic_and_int(v, m) \
+	do{ \
+		membar(); \
+		atomic_and_int(v, m); \
+	}while(0)
+
+inline static int mb_atomic_inc_and_test_int(volatile int* v)
+{
+	membar();
+	return atomic_inc_and_test_int(v);
+}
+
+inline static int mb_atomic_dec_and_test_int(volatile int* v)
+{
+	membar();
+	return atomic_dec_and_test_int(v);
+}
+
+
+inline static int mb_atomic_get_and_set_int(volatile int* v, int i)
+{
+	membar();
+	return atomic_get_and_set_int(v, i);
+}
+
+
+
+#define mb_atomic_set_long(v, i) \
+	do{ \
+		membar(); \
+		atomic_set_long(v, i); \
+	}while(0)
+
+
+
+inline static long mb_atomic_get_long(volatile long* v)
+{
+	membar();
+	return atomic_get_long(v);
+}
+
+
+#define mb_atomic_inc_long(v) \
+	do{ \
+		membar(); \
+		atomic_inc_long(v); \
+	}while(0)
+
+
+#define mb_atomic_dec_long(v) \
+	do{ \
+		membar(); \
+		atomic_dec_long(v); \
+	}while(0)
+
+#define mb_atomic_or_long(v, m) \
+	do{ \
+		membar(); \
+		atomic_or_long(v, m); \
+	}while(0)
+
+#define mb_atomic_and_long(v, m) \
+	do{ \
+		membar(); \
+		atomic_and_long(v, m); \
+	}while(0)
+
+inline static long mb_atomic_inc_and_test_long(volatile long* v)
+{
+	membar();
+	return atomic_inc_and_test_long(v);
+}
+
+inline static long mb_atomic_dec_and_test_long(volatile long* v)
+{
+	membar();
+	return atomic_dec_and_test_long(v);
+}
+
+
+inline static long mb_atomic_get_and_set_long(volatile long* v, long l)
+{
+	membar();
+	return atomic_get_and_set_long(v, l);
+}
+
+
+#define mb_atomic_inc(var) mb_atomic_inc_int(&(var)->val)
+#define mb_atomic_dec(var) mb_atomic_dec_int(&(var)->val)
+#define mb_atomic_and(var, mask) mb_atomic_and_int(&(var)->val, (mask))
+#define mb_atomic_or(var, mask)  mb_atomic_or_int(&(var)->val, (mask))
+#define mb_atomic_dec_and_test(var) mb_atomic_dec_and_test_int(&(var)->val)
+#define mb_atomic_inc_and_test(var) mb_atomic_inc_and_test_int(&(var)->val)
+#define mb_atomic_get(var)	mb_atomic_get_int(&(var)->val)
+#define mb_atomic_set(var, i)	mb_atomic_set_int(&(var)->val, i)
+#define mb_atomic_get_and_set(var, i) mb_atomic_get_and_set_int(&(var)->val, i)
+
+
+#else /* ! __CPU_arm6 => __CPU_arm */
+
+/* no atomic ops for v <6 , only SWP supported
+ * Atomic ops could be implemented if one bit is sacrificed and used like
+ *  a spinlock, e.g:
+ *          mov %r0, #0x1
+ *       1: swp %r1, %r0, [&atomic_val]
+ *          if (%r1 & 0x1) goto 1 # wait if first bit is 1 
+ *          %r1>>=1  # restore the value (only 31 bits can be used )
+ *          %r1=op (%r1, ...) 
+ *          %r1<<=1   # shift back the value, such that the first bit is 0
+ *          str %r1, [&atomic_val]  # write the value
+ *
+ * However only 31 bits could be used (=> atomic_*_int and atomic_*_long
+ *  would still have to be lock based, since in these cases we guarantee all 
+ *  the bits)  and I'm not sure there would be a significant performance
+ *  benefit when compared with the fallback lock based version:
+ *    lock(atomic_lock);
+ *    atomic_val=op(*atomic_val, ...)
+ *    unlock(atomic_lock);
+ *
+ *  -- andrei
+ */
+
+#endif /* __CPU_arm6 */
+
+
+#endif

atomic_ops.h (+11 -1)

@@ -86,11 +86,13 @@
  *                            safe
  *                   __CPU_i386, __CPU_x86_64, X86_OOSTORE - see
  *                       atomic/atomic_x86.h
- *                   __CPU_mips, __CPU_mip2, __CPU_mip64, MIPS_HAS_LLSC - see
+ *                   __CPU_mips, __CPU_mips2, __CPU_mips64, MIPS_HAS_LLSC - see
  *                       atomic/atomic_mip2.h
  *                   __CPU_ppc, __CPU_ppc64 - see atomic/atomic_ppc.h
  *                   __CPU_sparc - see atomic/atomic_sparc.h
  *                   __CPU_sparc64, SPARC64_MODE - see atomic/atomic_sparc64.h
+ *                   __CPU_arm, __CPU_arm6 - see atomic/atomic_arm.h
+ *                   __CPU_alpha - see atomic/atomic_alpha.h
  */
 /*
  * History:
@@ -147,6 +149,14 @@ inline static int atomic_get(atomic_t *v)

 #include "atomic/atomic_sparc.h"

+#elif defined __CPU_arm || defined __CPU_arm6
+
+#include "atomic/atomic_arm.h"
+
+#elif defined __CPU_alpha
+
+#include "atomic/atomic_alpha.h"
+
 #endif /* __CPU_xxx  => no known cpu */

 #endif /* CC_GCC_LIKE_ASM */

fastlock.h (+7 -6)

@@ -97,7 +97,7 @@ inline static int tsl(fl_lock_t* lock)
 			: "=r"(val) : "r"(lock):"memory"
 	);

-#elif defined __CPU_arm
+#elif defined __CPU_arm || defined __CPU_arm6
 	asm volatile(
 			"# here \n\t"
 			"swpb %0, %1, [%2] \n\t"
@@ -121,7 +121,8 @@ inline static int tsl(fl_lock_t* lock)
 			: "r"(1), "b" (lock) :
 			"memory", "cc"
         );
-#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC )
+#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
+	|| defined __CPU_mips64
 	long tmp;

 	asm volatile(
@@ -156,8 +157,7 @@ inline static int tsl(fl_lock_t* lock)
 		"    mb           \n\t"
 		"2:               \n\t"
 		:"=&r" (val), "=m"(*lock), "=r"(tmp)
-		:"1"(*lock)  /* warning on gcc 3.4: replace it with m or remove
-						it and use +m in the input line ? */
+		:"m"(*lock) 
 		: "memory"
 	);
 #else
@@ -204,7 +204,7 @@ inline static void release_lock(fl_lock_t* lock)
 			: "r" (lock)
 			: "memory"
 	);
-#elif defined __CPU_arm
+#elif defined __CPU_arm || defined __CPU_arm6
 	asm volatile(
 		" str %0, [%1] \n\r"
 		: /*no outputs*/
@@ -223,7 +223,8 @@ inline static void release_lock(fl_lock_t* lock)
 			: "r"(0), "b" (lock)
 			: "memory"
 	);
-#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC )
+#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
+	|| defined __CPU_mips64
 	asm volatile(
 		".set push \n\t"
 		".set noreorder \n\t"