/*------------------------->  ANSI C - headerfile  <-------------------------*/
/* Copyright (C) 1995/96 by International Computer Science Institute         */
/* This file is part of the GNU Sather library. It is free software; you may */
/* redistribute  and/or modify it under the terms of the GNU Library General */
/* Public  License (LGPL)  as published  by the  Free  Software  Foundation; */
/* either version 2 of the license, or (at your option) any later version.   */
/* This  library  is distributed  in the  hope that it will  be  useful, but */
/* WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY */
/* or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details.       */
/* The license text is also available from:  Free Software Foundation, Inc., */
/* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA                     */
/*------------>  Please email comments to <bug-sather@gnu.org>  <------------*/

/* 
 * The pSather Interface
 *
 * Version 1.0 (released for 1.0.9) by Claudio Fleiner (fleiner@icsi.berkeley.edu)
 */
#ifndef _PSATHER_H_
#define _PSATHER_H_

#include <stddef.h>
#include <stdlib.h>
#include <setjmp.h>
#include <string.h>
#include <memory.h>
#include <sys/time.h>
#include <stdio.h>

/*
 * (JN) signals that do not exist under linux.
 * SIGEMT is aliased to SIGSEGV, but only when no SIGEMT macro is
 * already visible, so an existing definition is never overridden.
 */
#if defined(__linux__) && !defined(SIGEMT)
/*# define SIGSYS SIGSEGV*/
# define SIGEMT SIGSEGV
#endif /* __linux__ && !SIGEMT */

/*
 * PSATHER is only defined when the sacomp compiler compiles this
 * file. When it is not defined we supply a few things that are
 * normally defined in sather.h. This way we can use this file
 * to debug the runtime without using sacomp.
 */
#ifndef PSATHER 
/* # define PSATHER     */
typedef struct { short tag; } OB_HEADER;
typedef struct { OB_HEADER header; } *OB;
/*
 * TAG(x): the tag stored in the header of the object x points to.
 * The argument is parenthesized before the cast so that expressions
 * such as `p + 1' are converted as a whole; the result stays an lvalue.
 */
# define TAG(x)         (((OB)(x))->header.tag)
/* ASIZE(a): the asize member of the object a points to. */
# define ASIZE(a)	((a)->asize)
#else
# ifdef _SATHER_H_ /* nasty hack: _SATHER_H_ is defined at this point if we
	            * compile a C file generated by sacomp, otherwise its not
	            */
/*
 * DEBUG does not work for pSather C files generated by sacomp
 * so we define it here to be ignored.
 */
#  define DEBUG0(a)
#  define DEBUG1(a,b)
#  define DEBUG2(a,b,c)
#  define DEBUG3(a,b,c,d)
#  define DEBUG4(a,b,c,d,e)
#  define DEBUG5(a,b,c,d,e,f)
#  define DEBUG6(a,b,c,d,e,f,g)
# endif
# include <sather.h> 
# ifdef DEBUG  /* sather.h defines DEBUG too, we don't want it here */
#  undef DEBUG
# endif

#endif

/*
 * gc.h is not included by this header, yet the runtime calls a handful
 * of the collector's allocation routines (at least for now).  Their
 * prototypes are therefore repeated here.
 */

/* plain and atomic allocation */
void *GC_malloc(size_t size);
void *GC_malloc_atomic(size_t size);

/* the "ignore_off_page" variants of the two allocators above */
void *GC_malloc_ignore_off_page(size_t size);
void *GC_malloc_atomic_ignore_off_page(size_t size);

/* calloc/realloc style entry points and explicit release */
void *GC_calloc(long a,long b);
void *GC_realloc(void *a,size_t size);
void GC_free(void *p);

/*
 * Optional run-time checks.  Each macro expands to nothing unless the
 * corresponding switch is defined.
 *
 * CHKV(x)      statement form:  RFATAL if FVOID(x).
 * CHKVv(x)     expression prefix (ends in a comma operator): RFATAL if
 *              FVOID(x), then evaluation continues with what follows.
 * CHKB(v,l,h)  statement form:  RFATAL unless l <= v <= h.  Every
 *              argument is parenthesized so that compound expressions
 *              compare as a whole.
 */
#ifdef VOID_CHK
# define CHKV(x) {if(FVOID(x)) RFATAL("void access");}
# define CHKVv(x) (FVOID(x)?RFATAL("void access"):0),
#else
# define CHKV(x)
# define CHKVv(x) 
#endif

#ifdef BOUNDS_CHK
# define CHKB(v,low,high) {if((v)<(low) || (v)>(high)) RFATAL("out of bounds");}
#else
# define CHKB(a,b,c)
#endif

#include "../../Common/Brahma/brahma.h"  /* DPS */

/* Cluster identifiers are Brahma cluster ids. */
typedef BR_cluster_t vnn_t;

/* Runtime routines referenced through the names below. */
void my_lck_unlock(BR_lock_t);
void r_lck_unlock_mem();
void r_ta_sema_signal();

/* Every use of lck_unlock is redirected to my_lck_unlock. */
#define lck_unlock my_lck_unlock

/* Forwards to BR_ascii_id with a fixed third argument of 15. */
#define thr_print_id(a,b) BR_ascii_id(a,b,15)

/* A spinlock is a single volatile byte. */
typedef volatile unsigned char spinlock_t;

/* Spinlock and semaphore operations map directly onto Brahma. */
#define SPINLOCK_LOCK BR_SPINLOCK_LOCK
#define SPINLOCK_UNLOCK BR_SPINLOCK_UNLOCK

#define SEMA_TRY    BR_TRY_WAIT
#define SEMA_WAIT   BR_WAIT
#define SEMA_SIGNAL BR_SIGNAL
#define SEMA_CREATE BR_SEMA_CREATE
#define SEMA_DELETE BR_SEMA_DELETE

/*
 * ta_sema_t: a pointer to a spinlock_t flag.
 *   TA_SEMAPHORE(a)    declares the flag (initial value 255) and a
 *                      pointer `a' to it; needs a trailing semicolon.
 *   TA_SEMA_SIGNAL(a)  stores 0 into the flag.
 *   TA_SEMA_WAIT(a)    yields and polls for as long as the flag is
 *                      non-zero.
 * The argument of SIGNAL/WAIT is parenthesized before it is
 * dereferenced, so pointer expressions such as `sems + i' work.
 */
typedef spinlock_t *ta_sema_t;
#define TA_SEMAPHORE(a)	spinlock_t __cntr_var_ ## a=255;spinlock_t *a= &__cntr_var_ ## a
#define TA_SEMA_SIGNAL(a) ((*(a))=0)
#define TA_SEMA_WAIT(a)   while(*(a)) {BR_THREAD_YIELD(); BR_POLL();}

/*
** The thr_yield() I added does nothing, but it forces the compiler to
** actually emit code to check the condition.  Otherwise, C optimization
** may helpfully elide the check.  It would be *much* better to go
** through a true synchronization interface than to do things this way.
** This is also obviously nonportable.  DPS
*/
/*
 * am_wait_for(cond): re-evaluate cond until it is true; after every
 * failed check the thread yields and then polls.  Use it as a single
 * statement followed by a semicolon.
 */
#define am_wait_for(cond) do { while(!(cond)) { BR_THREAD_YIELD(); BR_POLL(); } } while(0)
#define thr_delay_function BR_delay_function

/*
 * Global memory in case of brahma: the cluster-local area viewed as an
 * array of void *.  Slot usage:
 *	0-100: statistic
 *	101:   cache
 *	102:   thread_count (int)
 */
#define CLMEM ((void **)(BR_CLUSTER_LOCAL()))
/* earlier variant: #define thread_count ((long)(CLMEM[102])) */
#define thread_count (CLMEM[102])

/*
 * abort is redefined so that every call goes through our own
 * psather_abort.
 */
#define abort	psather_abort
/*
 * Configuration of the pSather Runtime.
 *
 * Note: here you can only change configurations that are local
 *       to the psather library, but not which kind of
 *       libraries are used, ...
 *
 * You can define everything defined in this config file also
 * through options to the compiler (for gcc use
 * -Doption or -Doption=value, and for sacomp use -C_flag -Doption
 * or -C_flag -Doption=value).
 *
 * If you define nothing, some standard values are used for
 * all defines.
 */

/*
 * Statistics: define PSATHER_STAT to collect some statistics,
 * NO_PSATHER_STAT otherwise.  PSATHER_STATISTICS switches
 * PSATHER_STAT on (and removes NO_PSATHER_STAT).  With nothing
 * defined the default is NO_PSATHER_STAT.
 */
#if !defined(PSATHER_STAT)
# if defined(PSATHER_STATISTICS)
#  define PSATHER_STAT
#  undef NO_PSATHER_STAT
# elif !defined(NO_PSATHER_STAT)
#  define NO_PSATHER_STAT 		/* DEFAULT VALUE */
# endif
#endif

/*
 * Define USE_ALLOCA if alloca works reliably on your system,
 * NO_ALLOCA otherwise.
 */
#ifndef USE_ALLOCA
# ifndef NO_ALLOCA
#  define NO_ALLOCA		/* DEFAULT VALUE */
# endif
#endif

/*
 * When starting a new thread, the runtime needs to allocate
 * some memory temporarily. Currently it uses local memory
 * if there are not too many arguments. ATTACH_LOCAL_MEM
 * defines the size of that memory on the stack.
 * This value is ignored if you define USE_ALLOCA.
 */
#if !defined(ATTACH_LOCAL_MEM)
# define ATTACH_LOCAL_MEM 64		/* DEFAULT VALUE */
#endif

/*
 * ATOMIC_READ_MEM serves a similar purpose: it defines the size
 * of the local memory used when reading remote attributes.
 * This value is ignored if you define USE_ALLOCA.
 */
#if !defined(ATOMIC_READ_MEM)
# define  ATOMIC_READ_MEM 32		/* DEFAULT VALUE */
#endif

/* 
 * Define MEMCPY_IS_ATOMIC if memcpy is atomic,
 * MEMCPY_IS_NOT_ATOMIC otherwise.
 */
#ifndef MEMCPY_IS_ATOMIC
# ifndef MEMCPY_IS_NOT_ATOMIC
#  define MEMCPY_IS_NOT_ATOMIC		/* DEFAULT VALUE */
# endif
#endif

/*
 * To use a cache when importing variables from other clusters,
 * define IMPORT_CACHE. IMPORT_CACHE_SIZE defines the size of 
 * the cache and IMPORT_CACHE_TRESHOLD defines the largest size an
 * attribute may have to be stored in the cache (should be at least 8
 * and a multiple of 2). IMPORT_CACHE_HASH defines a hash function to
 * map an address to the correct hash entry.
 * This hash function MUST ensure that addresses on different clusters
 * are mapped to different hash values !!!
 *
 * The default hash reduces its value with `& (IMPORT_CACHE_SIZE-1)',
 * which only covers every entry when IMPORT_CACHE_SIZE is a power
 * of two.
 *
 * The size of the per cluster cache can be calculated with the following
 * formula:
 * cachesize in bytes >
 *  IMPORT_CACHE_SIZE*(IMPORT_CACHE_TRESHOLD+sizeof(BR_lock_t)+2*sizeof(void*)+sizeof(long))
 * On a solaris machine with SIZE==512 and TRESHOLD==8 we get about 18KB
 *
 * With neither IMPORT_CACHE nor NO_IMPORT_CACHE defined the cache is off.
 */
#if !defined(IMPORT_CACHE) && !defined(NO_IMPORT_CACHE)
# define NO_IMPORT_CACHE
#endif

#ifdef IMPORT_CACHE
# ifndef IMPORT_CACHE_SIZE
#  define IMPORT_CACHE_SIZE 1024		/* DEFAULT VALUE */
# endif
# ifndef IMPORT_CACHE_TRESHOLD
#  define IMPORT_CACHE_TRESHOLD 16		/* DEFAULT VALUE */
# endif
# ifndef IMPORT_CACHE_HASH
/* both arguments are parenthesized so compound expressions hash as a whole */
#  define IMPORT_CACHE_HASH(cluster,adr) ((((unsigned long)(adr)>>2)+(cluster))&(IMPORT_CACHE_SIZE-1))
# endif
#endif

/*
 * Value Assignment Locks
 * Whenever one value is assigned to another and the system does not
 * guarantee atomicity, the assignment has to be wrapped in a lock (only
 * other threads running in the same memory space matter).
 * For this a table of VLOCKS locks is set up and every possible address
 * is mapped onto one of them by VLOCK_HASH.  The mapping algorithm
 * should be adapted to the alignment of each individual system.
 */
#if !defined(VLOCKS)
# define VLOCKS 1024		/* DEFAULT VALUE */
#endif
#if !defined(VLOCK_HASH)
/* drop the two low address bits, then reduce with the VLOCKS-1 mask */
# define VLOCK_HASH(adr) ((((unsigned long)(adr))/4)&(VLOCKS-1))
#endif

/* 
 * Atomic assignments.
 * Define ATOMIC_x, with x equal to 1, 2, 4, 8 or 16, if all
 * assignments of a type with this size are indeed atomic, and
 * NON_ATOMIC_x if they are not.
 * For each atomic assignment size a C type of this size is needed
 * (ATOMIC_x_TYPE). If you are not sure whether the sizes are correct
 * for your environment, run the program 'type_size.c'. It will print
 * out the necessary definitions (note that it assumes that all
 * assignments of standard C types are atomic).
 * The defaults assume that 1, 2, 4 and 8 are atomic and 16 is not.
 * If all assignments are atomic and memcpy is atomic too, define
 * ATOMIC_ALL; nothing in this section is defined in that case.
 * LARGEST_ATOMIC is the largest size a datastructure may have such
 * that assignments are atomic; LARGEST_ATOMIC_TYPE and
 * LARGEST_ALIGNMENT are the matching type and alignment.
 * ALIGNMENT_x defines the alignment needed for ATOMIC_x_TYPE.
 * NOTE(review): ATOMIC_4_TYPE is `long'; confirm with type_size.c that
 * long really is 4 bytes on the target ABI.
 */
#ifndef ATOMIC_ALL
# ifndef NON_ATOMIC_1
#  ifndef ATOMIC_1
#   define ATOMIC_1
#  endif
# endif
# ifndef NON_ATOMIC_2
#  ifndef ATOMIC_2
#   define ATOMIC_2
#  endif
# endif
# ifndef NON_ATOMIC_4
#  ifndef ATOMIC_4
#   define ATOMIC_4
#  endif
# endif
# ifndef NON_ATOMIC_8
#  ifndef ATOMIC_8
#   define ATOMIC_8
#  endif
# endif
# ifndef ATOMIC_16
#  ifndef NON_ATOMIC_16
#   define NON_ATOMIC_16
#  endif
# endif

/* type and alignment used for each size */
# define ATOMIC_1_TYPE  char
# define ALIGNMENT_1    1
# define ATOMIC_2_TYPE  short
# define ALIGNMENT_2    2
# define ATOMIC_4_TYPE  long
# define ALIGNMENT_4    4
# define ATOMIC_8_TYPE  double
# define ALIGNMENT_8    8
# define ATOMIC_16_TYPE W16
# define ALIGNMENT_16   16

/* pick the largest size that is marked atomic */
# if defined(ATOMIC_16)
#  define LARGEST_ATOMIC 16
#  define LARGEST_ATOMIC_TYPE ATOMIC_16_TYPE
#  define LARGEST_ALIGNMENT ALIGNMENT_16
# elif defined(ATOMIC_8)
#  define LARGEST_ATOMIC 8
#  define LARGEST_ATOMIC_TYPE ATOMIC_8_TYPE
#  define LARGEST_ALIGNMENT ALIGNMENT_8
# elif defined(ATOMIC_4)
#  define LARGEST_ATOMIC 4
#  define LARGEST_ATOMIC_TYPE ATOMIC_4_TYPE
#  define LARGEST_ALIGNMENT ALIGNMENT_4
# elif defined(ATOMIC_2)
#  define LARGEST_ATOMIC 2
#  define LARGEST_ATOMIC_TYPE ATOMIC_2_TYPE
#  define LARGEST_ALIGNMENT ALIGNMENT_2
# elif defined(ATOMIC_1)
#  define LARGEST_ATOMIC 1
#  define LARGEST_ATOMIC_TYPE ATOMIC_1_TYPE
#  define LARGEST_ALIGNMENT ALIGNMENT_1
# endif
#endif

/*
 * AM_POLL_NETWORK: with POLLING defined, calls am_poll() whenever
 * MEIKO_POLL_POSSIBLE is true; otherwise it expands to nothing.
 */
#ifndef POLLING
#define AM_POLL_NETWORK
#else
#define AM_POLL_NETWORK	if(MEIKO_POLL_POSSIBLE) am_poll();
#endif
/***************************** 
 * END OF THE CONFIG OPTIONS * 
 *****************************/
/* Nobbi: not needed
int fprintf(FILE *,const char *,...);
*/
/*
 * To get good error messages, you need to define DEBUG and PSATHER_CHK
 * when compiling your program and the psather library.
 *
 * STORE_FF is appended inside SPe and, with PRINT_BACKTRACE, records
 * the address of FF in the thread local data.  CREATE_FF declares FF.
 * Without PRINT_BACKTRACE both expand to nothing.
 */
#ifndef PRINT_BACKTRACE
#define STORE_FF
#define CREATE_FF
#else
#define STORE_FF ,(((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->pFF= &FF)
/* Some pSather functions use macros that rely on the presence of FF;
 * in that case we simply create a bogus FF, either in the function
 * or at the beginning of the file.
 */
#define CREATE_FF struct _func_frame FF;
/* we need to use FF, otherwise the compiler complains */
extern CREATE_FF
#endif

/* 
 * Some versions of Solaris define SP to be stack pointer (06). We 
 * do not care about that, and since there is no telling in what other
 * places SP may resurface, any existing definition is simply dropped.
 */
#undef SP

/*
 * Source position tracking.
 *   SP    statement that records __FILE__/__LINE__ in the thread local data
 *   SPe   the same as an expression prefix (ends in a comma operator)
 *   S(a)  records the position, then executes a
 *   RFATAL(s) reports a fatal runtime error
 * Without DEBUG, PSATHER_TRACE and PRINT_BACKTRACE nothing is recorded
 * and RFATAL passes the position explicitly instead.
 */
#if !defined(DEBUG) && !defined(PSATHER_TRACE) && !defined(PRINT_BACKTRACE)
#define RFATAL(s) p_rfatalvfl(s,__FILE__,__LINE__)
#define SPe	
#define SP	
#define S(a)	a
#else
#define REGISTER_THREADS
#define RFATAL(s) p_rfatalv(s)
#define SPe	(int)((((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->file=__FILE__),(((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->line=__LINE__) STORE_FF),
#define SP	do { SPe 0; } while(0);
#define S(a)	do { SP;a; } while(0)
#endif

/*
 * SAFE_POS opens a block and copies the thread's recorded file, line
 * and pFF into the locals safe_file, safe_line and safe_ff;
 * RESTORE_POS writes them back and closes that block.  The two must
 * always be used as a pair.
 */
#define SAFE_POS	{ \
	char *safe_file=((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->file; \
	int safe_line=((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->line; \
	void *safe_ff=((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->pFF;
#define RESTORE_POS \
	((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->file=safe_file; \
	((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->line=safe_line; \
	((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->pFF=safe_ff; }


/* read / write the psather_tid field of the thread local data */
#define PSATHER_THREAD_ID ((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->psather_tid
#define PSATHER_THREAD_ID_SET(x) (((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->psather_tid = (x))
/*
 * HERE should return the cluster id (an integer).
 */
#define HERE BR_HERE()
#define ANY (-1)

/*
 * FOB pointer look exactly like standard pointers, BUT their first few
 * bits are used as a relative cluster address. The nice part here
 * is that pointers that point to the local memory have exactly
 * the same representation as in sather.
 * There are two problems: we cannot use more than about 8 to 10 bits
 * for this, so we cannot use more than 1000 clusters,
 * and the more clusters we use, the less memory is available for 
 * each cluster.
 * MAX_CLUSTER_BITS defines how many of the high bits are used for
 * the cluster address.
 * Another problem is that pointers to stack memory and pointer to
 * globals look like far pointers (at least on solaris system).
 * Therefore we need an additional way to distinguish far pointers
 * from local pointers. We use the lowest bit for this (note that
 * there is no way for pSather to have pointer into the middle of
 * a char array, so the lowest 1 or 2 bits are zero for all pointers)
 */

/*
 * Encoding of far object pointers (FOB).
 * The masks and the shift below are written for a 32 bit pointer image:
 * the top CLUSTER_BITS bits hold the relative cluster and bit 0 is the
 * "far" marker.
 *
 *   CLUSTER_BIT(n)  n moved into the cluster field
 *   CLUSTER(p)      the cluster field of p
 *   POINTER(p)      p with cluster field and far bit removed if p is
 *                   far, p itself otherwise
 *   INITFOB(cl,p)   p tagged with cluster cl relative to HERE; the far
 *                   bit is set when cl differs from HERE.  The shift is
 *                   done on unsigned long (via CLUSTER_BIT) so that a
 *                   negative cl-HERE does not left-shift a signed int.
 *   FVOID(x)        true when the pointer part of x is NULL
 */
#define CLUSTER_BITS 6
#define CLUSTER_MASK 0xFC000001
#define POINTER_MASK (~CLUSTER_MASK)
#define CLUSTER_SHIFT (32-CLUSTER_BITS)
#define CLUSTER_BIT(n) ((unsigned long)(n)<<CLUSTER_SHIFT)
#define CLUSTER(p) (((unsigned long)(p)&CLUSTER_MASK)>>CLUSTER_SHIFT)
#define POINTER(p) (FAR(p)?((void*)((unsigned long)(p)&POINTER_MASK)):(p))
#define INITFOB(cl,p) ((void *)(CLUSTER_BIT((cl)-HERE)|(unsigned long)(p)|((cl)!=HERE?1:0)))

typedef void *FOB;
#define FVOID(x) (POINTER(x)==NULL)
#define FNULL 	NULL

/*
 * Queries on far object pointers.  WHERE and the PS_ / PSR_ variants
 * forward to runtime functions; FAR and NEAR only test bit 0 of the
 * pointer image.  The argument of FAR is parenthesized before the
 * cast, so that e.g. FAR(p + 1) tests the resulting pointer.
 */
#define WHERE(x)	p_where(x)
#define PS_WHERE(x)	ps_where(x)
#define PSR_WHERE(x)	psr_where(x)
#define FAR(x)		(((unsigned long)(x))&1)
#define NEAR(x)		(!FAR(x))
#define PS_NEAR_OR_VOID(x) ps_near_or_void(x)
#define PS_NEAR(x)	ps_near(x)
#define PSR_NEAR(x)	psr_near(x)
#define PS_FAR(x)	ps_far(x)
#define PSR_FAR(x)	psr_far(x)
/*
 * MAKENEAR(x): with PSATHER_CHK (and without PSATHER1) a far pointer is
 * reported on stderr and the program aborts; otherwise x is passed
 * through unchanged.
 */
#if defined(PSATHER_CHK) && !defined(PSATHER1)
# define MAKENEAR(x)	(FAR(x)?(void *)(fprintf(stderr,"%s:%d:Extracting near pointer from a pointer pointing to another cluster\n",__FILE__,__LINE__),PSATHER_ABORT):(x))
#else
# define MAKENEAR(x)	(x)
#endif
#define MAKEFAR(x)	(x)
/*
 * MAKEFOB(cl,pt): adds the cluster field for cl (relative to HERE) to
 * pt and then masks the sum: with ~0u for the local cluster, with ~1u
 * (bit 0 cleared) otherwise.  The grouping (sum first, mask second) is
 * the one the operator precedences already produced; it is only spelled
 * out here.
 * NOTE(review): INITFOB sets bit 0 for remote clusters while this macro
 * clears it - confirm that this is intended.
 */
#define MAKEFOB(cl,pt)	((FOB)((CLUSTER_BIT((cl)-HERE)+(unsigned long)(pt))&((cl)==HERE?~0u:~1u)))
/* Translate a single far pointer for sending to / receiving from cluster. */
#define SENDFOB(ptr,cluster)			p_send_fob(ptr,cluster)
#define RECVFOB(ptr,cluster) 			p_recv_fob(ptr,cluster)
/*
 * RECVOB and SENDOB assume that they get the pointer to either a
 * value object or a reference object. For reference objects you CAN
 * pass 0 as tag, for boxed value objects you MUST pass 0 as tag.
 * The ARRAY variants take the element count as an extra argument.
 * All four hand the per-pointer translation routine (p_send_fob or
 * p_recv_fob) to the runtime as first argument.
 */
#define SENDOB(obtag,ptr,cluster)		p_sr_ob(p_send_fob,obtag,ptr,cluster)
#define RECVOB(obtag,ptr,cluster)		p_sr_ob(p_recv_fob,obtag,ptr,cluster)
#define SENDARRAY(obtag,ptr,count,cluster)	p_sr_array(p_send_fob,obtag,ptr,cluster,count)
#define RECVARRAY(obtag,ptr,count,cluster)	p_sr_array(p_recv_fob,obtag,ptr,cluster,count)
/* A pointer sent back to its home cluster is reduced to its plain pointer part. */
#define SENDFOBHOME(ptr)	POINTER(ptr)

/*
 * Each INLINED_* macro expands to the complete definition of one runtime
 * function.
 *
 * p_send_fob(pt,cl): prepare pt for transmission to cluster cl.
 *  - pt is returned unchanged when cl is HERE, or when pt has a non-zero
 *    cluster field although bit 0 is clear;
 *  - if the cluster pt refers to (cluster field + HERE, reduced to the
 *    width of the field) is cl itself, the plain pointer part is returned;
 *  - otherwise the cluster field is advanced by HERE-cl and bit 0 is set.
 */
#define INLINED_P_SEND_FOB \
	FOB p_send_fob(FOB pt,int cl) \
	{ \
		int a=CLUSTER(pt); \
		if(cl==HERE || (a && !(((unsigned long)pt)&1))) return pt;  \
		if(((a+HERE)&(CLUSTER_MASK>>CLUSTER_SHIFT))==cl) return SENDFOBHOME(pt); \
		pt=(FOB)(((unsigned long)(pt)+CLUSTER_BIT(HERE-cl))|1); \
		return pt; \
	}
/*
 * p_recv_fob(pt,cl): convert a pointer received from cluster cl.
 *  - pt is returned unchanged under the same two conditions as in
 *    p_send_fob;
 *  - otherwise the cluster field is advanced by cl-HERE; bit 0 is then
 *    cleared if the resulting field is zero and set if it is not.
 */
#define INLINED_P_RECV_FOB \
	FOB p_recv_fob(FOB pt,int cl) \
	{ \
		int a=CLUSTER(pt); \
		if(cl==HERE || (a && ((unsigned long)pt&1)==0)) return pt; \
		pt=(FOB)(((unsigned long)(pt)+CLUSTER_BIT((cl)-HERE))); \
		if(CLUSTER(pt)==0) pt=(FOB)((unsigned long)pt&~1); \
		else pt=(FOB)((unsigned long)pt|1); \
		return pt; \
	}
/*
 * p_where(x): HERE when bit 0 of x is clear, otherwise the cluster field
 * of x plus HERE, reduced to the width of the cluster field.
 */
#define INLINED_P_WHERE \
	int p_where(FOB x) \
	{ \
		return ((unsigned long)x&1)?((CLUSTER(x)+HERE)&(CLUSTER_MASK>>CLUSTER_SHIFT)):HERE; \
	} 
/*
 * Function bodies for the where/near/far queries.
 * The ps_ variants additionally test F_TAG(x)<0 and treat such a
 * pointer like a void one; the psr_ variants only test for void.
 * A void pointer is reported as being HERE, and as neither near nor far.
 */
#define INLINED_PS_WHERE \
	int ps_where(FOB x) \
	{ \
		CREATE_FF \
		if(!FVOID(x) && !(F_TAG(x)<0)) return p_where(x); \
		return HERE; \
	}
#define INLINED_PSR_WHERE \
	int psr_where(FOB x) \
	{ \
		if(!FVOID(x)) return p_where(x); \
		return HERE; \
	}
#define INLINED_PS_NEAR \
	int ps_near(FOB x) \
	{ \
		CREATE_FF \
		if(!FVOID(x) && !(F_TAG(x)<0)) return NEAR(x); \
		return 0; \
	}
#define INLINED_PSR_NEAR \
	int psr_near(FOB x) \
	{ \
		if(!FVOID(x)) return NEAR(x); \
		return 0; \
	}
#define INLINED_PS_FAR \
	int ps_far(FOB x) \
	{ \
		CREATE_FF \
		if(!FVOID(x) && !(F_TAG(x)<0)) return FAR(x); \
		return 0; \
	}
#define INLINED_PSR_FAR \
	int psr_far(FOB x) \
	{ \
		if(!FVOID(x)) return FAR(x); \
		return 0; \
	}
/* 1 if x is near according to PS_NEAR or x is void, 0 otherwise */
#define INLINED_PS_NEAR_OR_VOID \
	int ps_near_or_void(FOB x) \
	{ \
		if(PS_NEAR(x)) return 1; \
		return FVOID(x); \
	}

/*
 * CLUSTERS defines the number of clusters for the current
 * run. This must be a constant during run time, but for
 * each run it may have a different value.
 * It is normally defined when compiling a sather program, but
 * we redefine it here (as the variable `clusters') to use it in
 * test programs.
 */
#define CLUSTERS	clusters

/*
 * MY_CLUSTER_SIZE defines the number of processors for a cluster;
 * it is whatever BR_PROCESSORS() returns.
 */
#define MY_CLUSTER_SIZE BR_PROCESSORS()

/*
 * PSATHER_START: must be called before any of the functions defined
 *               in this header files are used. Needs the complete arguments
 *		 (argc and argv)
 * PSATHER_STOP:  must be called before ending the program.
 *
 * The two macros form a pair: PSATHER_START opens an INIT_PROTECT block
 * and a PROTECT block, PSATHER_STOP supplies the handler (print a
 * message, abort) and closes both again. Neither can be used alone.
 * PSATHER_ABORT is a plain call of psather_abort().
 */
#define PSATHER_ABORT		psather_abort()
#define PSATHER_START(ac,av)	psather_start(ac,av); \
				INIT_PROTECT_BEGIN \
				  PROTECT_BEGIN

#define PSATHER_STOP		  PROTECT_WHEN  \
					fprintf(stderr,"unhandled exception, aborting\n"); \
					abort();  \
				  PROTECT_END  \
				INIT_PROTECT_END \
				psather_stop();

/*
 * the SYS class
 */
#define SYS_DEFER			BR_THREAD_YIELD()

/*
 * SYS_IMPORT / SYS_EXPORT call p_import() / p_export() only when the
 * import cache is enabled and PSATHER1 is not defined; in every other
 * configuration they expand to nothing.
 */
#if defined(IMPORT_CACHE) && !defined(PSATHER1)
#define SYS_IMPORT			p_import()
#define SYS_EXPORT			p_export()
#else
#define SYS_IMPORT
#define SYS_EXPORT
#endif

/*
 * CHKCL(c): with PSATHER_CHK, a fatal error unless c is below CLUSTERS;
 * without it, nothing.  The argument is parenthesized so that compound
 * expressions are compared as a whole.
 */
#ifdef PSATHER_CHK
#define CHKCL(c) do { if((c)>=CLUSTERS) RFATAL("cluster out of bound"); } while(0)
#else
#define CHKCL(c)
#endif
/*
 * REMOTE_EXEC
 * used to execute a function on a remote cluster.
 * cluster: where to execute the function
 * bound_ob: pointer to a bound_ob structure usable for remote execution.
 * The structure is shipped with execp_mem(); the macro then waits
 * until its `local' member reads NULL.
 */
#define REMOTE_EXEC(cluster,bound_ob) {SP CHKCL(cluster);execp_mem(cluster,p_remote_exec,bound_ob,sizeof(*(bound_ob)));am_wait_for((bound_ob)->local==NULL);}

/*
 * ATTACH, uses five arguments (in this order)
 * func:   the function to execute
 * self:   the current self object (as pointer)
 * arg:    a structure passed as argument to func.
 * attach: each thread is attached to an attach object, and here you pass the
 *         attach for the new process.
 * pos:    the cluster on which this function should be executed.
 *
 * FORK_ATTACH and PARLOOP_FORK_ATTACH take the same arguments and hand
 * them to p_fork_attach() and parloop_enqueue() respectively.
 * PARLOOP_ATTACH calls func directly, between parloop_begin() and
 * parloop_end(), passing HERE as position; pos is only range checked.
 */
#define ATTACH(func,self,arg,attach,pos) {SP CHKCL(pos);p_attach((FORK_FUNC)(func),self,arg,attach,pos); }
#define FORK_ATTACH(func,self,arg,attach,pos) {SP CHKCL(pos);p_fork_attach((FORK_FUNC)(func),self,arg,attach,pos); }

#define PARLOOP_ATTACH(func,self,arg,attach,pos) { SP CHKCL(pos);parloop_begin();(*(func))(self,arg,attach,HERE);parloop_end(); }
#define PARLOOP_FORK_ATTACH(func,self,arg,attach,pos) {SP CHKCL(pos);parloop_enqueue((FORK_FUNC)(func),self,arg,attach,pos); }

/* SYNC: record the source position, then call pd_sync(). */
#define SYNC	S(pd_sync())

/*
 * $LOCK
 * Every lock object carries, right after the object header, one
 * LOCK_HEADER word (a FOB) named LOCK_header.
 *   LOCK_HEADER_STRUCT       the member declaration
 *   LOCK_HEADER_STRUCT_REFS  the offset of that member inside a LOCK,
 *                            as an int followed by a comma
 *   INIT_LOCK_HEADER(s)      calls p_init_lock_header on s viewed as a
 *                            LOCK; the argument is parenthesized before
 *                            the cast and the macro supplies its own
 *                            semicolon
 */
typedef FOB LOCK_HEADER;
#define LOCK_HEADER_STRUCT LOCK_HEADER LOCK_header;

typedef struct {
	OB_HEADER header;
	LOCK_HEADER_STRUCT
} *LOCK;
#define LOCK_HEADER_STRUCT_REFS (int)&(*(LOCK)0).LOCK_header,
#define INIT_LOCK_HEADER(s)    p_init_lock_header((LOCK)(s));



/*
 * the structure EXCEPT_ELEMENT should not be used anywhere in the code
 * FORBID EXCEPT_ELEMENT
 *
 * We have three different EXCEPT structures:
 * - one used by PROTECT
 * - one used by locks
 * - one used for loops
 * They are always stored on the stack, with the exception of
 * frames used in iters, those are stored in the heap.
 * All three start with the same EXCEPT_ELEMENT_COMMON header, whose
 * `type' field tells them apart.
 */
#define EXCEPT_PROTECT	1
#define EXCEPT_LOCK	2
#define EXCEPT_LOOP	3

/* we have to be sure that IS_ITER is not defined */
#ifdef IS_ITER 
#undef IS_ITER
#endif

/* Header shared by every frame on the exception stack. */
struct EXCEPT_ELEMENT_COMMON {
	int type; /* one of EXCEPT_PROTECT, _LOCK, _LOOP */
	int heap; /* one if stored on the heap (and has to be freed) */
        struct EXCEPT_ELEMENT_COMMON *prev_frame,*next_frame; /* links to the neighbouring frames */
        FOB exception;
};

/* Frame pushed by PROTECT_BEGIN: common header plus the setjmp context. */
struct EXCEPT_PROTECT_ELEMENT {
	struct EXCEPT_ELEMENT_COMMON c;
        jmp_buf jmp;
};

/* Frame type used by LOOP_BEGIN: common header plus n frame slots. */
#define EXCEPT_LOOP_ELEMENT(n) struct { \
	struct EXCEPT_ELEMENT_COMMON c; \
	int  slots; \
	struct EXCEPT_ELEMENT_COMMON *slot[n]; \
}

/*
 * Frame type used by DECLARE_LOCK: common header, bookkeeping for the
 * lock statement and room for n locks (locks[] is indexed as
 * branch*cols+column by ADD_LOCK).
 */
#define EXCEPT_LOCK_ELEMENT(n) struct  {  \
	struct EXCEPT_ELEMENT_COMMON c; \
	long plate; \
	short flags; \
	int chosen_branch; \
        unsigned short branches; \
	unsigned short cols; \
        LOCK locks[n]; \
}


/* The exception stack stored in the current thread's local data. */
#define EXCEPTION_STACK (((LOCAL_MEM)(BR_GET_THREAD_LOCAL()))->exception_stack)


/*
 * To pop some frames from the exception stack, you may use
 * POP_EXCEPTION(count); POP_EXCEPTION1 pops exactly one.
 * GET_/SET_EXCEPTION_STACK read and replace the top of the stack.
 * SET_CURRENT_EXCEPTION and RESTORE_CURRENT_EX do the same with the
 * local frame pointer pp_n and carry their own semicolon.
 */
#define POP_EXCEPTION(count)	p_ex_pop((count))
#define POP_EXCEPTION1		POP_EXCEPTION(1)
#define GET_EXCEPTION_STACK	p_ex_top()
#define SET_EXCEPTION_STACK(frame)  p_ex_set(frame)
#define SET_CURRENT_EXCEPTION	SET_EXCEPTION_STACK(pp_n);
#define RESTORE_CURRENT_EX	pp_n=p_ex_top();

/*
 * RAISE(x) raises an exception. The value x (actually a pointer) is passed
 * to the exception handling routine, which can use it through the
 * macro EXCEPTION. If this routine needs to propagate the exception,
 * it can use RAISE(EXCEPTION)
 */
#define RAISE(x)			S(p_ex_raise((x)))
#define EXCEPTION 	   		(SPe (p_ex_exception()))

/* 
 * To protect some statements A against exceptions, use
 * PROTECT_BEGIN
 *	A;
 * PROTECT_WHEN
 *	if(EXCEPTION==....
 * PROTECT_END
 * FORBID p_n
 *
 * PROTECT_BEGIN opens a block, sets up a frame (p_n on the stack, or a
 * malloc'ed one when IS_ITER is true), and pushes it once setjmp() has
 * returned 0.  PROTECT_WHEN pops the frame at the end of the protected
 * statements and starts the handler branch, in which a heap frame is
 * freed first.  PROTECT_END closes both the handler and the block.
 * A failed allocation is reported through RFATAL instead of being
 * dereferenced.
 */
#define PROTECT_BEGIN	   { struct EXCEPT_PROTECT_ELEMENT p_n,*pp_n;SP	      \
			     if(IS_ITER) { \
				     pp_n=(struct EXCEPT_PROTECT_ELEMENT *)malloc(sizeof(p_n)); \
				     if(pp_n==NULL) RFATAL("out of memory in PROTECT_BEGIN"); \
				     pp_n->c.type=EXCEPT_PROTECT; \
				     pp_n->c.heap=1; \
			     } else { \
				     pp_n=&p_n; \
				     pp_n->c.type=EXCEPT_PROTECT; \
				     pp_n->c.heap=0; \
			     } \
			     pp_n->c.prev_frame=pp_n->c.next_frame=NULL;			      \
			     if(setjmp(pp_n->jmp)==0) {		  	      \
				p_ex_push(pp_n);
             
#define PROTECT_WHEN	     p_ex_pop(1); } else { \
				if(IS_ITER) free(pp_n);

#define PROTECT_END	     }	    \
			   }

/*
 * INIT_PROTECT_BEGIN, END are used to initialize the error stack:
 * p_ex_init_thread() is called and a stack allocated frame is installed
 * with p_ex_push_first().  There is no handler branch; the statements
 * between BEGIN and END only run when setjmp() returns 0.
 */
#define INIT_PROTECT_BEGIN { struct EXCEPT_PROTECT_ELEMENT p_n;		      \
			     p_n.c.type=EXCEPT_PROTECT; \
			     p_n.c.heap=0; \
			     p_n.c.prev_frame=p_n.c.next_frame=NULL;			      \
			     p_ex_init_thread(); \
			     if(setjmp(p_n.jmp)==0) {		  	      \
				p_ex_push_first(&p_n);
             
#define INIT_PROTECT_END     }	   \
			   }

/*
 * disjunctive lock
 * FORBID br_br
 * FORBID p_n
 * use as follows:
 *
 * DECLARE_LOCK(2,2,LOCK_WITH_ELSE)  (* flags can be either 0, LOCK_WITH_ELSE, or LOCK_HIDDEN *)
 * ADD_LOCK(0,0,lck1);
 * ADD_LOCK(0,1,lck2);
 * ADD_LOCK(1,0,lck3);
 * SELECT_LOCK
 * BRANCH(0)
 * 	psather_code
 * BRANCH(1)
 *	psather_code
 * BRANCH_ELSE
 *	psather_code
 * LOCK_END
 */
#define LOCK_HIDDEN 	1
#define LOCK_WITH_ELSE	2
/* LOCK_LOCAL, LOCK_UNLOCKED: set automatically, used for locks where
 * the lock manager is not used */
#define LOCK_LOCAL	4
#define LOCK_UNLOCKED   8
#define LOCK_NO_STACK  16 /* used if the lock should not be put on the
			     exception stack. To end the lock before then
			     LOCK_END use 'LOCK_UNLOCK_NOW'. Works only
			     for the innermost lock. Instead of LOCK_END you
			     have to use LOCK_END_NO_STACK. Does not work if
			     you want to use unlock. */
#define PLATE_IS_LOCKED 		32 /* used internally by the lock manager */
#define PLATE_USES_COMBINATIONS 	64 /* used internally by the lock manager */
#define PLATE_IN_COMBINATION		128 /* used internally by the lock manager */

/*
 * DECLARE_LOCK(nbranches,ncols,flg) opens a block and sets up a zeroed
 * lock frame with room for nbranches*ncols locks: p_n on the stack, or
 * a calloc'ed frame when IS_ITER is true.  pp_n points to the frame.
 * The block is left open; it is closed by the macro that ends the lock
 * statement.  A failed allocation is reported through RFATAL instead
 * of being dereferenced.
 */
#define DECLARE_LOCK(nbranches,ncols,flg) \
	   { /*volatile int br_br=0;*/ \
	     /*void* locked_lock;*/ \
	     EXCEPT_LOCK_ELEMENT((nbranches)*(ncols)) p_n,*pp_n;SP \
	     if(IS_ITER) { \
		     pp_n=(void *)calloc(1,sizeof(p_n));  \
		     if(pp_n==NULL) RFATAL("out of memory in DECLARE_LOCK"); \
		     pp_n->c.type=EXCEPT_LOCK; \
		     pp_n->c.heap=1; \
	     } else { \
		     pp_n=&p_n; \
		     memset(&p_n,0,sizeof(p_n)); \
		     pp_n->c.type=EXCEPT_LOCK; \
		     pp_n->c.heap=0; \
	     } \
	     pp_n->c.prev_frame=pp_n->c.next_frame=NULL; \
	     pp_n->flags=(flg);				     \
	     pp_n->branches=(nbranches);			     \
	     pp_n->cols=(ncols); \
	     pp_n->chosen_branch=0;

/*
 * CHKL(lock,c): with VOID_CHK, a fatal error if lock is void or is not
 * a $LOCK according to EXT_LOCK_is_dlock(); c is only used to name the
 * argument in the message.  Expands to two consecutive if statements.
 */
#ifdef VOID_CHK
#define CHKL(lock,c)  if(FVOID((lock))) \
			RFATAL("using a void lock (arg" #c ")");  \
		      if(!EXT_LOCK_is_dlock((lock))) \
	RFATAL("argument for lock stmt is not of type $LOCK (arg" #c ")");
#else
#define CHKL(lock,c) 
#endif
/*
 * ADD_LOCK(c,l,lck): store lock lck in slot c*cols+l of the current
 * lock frame (pp_n).  lck is evaluated once; c and l are parenthesized
 * so that compound index expressions address the intended slot.
 */
#define ADD_LOCK(c,l,lck) do { LOCK lCk_kCl=(LOCK)(lck);CHKL(lCk_kCl,l);pp_n->locks[(c)*pp_n->cols+(l)]=(LOCK)(lCk_kCl); } while(0)

/* push the current lock frame on / pop one frame off the exception stack */
#define LM_PUSH_ON_EXEPT_STACK	p_ex_push(pp_n);
#define LM_POP_OFF_EXEPT_STACK	p_ex_pop(1);

extern void LM_EXTERNAL_INTERFACE_acquire_single_lock(void *lck);
extern void LM_EXTERNAL_INTERFACE_release_single_lock(void *lck);

/*
 * Single lock statements that do not use the exception stack.
 * ACQUIRE opens one block, TRY opens an if plus an inner block; both
 * declare pp_n (always void) and locked_lock (the lock, as void *).
 * RELEASE releases locked_lock and closes one block, so a TRY statement
 * needs one more closing brace afterwards.
 * lck is parenthesized before the cast to void *.
 */
#define LM_ACQUIRE_SINGLE_LOCK_NO_STACK(lck) \
	    {	void* pp_n = (void*)0; void* locked_lock = (void*)(lck); \
		LM_EXTERNAL_INTERFACE_acquire_single_lock((lck));

#define LM_TRY_SINGLE_LOCK_NO_STACK(lck) \
	    if(LM_EXTERNAL_INTERFACE_try_single_lock((lck))) { \
	    	{ void* pp_n = (void*)0; void* locked_lock = (void*)(lck);

#define LM_RELEASE_SINGLE_LOCK_NO_STACK \
	    LM_EXTERNAL_INTERFACE_release_single_lock((locked_lock)); }

/*
 * Conjunctive lock statements: all `cols' locks stored at the start of
 * the current lock frame (pp_n) are acquired, tried or released
 * together.  The TRY form opens an if block.  RELEASE also sets
 * `branches' to 0, which marks the frame as done.
 */
#define LM_ACQUIRE_CONJUNCTIVE_LOCKS_NO_STACK \
	LM_EXTERNAL_INTERFACE_acquire_conjunctive_locks(pp_n->locks,pp_n->cols);

#define LM_TRY_CONJUNCTIVE_LOCKS_NO_STACK \
	if (LM_EXTERNAL_INTERFACE_try_conjunctive_locks(pp_n->locks,pp_n->cols)) {

#define LM_RELEASE_CONJUNCTIVE_LOCKS_NO_STACK \
	LM_EXTERNAL_INTERFACE_release_conjunctive_locks(pp_n->locks,pp_n->cols); \
	pp_n->branches=0;	/* Mark EXCEP_LOCK_ELEMENT done. */

/*
 * Single lock statements on the frame: only locks[0] is used.  RELEASE
 * skips the release call when that slot is void, and marks the frame
 * as done in either case.
 */
#define LM_ACQUIRE_SINGLE_LOCK \
	LM_EXTERNAL_INTERFACE_acquire_single_lock(pp_n->locks[0]);

#define LM_TRY_SINGLE_LOCK \
	if(LM_EXTERNAL_INTERFACE_try_single_lock(pp_n->locks[0])) {

#define LM_RELEASE_SINGLE_LOCK \
	if (pp_n->locks[0] != (LOCK)0) \
		LM_EXTERNAL_INTERFACE_release_single_lock(pp_n->locks[0]); \
	pp_n->branches=0;	/* Mark EXCEP_LOCK_ELEMENT done. */

/* The forms without _NO_STACK expand to exactly the same code. */
#define LM_ACQUIRE_CONJUNCTIVE_LOCKS	LM_ACQUIRE_CONJUNCTIVE_LOCKS_NO_STACK

#define LM_TRY_CONJUNCTIVE_LOCKS	LM_TRY_CONJUNCTIVE_LOCKS_NO_STACK

#define LM_RELEASE_CONJUNCTIVE_LOCKS	LM_RELEASE_CONJUNCTIVE_LOCKS_NO_STACK

/*
 * Disjunctive lock statement.  SELECT opens a switch on the branch
 * chosen by the lock manager (stored in chosen_branch); -2 is reported
 * as a deadlock.  Each LM_LOCK_CLAUSE(n) / LM_ELSE_CLAUSE ends the
 * previous case and starts the one for branch n / for -1.
 * LM_END_DISJUNCTIVE_LOCKS_SELECTION adds a default case that reports a
 * runtime error and closes the switch.
 */
#define LM_SELECT_DISJUNCTIVE_LOCKS \
	switch(pp_n->chosen_branch = \
	       LM_EXTERNAL_INTERFACE_acquire_disjunctive_locks(pp_n)) \
	{ \
		case -2: SP RFATAL("deadlock detected in lock clause");

extern void LM_EXTERNAL_INTERFACE_release_conjunctive_locks(void *lock,int size);

/*
 * LM_RELEASE_LOCKS(ex): release the `cols' locks of the chosen branch
 * of frame ex, i.e. those starting at slot chosen_branch*cols, and mark
 * the frame as done.
 */
#define LM_RELEASE_LOCKS(ex) \
	LM_EXTERNAL_INTERFACE_release_conjunctive_locks( \
		&(ex)->locks[(ex)->chosen_branch * (ex)->cols], \
		(ex)->cols); \
	(ex)->branches=0;	/* Mark EXCEP_LOCK_ELEMENT done. */

#define LM_RELEASE_DISJUNCTIVE_LOCKS	LM_RELEASE_LOCKS(pp_n)

#define LM_LOCK_CLAUSE(n) \
	break; \
	case (n): SP; \
		pp_n->chosen_branch = (n);

#define LM_ELSE_CLAUSE \
	break; \
	case -1: SP;

#define LM_END_DISJUNCTIVE_LOCKS_SELECTION \
	break; \
	default: RFATAL("runtime error 991"); }

/* closes the block opened at the start of the lock statement */
#define LM_CLOSE_LOCK_STATEMENT	}

/* Unlock the lock remembered in locked_lock. */
#define LM_LOCK_UNLOCK_NOW	LM_UNLOCK(locked_lock);

/* View p as a lock frame to reach chosen_branch, cols and locks[]. */
#define LM_CAST(p)	((EXCEPT_LOCK_ELEMENT(1)*)(p))
/*
 * LM_UNLOCK(n): unlock lock n before the end of the lock statement.
 * Expects pp_n and locked_lock to be in scope.  After the lock manager
 * has released n, the reference to it is set to void:
 *  - if pp_n is void, the reference is locked_lock;
 *  - otherwise n is looked up among the `cols' slots of the chosen
 *    branch; if found, the following slots of that branch move one
 *    position to the left and the last slot of the branch becomes void.
 * The search never reads or writes a slot beyond the chosen branch.
 * Note that n is evaluated more than once.
 */
#define LM_UNLOCK(n) \
	    do{ SP CHKV(n); \
		/* release the lock */ \
		LM_EXTERNAL_INTERFACE_unlock_single_lock(pp_n,n); \
		/* Set the lock reference to void */ \
		/* if .pp_n is void the reference is .locked_lock */ \
		if (pp_n == (void*)0) locked_lock = (void*)0; \
		else { \
		    /* .i: first slot of the chosen when clause */ \
		    int i = LM_CAST(pp_n)->chosen_branch \
                            * LM_CAST(pp_n)->cols; \
		    /* .j: one past the last slot of the when clause */ \
		    int j = i + LM_CAST(pp_n)->cols; \
		    /* Scan the when clause for lock .n; the bound is */ \
		    /* tested before the slot is read. */ \
		    while(i<j \
                          && (void*)(LM_CAST(pp_n)->locks[i]) != (void*)(n)) \
			i++; \
		    /* If the lock has been found ... */ \
		    if( i<j ) { \
			/* ... shift all remaining locks by one to the */ \
			/* left and set the last entry of the when */ \
			/* clause to void. */ \
			while(i+1<j) {  \
			    LM_CAST(pp_n)->locks[i] = \
				LM_CAST(pp_n)->locks[i+1]; i++; } \
			LM_CAST(pp_n)->locks[i] = (void*)0; } } \
	    } while(0)

/*
#define SELECT_LOCK \
		switch(pp_n->chosen_branch=r_lock((void *)pp_n)) { \
		case -2: SP RFATAL("deadlock detected in lock clause");

#define BRANCH(n) \
		         break; \
		case ((n)-1):SP  \
			if(!(pp_n->flags&LOCK_NO_STACK)) p_ex_push(pp_n);

#define BRANCH_ELSE \
			 break; \
		case -1:SP 

#define LOCK_END \
		         break; \
		default: RFATAL("runtime error 991"); \
		} \
	        if(pp_n->chosen_branch>=0) p_ex_pop(1); \
	   }

#define LOCK_END_NO_STACK \
		         break; \
		default: RFATAL("runtime error 991"); \
		} \
	        if(pp_n->chosen_branch>=0) LOCK_UNLOCK_NOW; \
	   }

#define LOCK_UNLOCK_NOW		r_endlock((void *)pp_n)

 *
 * You can unlock any lock acquired by either a LOCK or a TRY
 * statement before the LOCKEND 
 * 
#define UNLOCK(n)	do {SP CHKV(n);r_unlock((void *)pp_n,n); } while(0)
*/

/*
 * LOOP protections (used for locks in iters)
 *
 * LOOP_BEGIN(iters) opens a block, sets up a zeroed loop frame with
 * `iters' slots (p_n on the stack, or a calloc'ed frame when IS_ITER is
 * true), sets every slot to NULL and pushes the frame on the exception
 * stack.  LOOP_END pops it again and closes the block.
 * A failed allocation is reported through RFATAL instead of being
 * dereferenced.
 */
#define LOOP_BEGIN(iters) \
	   { volatile int br_br=0; \
	     EXCEPT_LOOP_ELEMENT(iters) p_n,*pp_n;SP \
	     if(IS_ITER) { \
		     pp_n=(void *)calloc(1,sizeof(p_n));  \
		     if(pp_n==NULL) RFATAL("out of memory in LOOP_BEGIN"); \
		     pp_n->c.type=EXCEPT_LOOP; \
		     pp_n->c.heap=1; \
	     } else { \
		     pp_n=&p_n; \
		     memset(&p_n,0,sizeof(p_n)); \
		     pp_n->c.type=EXCEPT_LOOP; \
		     pp_n->c.heap=0; \
	     } \
	     pp_n->c.prev_frame=pp_n->c.next_frame=NULL; \
	     pp_n->slots=(iters);			     \
             do { int i; \
                     for(i=0;i<pp_n->slots;i++) pp_n->slot[i]=NULL;  \
             } while(0); \
	     p_ex_push(pp_n);

#define LOOP_END \
	     p_ex_pop(1); \
	   }

/*
 * VASS_xx(a,typeof(a),b) guarantees atomicity for value type assignment a=b;
 * FORBID vlocks
 * FORBID xX
 * Each x is either L, if the corresponding argument is thread local and
 *                     needs no locking, or
 *                  P, if it may be known to some other thread.
 *
 * LOCKV(adr) / UNLOCKV(adr) take and release the entry of the vlocks
 * table that VLOCK_HASH selects for adr.  The table holds spinlocks
 * when _REENTRANT, __GNUC__ and __sparc__ are all defined, and
 * BR_lock_t locks otherwise.
 */
#if !(defined(_REENTRANT) && defined(__GNUC__) && defined(__sparc__))
extern BR_lock_t vlocks[];
#define LOCKV(adr) BR_LOCK(vlocks[VLOCK_HASH(adr)])
#define UNLOCKV(adr) BR_UNLOCK(vlocks[VLOCK_HASH(adr)])
#else
/* use spinlocks */
extern spinlock_t vlocks[];
#define LOCKV(adr) SPINLOCK_LOCK((vlocks[VLOCK_HASH(adr)]))
#define UNLOCKV(adr) SPINLOCK_UNLOCK((vlocks[VLOCK_HASH(adr)]))
#endif
/* both sides shared: copy b into the temporary xX under b's lock, then store it under a's lock */
#define VASS_PP(a,type,b) \
	do { \
		type xX;SP \
		LOCKV(&(b));xX=(b);UNLOCKV(&(b)); \
		LOCKV(&(a));a=xX;UNLOCKV(&(a)); \
	} while(0)
/* only the destination is shared: assign under a's lock */
#define VASS_PL(a,type,b) \
	do { SP \
		LOCKV(&(a));(a)=(b);UNLOCKV(&(a)); \
	} while(0)
/* only the source is shared: assign under b's lock */
#define VASS_LP(a,type,b) \
	do { SP \
		LOCKV(&(b));(a)=(b);UNLOCKV(&(b)); \
	} while(0)
/* both sides thread local: plain assignment with a cast to type */
#define VASS_LL(a,type,b) \
	do { SP \
		(a)=(type)(b); \
	} while(0)

/*
 * remote reads and writes
 * Reads are of the form 
 * R(local_var,type_of_remote,remote,type_of_attr,attr);
 * F_R_RATTR_xx is used for reference types,
 * F_VA_RATTR_xx if a standard assignment local=(type_of_attr)((type_of_remote)remote).attr is
 * atomic, F_V_RATTR_xx is used if it is not atomic. x has the
 * same meaning as in VASS_xx.
 */


/*
 * Dispatch table for the low-level remote access primitives.
 * Two compile-time switches select the expansion:
 *   ATTR_TRACE && PSATHER_TRACE : the trailing r,T,A,I arguments are
 *                                 forwarded to the runtime function;
 *                                 otherwise they are dropped.
 *   IMPORT_CACHE                : reads go to cache_*_read /
 *                                 pre_cache_*_read instead of
 *                                 remote_*_read / pre_remote_*_read.
 *                                 Writes always use remote_*_write /
 *                                 post_remote_*_write.
 * The PRE_ variants take two extra leading arguments p,p1 and pass their
 * addresses. All four PRE_READ_V_IT variants take 13 parameters (including
 * the tag), because PRE_V_RATTR always calls it with 13 arguments.
 */
#if defined(ATTR_TRACE) && defined(PSATHER_TRACE)
#  ifdef IMPORT_CACHE
#    define READ_R_IT(a,b,c,r,T,A,I)			cache_r_read(a,b,c,r,T,A,I)
#    define READ_VA_IT(a,b,c,d,e,f,r,T,A,I) 		cache_va_read(a,b,c,d,e,f,r,T,A,I)
#    define READ_V_IT(a,b,c,tg,d,e,f,r,T,A,I) 		cache_v_read(a,b,c,tg,d,e,f,r,T,A,I)
#    define PRE_READ_R_IT(p,p1,a,b,c,r,T,A,I)		pre_cache_r_read(&p,&p1,a,b,c,r,T,A,I)
#    define PRE_READ_VA_IT(p,p1,a,b,c,d,e,f,r,T,A,I) 	pre_cache_va_read(&p,&p1,a,b,c,d,e,f,r,T,A,I)
#    define PRE_READ_V_IT(p,p1,a,b,c,tg,d,e,f,r,T,A,I) 	pre_cache_v_read(&p,&p1,a,b,c,tg,d,e,f,r,T,A,I)
#    define WRITE_R_IT(a,b,c,r,T,A,I)			remote_r_write(a,b,c,r,T,A,I)
#    define WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I) 		remote_va_write(a,b,c,d,e,f,r,T,A,I)
#    define WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I) 		remote_v_write(a,b,c,d,e,f,tg,r,T,A,I)
#    define POST_WRITE_R_IT(a,b,c,r,T,A,I)		post_remote_r_write(a,b,c,r,T,A,I)
#    define POST_WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I) 	post_remote_va_write(a,b,c,d,e,f,r,T,A,I)
#    define POST_WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I) 	post_remote_v_write(a,b,c,d,e,f,tg,r,T,A,I)
#  else
#    define READ_R_IT(a,b,c,r,T,A,I)			remote_r_read(a,b,c,r,T,A,I)
#    define READ_VA_IT(a,b,c,d,e,f,r,T,A,I) 		remote_va_read(a,b,c,d,e,f,r,T,A,I)
#    define READ_V_IT(a,b,c,tg,d,e,f,r,T,A,I) 		remote_v_read(a,b,c,tg,d,e,f,r,T,A,I)
#    define PRE_READ_R_IT(p,p1,a,b,c,r,T,A,I)		pre_remote_r_read(&p,&p1,a,b,c,r,T,A,I)
#    define PRE_READ_VA_IT(p,p1,a,b,c,d,e,f,r,T,A,I) 	pre_remote_va_read(&p,&p1,a,b,c,d,e,f,r,T,A,I)
#    define PRE_READ_V_IT(p,p1,a,b,c,tg,d,e,f,r,T,A,I) 	pre_remote_v_read(&p,&p1,a,b,c,tg,d,e,f,r,T,A,I)
#    define WRITE_R_IT(a,b,c,r,T,A,I)			remote_r_write(a,b,c,r,T,A,I)
#    define WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I) 		remote_va_write(a,b,c,d,e,f,r,T,A,I)
#    define WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I) 		remote_v_write(a,b,c,d,e,f,tg,r,T,A,I)
#    define POST_WRITE_R_IT(a,b,c,r,T,A,I)		post_remote_r_write(a,b,c,r,T,A,I)
#    define POST_WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I) 	post_remote_va_write(a,b,c,d,e,f,r,T,A,I)
#    define POST_WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I) 	post_remote_v_write(a,b,c,d,e,f,tg,r,T,A,I)
#  endif
#else
#  ifdef IMPORT_CACHE
#    define READ_R_IT(a,b,c,r,T,A,I)			cache_r_read(a,b,c)
#    define READ_VA_IT(a,b,c,d,e,f,r,T,A,I) 		cache_va_read(a,b,c,d,e,f)
#    define READ_V_IT(a,b,c,tl,d,e,f,r,T,A,I) 		cache_v_read(a,b,c,tl,d,e,f)
#    define PRE_READ_R_IT(p,p1,a,b,c,r,T,A,I)		pre_cache_r_read(&p,&p1,a,b,c)
#    define PRE_READ_VA_IT(p,p1,a,b,c,d,e,f,r,T,A,I) 	pre_cache_va_read(&p,&p1,a,b,c,d,e,f)
#    define PRE_READ_V_IT(p,p1,a,b,c,tl,d,e,f,r,T,A,I) 	pre_cache_v_read(&p,&p1,a,b,c,tl,d,e,f)
#    define WRITE_R_IT(a,b,c,r,T,A,I)			remote_r_write(a,b,c)
#    define WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I) 		remote_va_write(a,b,c,d,e,f)
#    define WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I) 		remote_v_write(a,b,c,d,e,f,tg)
#    define POST_WRITE_R_IT(a,b,c,r,T,A,I)		post_remote_r_write(a,b,c)
#    define POST_WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I) 	post_remote_va_write(a,b,c,d,e,f)
#    define POST_WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I) 	post_remote_v_write(a,b,c,d,e,f,tg)
#  else
#    define READ_R_IT(a,b,c,r,T,A,I)			remote_r_read(a,b,c)
#    define READ_VA_IT(a,b,c,d,e,f,r,T,A,I)		remote_va_read(a,b,c,d,e,f)
#    define READ_V_IT(a,b,tl,c,d,e,f,r,T,A,I)		remote_v_read(a,b,tl,c,d,e,f)
#    define PRE_READ_R_IT(p,p1,a,b,c,r,T,A,I)		pre_remote_r_read(&p,&p1,a,b,c)
#    define PRE_READ_VA_IT(p,p1,a,b,c,d,e,f,r,T,A,I)	pre_remote_va_read(&p,&p1,a,b,c,d,e,f)
#    define PRE_READ_V_IT(p,p1,a,b,tl,c,d,e,f,r,T,A,I)	pre_remote_v_read(&p,&p1,a,b,tl,c,d,e,f)
#    define WRITE_R_IT(a,b,c,r,T,A,I)			remote_r_write(a,b,c)
#    define WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I)		remote_va_write(a,b,c,d,e,f)
#    define WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I)		remote_v_write(a,b,c,d,e,f,tg)
#    define POST_WRITE_R_IT(a,b,c,r,T,A,I)		post_remote_r_write(a,b,c)
#    define POST_WRITE_VA_IT(a,b,c,d,e,f,r,T,A,I)	post_remote_va_write(a,b,c,d,e,f)
#    define POST_WRITE_V_IT(a,b,c,d,e,f,tg,r,T,A,I)	post_remote_v_write(a,b,c,d,e,f,tg)
#  endif
#endif

/*
 * F_R_RATTR: read reference attribute y of object x (viewed as type T)
 * into local.
 *   PSATHER1 : plain read through ((T)(x))->y.
 *   otherwise: if NEAR(x) the attribute is read directly through
 *              POINTER(x); if not, READ_R_IT is invoked with WHERE(x),
 *              the address of local and the address of the attribute.
 * PRE_R_RATTR (non-PSATHER1 only) is identical except that the non-near
 * case goes through PRE_READ_R_IT and additionally passes p and p1.
 * atm_l and atm_r do not appear in either expansion; TT, A and I are only
 * forwarded to the *_IT macro.
 */
#ifdef PSATHER1
# define F_R_RATTR(local,atm_l,T,x,y,atm_r,TT,A,I) do {SP CHKV(x) \
				local=(void *)((T)(x))->y;  \
			} while(0)
#else
#define F_R_RATTR(local,atm_l,T,x,y,atm_r,TT,A,I) do {SP CHKV(x) \
			if(NEAR(x)) local=(void *)((T)(POINTER(x)))->y; \
			else READ_R_IT(WHERE(x), \
				 (FOB*)&local, \
			         (FOB*)&(((T)(POINTER(x)))->y), \
				      POINTER(x),TT,A,I); \
			} while(0)
#define PRE_R_RATTR(p,p1,local,atm_l,T,x,y,atm_r,TT,A,I) do {SP CHKV(x) \
			if(NEAR(x)) local=(void *)((T)(POINTER(x)))->y; \
			else PRE_READ_R_IT(p,p1,WHERE(x), \
				 (FOB*)&local, \
			         (FOB*)&(((T)(POINTER(x)))->y), \
				      POINTER(x),TT,A,I); \
			} while(0)
#endif

/*
 * Attribute wrappers: the first suffix letter selects atm_l, the second
 * atm_r (A=1, N=0). TT and A are the stringified type and attribute
 * name, I is 0.
 */
#define F_R_RATTR_AA(local,T,x,y) F_R_RATTR(local,1,T,x,y,1,#T,#y,0)
#define F_R_RATTR_NA(local,T,x,y) F_R_RATTR(local,0,T,x,y,1,#T,#y,0)
#define F_R_RATTR_AN(local,T,x,y) F_R_RATTR(local,1,T,x,y,0,#T,#y,0)
#define F_R_RATTR_NN(local,T,x,y) F_R_RATTR(local,0,T,x,y,0,#T,#y,0)

/*
 * Array element wrappers: CHKB(i,0,F_ASIZE(T,x)) is expanded first, then
 * element arr_part[i] is read; A is NULL and I is the index.
 */
#define F_R_RARR_AA(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_RATTR(local,1,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define F_R_RARR_NA(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_RATTR(local,0,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define F_R_RARR_AN(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_RATTR(local,1,T,x,arr_part[i],0,#T,NULL,i);}while (0)
#define F_R_RARR_NN(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_RATTR(local,0,T,x,arr_part[i],0,#T,NULL,i);}while (0)

/* PRE_ counterparts of the wrappers above; only available without PSATHER1. */
#ifndef PSATHER1
#define PRE_R_RATTR_AA(p,p1,local,T,x,y) PRE_R_RATTR(p,p1,local,1,T,x,y,1,#T,#y,0)
#define PRE_R_RATTR_NA(p,p1,local,T,x,y) PRE_R_RATTR(p,p1,local,0,T,x,y,1,#T,#y,0)
#define PRE_R_RATTR_AN(p,p1,local,T,x,y) PRE_R_RATTR(p,p1,local,1,T,x,y,0,#T,#y,0)
#define PRE_R_RATTR_NN(p,p1,local,T,x,y) PRE_R_RATTR(p,p1,local,0,T,x,y,0,#T,#y,0)

#define PRE_R_RARR_AA(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_R_RATTR(p,p1,local,1,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define PRE_R_RARR_NA(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_R_RATTR(p,p1,local,0,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define PRE_R_RARR_AN(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_R_RATTR(p,p1,local,1,T,x,arr_part[i],0,#T,NULL,i);}while (0)
#define PRE_R_RARR_NN(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_R_RATTR(p,p1,local,0,T,x,arr_part[i],0,#T,NULL,i);}while (0)
#endif

/*
 * F_VA_RATTR: read attribute y of object x (viewed as type T) into local
 * by plain assignment (no cast of the value).
 *   PSATHER1 : local=((T)(x))->y.
 *   otherwise: direct read through POINTER(x) when NEAR(x); else
 *              READ_VA_IT is invoked with WHERE(x), &local and atm_l,
 *              the attribute address and atm_r, and the attribute size.
 * PRE_VA_RATTR (non-PSATHER1 only) uses PRE_READ_VA_IT and additionally
 * passes p and p1. TT, A and I are only forwarded.
 */
#ifdef PSATHER1
#define F_VA_RATTR(local,atm_l,T,x,y,atm_r,TT,A,I) do {SP CHKV(x) \
			local=((T)(x))->y; \
			} while(0)
#else
#define F_VA_RATTR(local,atm_l,T,x,y,atm_r,TT,A,I) do {SP CHKV(x) \
			if(NEAR(x)) local=((T)(POINTER(x)))->y; \
			else READ_VA_IT(WHERE(x), \
				 &local,atm_l, \
			         &(((T)(POINTER(x)))->y),atm_r, \
				 sizeof(((T)(POINTER(x)))->y), \
				      POINTER(x),TT,A,I); \
			} while(0)
#define PRE_VA_RATTR(p,p1,local,atm_l,T,x,y,atm_r,TT,A,I) do {SP CHKV(x)\
			if(NEAR(x)) local=((T)(POINTER(x)))->y; \
			else PRE_READ_VA_IT(p,p1,WHERE(x), \
				 &local,atm_l, \
			         &(((T)(POINTER(x)))->y),atm_r, \
				 sizeof(((T)(POINTER(x)))->y), \
				      POINTER(x),TT,A,I); \
			} while(0)
#endif

/*
 * Attribute wrappers: first suffix letter -> atm_l, second -> atm_r
 * (A=1, N=0); TT/A are the stringified type and attribute name, I is 0.
 */
#define F_VA_RATTR_AA(local,T,x,y) F_VA_RATTR(local,1,T,x,y,1,#T,#y,0)
#define F_VA_RATTR_NA(local,T,x,y) F_VA_RATTR(local,0,T,x,y,1,#T,#y,0)
#define F_VA_RATTR_AN(local,T,x,y) F_VA_RATTR(local,1,T,x,y,0,#T,#y,0)
#define F_VA_RATTR_NN(local,T,x,y) F_VA_RATTR(local,0,T,x,y,0,#T,#y,0)

/*
 * Array element wrappers: CHKB(i,0,F_ASIZE(T,x)) first, then arr_part[i]
 * is read; A is NULL and I is the index.
 */
#define F_VA_RARR_AA(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_RATTR(local,1,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define F_VA_RARR_NA(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_RATTR(local,0,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define F_VA_RARR_AN(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_RATTR(local,1,T,x,arr_part[i],0,#T,NULL,i);}while (0)
#define F_VA_RARR_NN(local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_RATTR(local,0,T,x,arr_part[i],0,#T,NULL,i);}while (0)

/* PRE_ counterparts of the wrappers above; only available without PSATHER1. */
#ifndef PSATHER1
#define PRE_VA_RATTR_AA(p,p1,local,T,x,y) PRE_VA_RATTR(p,p1,local,1,T,x,y,1,#T,#y,0)
#define PRE_VA_RATTR_NA(p,p1,local,T,x,y) PRE_VA_RATTR(p,p1,local,0,T,x,y,1,#T,#y,0)
#define PRE_VA_RATTR_AN(p,p1,local,T,x,y) PRE_VA_RATTR(p,p1,local,1,T,x,y,0,#T,#y,0)
#define PRE_VA_RATTR_NN(p,p1,local,T,x,y) PRE_VA_RATTR(p,p1,local,0,T,x,y,0,#T,#y,0)

#define PRE_VA_RARR_AA(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_VA_RATTR(p,p1,local,1,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define PRE_VA_RARR_NA(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_VA_RATTR(p,p1,local,0,T,x,arr_part[i],1,#T,NULL,i);}while (0)
#define PRE_VA_RARR_AN(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_VA_RATTR(p,p1,local,1,T,x,arr_part[i],0,#T,NULL,i);}while (0)
#define PRE_VA_RARR_NN(p,p1,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_VA_RATTR(p,p1,local,0,T,x,arr_part[i],0,#T,NULL,i);}while (0)
#endif

/*
 * F_V_RATTR: read value attribute y of object x (viewed as type T) into
 * local, using the assignment macro VA (one of VASS_xx) with type TL.
 *   PSATHER1 : VA(local,TL,((T)(x))->y).
 *   otherwise: nothing is done when x is NULL. For a non-NULL x the
 *              attribute is assigned with VA through POINTER(x) when
 *              NEAR(x); else READ_V_IT is invoked with WHERE(x), &local,
 *              local_l, the tag TL_tag (built with ##), the attribute
 *              address, remote_l and the attribute size.
 * The `else` belongs to the inner if(NEAR(x)) (dangling else); the macro
 * depends on that binding.
 * PRE_V_RATTR (non-PSATHER1 only) uses PRE_READ_V_IT and additionally
 * passes p and p1. TT, A and I are only forwarded.
 */
#ifdef PSATHER1
# define F_V_RATTR(local,local_l,T,x,y,remote_l,VA,TL,TT,A,I) do {SP CHKV(x)\
			VA(local,TL,((T)(x))->y);			    \
			} while(0)
#else
#define F_V_RATTR(local,local_l,T,x,y,remote_l,VA,TL,TT,A,I) do {SP CHKV(x)\
			if(x!=NULL) if(NEAR(x)) VA(local,TL,((T)(POINTER(x)))->y); \
			else READ_V_IT(WHERE(x), \
				 &local,local_l,TL ## _tag, \
			         &(((T)(POINTER(x)))->y),remote_l, \
				 sizeof(((T)(POINTER(x)))->y), \
				 POINTER(x),TT,A,I); \
			} while(0)
#define PRE_V_RATTR(p,p1,local,local_l,T,x,y,remote_l,VA,TL,TT,A,I) do {SP CHKV(x)\
			if(x!=NULL) if(NEAR(x)) VA(local,TL,((T)(POINTER(x)))->y); \
			else PRE_READ_V_IT(p,p1,WHERE(x), \
				 &local,local_l,TL ## _tag, \
			         &(((T)(POINTER(x)))->y),remote_l, \
				 sizeof(((T)(POINTER(x)))->y), \
				 POINTER(x),TT,A,I); \
			} while(0)
#endif

/*
 * Attribute wrappers: the suffix names the VASS_ variant used; the first
 * letter also selects local_l and the second remote_l (P=1, L=0).
 * TT/A are the stringified type and attribute name, I is 0.
 */
#define F_V_RATTR_PP(TL,local,T,x,y) F_V_RATTR(local,1,T,x,y,1,VASS_PP,TL,#T,#y,0)
#define F_V_RATTR_LP(TL,local,T,x,y) F_V_RATTR(local,0,T,x,y,1,VASS_LP,TL,#T,#y,0)
#define F_V_RATTR_PL(TL,local,T,x,y) F_V_RATTR(local,1,T,x,y,0,VASS_PL,TL,#T,#y,0)
#define F_V_RATTR_LL(TL,local,T,x,y) F_V_RATTR(local,0,T,x,y,0,VASS_LL,TL,#T,#y,0)

/*
 * Array element wrappers: CHKB(i,0,F_ASIZE(T,x)) first, then arr_part[i]
 * is read; A is NULL and I is the index.
 */
#define F_V_RARR_PP(TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_RATTR(local,1,T,x,arr_part[i],1,VASS_PP,TL,#T,NULL,i);}while (0)
#define F_V_RARR_LP(TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_RATTR(local,0,T,x,arr_part[i],1,VASS_LP,TL,#T,NULL,i);}while (0)
#define F_V_RARR_PL(TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_RATTR(local,1,T,x,arr_part[i],0,VASS_PL,TL,#T,NULL,i);}while (0)
#define F_V_RARR_LL(TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_RATTR(local,0,T,x,arr_part[i],0,VASS_LL,TL,#T,NULL,i);}while (0)

/* PRE_ counterparts of the wrappers above; only available without PSATHER1. */
#ifndef PSATHER1
#define PRE_V_RATTR_PP(p,p1,TL,local,T,x,y) PRE_V_RATTR(p,p1,local,1,T,x,y,1,VASS_PP,TL,#T,#y,0)
#define PRE_V_RATTR_LP(p,p1,TL,local,T,x,y) PRE_V_RATTR(p,p1,local,0,T,x,y,1,VASS_LP,TL,#T,#y,0)
#define PRE_V_RATTR_PL(p,p1,TL,local,T,x,y) PRE_V_RATTR(p,p1,local,1,T,x,y,0,VASS_PL,TL,#T,#y,0)
#define PRE_V_RATTR_LL(p,p1,TL,local,T,x,y) PRE_V_RATTR(p,p1,local,0,T,x,y,0,VASS_LL,TL,#T,#y,0)

#define PRE_V_RARR_PP(p,p1,TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_V_RATTR(p,p1,local,1,T,x,arr_part[i],1,VASS_PP,TL,#T,NULL,i);}while (0)
#define PRE_V_RARR_LP(p,p1,TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_V_RATTR(p,p1,local,0,T,x,arr_part[i],1,VASS_LP,TL,#T,NULL,i);}while (0)
#define PRE_V_RARR_PL(p,p1,TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_V_RATTR(p,p1,local,1,T,x,arr_part[i],0,VASS_PL,TL,#T,NULL,i);}while (0)
#define PRE_V_RARR_LL(p,p1,TL,local,T,x,i) do{SP CHKB(i,0,F_ASIZE(T,x)) PRE_V_RATTR(p,p1,local,0,T,x,arr_part[i],0,VASS_LL,TL,#T,NULL,i);}while (0)
#endif

/*
 * F_R_WATTR: store the reference `local` into attribute y of object x
 * (viewed as type T).
 *   PSATHER1 : ((T)(x))->y=(void *)local.
 *   otherwise: direct store through POINTER(x) when NEAR(x); else
 *              WRITE_R_IT is invoked with WHERE(x), the attribute address
 *              and the address of local (so local must be an lvalue).
 * POST_R_WATTR (non-PSATHER1 only) uses POST_WRITE_R_IT for the non-near
 * case. atm_r and atm_l do not appear in either expansion; TT, A and I
 * are only forwarded.
 */
#ifdef PSATHER1
#define F_R_WATTR(T,x,y,atm_r,local,atm_l,TT,A,I) do {SP CHKV(x) \
			((T)(x))->y=(void *)local; \
			} while(0)
#else
#define F_R_WATTR(T,x,y,atm_r,local,atm_l,TT,A,I) do {SP CHKV(x) \
			if(NEAR(x)) ((T)(POINTER(x)))->y=(void *)local; \
			else WRITE_R_IT(WHERE(x), \
			         (FOB*)&(((T)(POINTER(x)))->y), \
				 (FOB*)&local,POINTER(x),TT,A,I); \
			} while(0)
#define POST_R_WATTR(T,x,y,atm_r,local,atm_l,TT,A,I) do {SP CHKV(x) \
			if(NEAR(x)) ((T)(POINTER(x)))->y=(void *)local; \
			else POST_WRITE_R_IT(WHERE(x), \
			         (FOB*)&(((T)(POINTER(x)))->y), \
				 (FOB*)&local,POINTER(x),TT,A,I); \
			} while(0)
#endif

/*
 * Attribute wrappers. Values passed as (atm_r,atm_l):
 * _AA=(1,1), _NA=(1,0), _AN=(0,1), _NN=(0,0).
 * TT/A are the stringified type and attribute name, I is 0.
 */
#define F_R_WATTR_AA(T,x,y,local) F_R_WATTR(T,x,y,1,local,1,#T,#y,0)
#define F_R_WATTR_NA(T,x,y,local) F_R_WATTR(T,x,y,1,local,0,#T,#y,0)
#define F_R_WATTR_AN(T,x,y,local) F_R_WATTR(T,x,y,0,local,1,#T,#y,0)
#define F_R_WATTR_NN(T,x,y,local) F_R_WATTR(T,x,y,0,local,0,#T,#y,0)

/*
 * Array element wrappers: CHKB(i,0,F_ASIZE(T,x)) first, then arr_part[i]
 * is written; A is NULL and I is the index.
 */
#define F_R_WARR_AA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_WATTR(T,x,arr_part[i],1,local,1,#T,NULL,i);} while(0)
#define F_R_WARR_NA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_WATTR(T,x,arr_part[i],1,local,0,#T,NULL,i);} while(0)
#define F_R_WARR_AN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_WATTR(T,x,arr_part[i],0,local,1,#T,NULL,i);} while(0)
#define F_R_WARR_NN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_R_WATTR(T,x,arr_part[i],0,local,0,#T,NULL,i);} while(0)

/* POST_ counterparts of the wrappers above; only available without PSATHER1. */
#ifndef PSATHER1
#define POST_R_WATTR_AA(T,x,y,local) POST_R_WATTR(T,x,y,1,local,1,#T,#y,0)
#define POST_R_WATTR_NA(T,x,y,local) POST_R_WATTR(T,x,y,1,local,0,#T,#y,0)
#define POST_R_WATTR_AN(T,x,y,local) POST_R_WATTR(T,x,y,0,local,1,#T,#y,0)
#define POST_R_WATTR_NN(T,x,y,local) POST_R_WATTR(T,x,y,0,local,0,#T,#y,0)

#define POST_R_WARR_AA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_R_WATTR(T,x,arr_part[i],1,local,1,#T,NULL,i);} while(0)
#define POST_R_WARR_NA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_R_WATTR(T,x,arr_part[i],1,local,0,#T,NULL,i);} while(0)
#define POST_R_WARR_AN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_R_WATTR(T,x,arr_part[i],0,local,1,#T,NULL,i);} while(0)
#define POST_R_WARR_NN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_R_WATTR(T,x,arr_part[i],0,local,0,#T,NULL,i);} while(0)
#endif

/*
 * F_VA_WATTR: store `local` into attribute y of object x (viewed as type
 * T) by plain assignment.
 *   PSATHER1 : ((T)(x))->y=local.
 *   otherwise: direct store through POINTER(x) when NEAR(x); else
 *              WRITE_VA_IT is invoked with WHERE(x), the attribute
 *              address, atm_r, the attribute size, &local and atm_l
 *              (so local must be an lvalue).
 * POST_VA_WATTR (non-PSATHER1 only) uses POST_WRITE_VA_IT for the
 * non-near case. TT, A and I are only forwarded.
 */
#ifdef PSATHER1
# define F_VA_WATTR(T,x,y,atm_r,local,atm_l,TT,A,I) do {SP CHKV(x) \
			((T)(x))->y=local; \
			} while(0)
#else
# define F_VA_WATTR(T,x,y,atm_r,local,atm_l,TT,A,I) do {SP CHKV(x) \
			if(NEAR(x)) ((T)(POINTER(x)))->y=local; \
			else WRITE_VA_IT(WHERE(x), \
			         &(((T)(POINTER(x)))->y),atm_r, \
				 sizeof(((T)(POINTER(x)))->y), \
				 &local,atm_l,POINTER(x),TT,A,I); \
			} while(0)
# define POST_VA_WATTR(T,x,y,atm_r,local,atm_l,TT,A,I) do {SP CHKV(x) \
			if(NEAR(x)) ((T)(POINTER(x)))->y=local; \
			else POST_WRITE_VA_IT(WHERE(x), \
			         &(((T)(POINTER(x)))->y),atm_r, \
				 sizeof(((T)(POINTER(x)))->y), \
				 &local,atm_l,POINTER(x),TT,A,I); \
			} while(0)
#endif

/*
 * Attribute wrappers. Values passed as (atm_r,atm_l):
 * _AA=(1,1), _NA=(1,0), _AN=(0,1), _NN=(0,0).
 * TT/A are the stringified type and attribute name, I is 0.
 */
#define F_VA_WATTR_AA(T,x,y,local) F_VA_WATTR(T,x,y,1,local,1,#T,#y,0)
#define F_VA_WATTR_NA(T,x,y,local) F_VA_WATTR(T,x,y,1,local,0,#T,#y,0)
#define F_VA_WATTR_AN(T,x,y,local) F_VA_WATTR(T,x,y,0,local,1,#T,#y,0)
#define F_VA_WATTR_NN(T,x,y,local) F_VA_WATTR(T,x,y,0,local,0,#T,#y,0)

/*
 * Array element wrappers: CHKB(i,0,F_ASIZE(T,x)) first, then arr_part[i]
 * is written; A is NULL and I is the index.
 */
#define F_VA_WARR_AA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_WATTR(T,x,arr_part[i],1,local,1,#T,NULL,i);} while(0)
#define F_VA_WARR_NA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_WATTR(T,x,arr_part[i],1,local,0,#T,NULL,i);} while(0)
#define F_VA_WARR_AN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_WATTR(T,x,arr_part[i],0,local,1,#T,NULL,i);} while(0)
#define F_VA_WARR_NN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_VA_WATTR(T,x,arr_part[i],0,local,0,#T,NULL,i);} while(0)

/*
 * POST_ wrappers around POST_VA_WATTR; only available without PSATHER1.
 * Values passed as (atm_r,atm_l): _AA=(1,1), _NA=(1,0), _AN=(0,1),
 * _NN=(0,0). TT/A are the stringified type and attribute name, I is 0.
 * The array variants bounds-check the index with
 * CHKB(i,0,F_ASIZE(T,x)) - the same size macro the F_ and POST_R_
 * variants use - then write arr_part[i] with A=NULL and I=i.
 */
#ifndef PSATHER1
#define POST_VA_WATTR_AA(T,x,y,local) POST_VA_WATTR(T,x,y,1,local,1,#T,#y,0)
#define POST_VA_WATTR_NA(T,x,y,local) POST_VA_WATTR(T,x,y,1,local,0,#T,#y,0)
#define POST_VA_WATTR_AN(T,x,y,local) POST_VA_WATTR(T,x,y,0,local,1,#T,#y,0)
#define POST_VA_WATTR_NN(T,x,y,local) POST_VA_WATTR(T,x,y,0,local,0,#T,#y,0)

#define POST_VA_WARR_AA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_VA_WATTR(T,x,arr_part[i],1,local,1,#T,NULL,i);} while(0)
#define POST_VA_WARR_NA(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_VA_WATTR(T,x,arr_part[i],1,local,0,#T,NULL,i);} while(0)
#define POST_VA_WARR_AN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_VA_WATTR(T,x,arr_part[i],0,local,1,#T,NULL,i);} while(0)
#define POST_VA_WARR_NN(T,x,i,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_VA_WATTR(T,x,arr_part[i],0,local,0,#T,NULL,i);} while(0)
#endif

/*
 * F_V_WATTR: store value `local` into attribute y of object x (viewed as
 * type T), using the assignment macro VA (one of VASS_xx) with type TL.
 *   PSATHER1 : VA(((T)(x))->y,TL,local).
 *   otherwise: VA through POINTER(x) when NEAR(x); else WRITE_V_IT is
 *              invoked with WHERE(x), the attribute address, remote_l,
 *              the attribute size, &local, local_l and the tag TL_tag
 *              (built with ##).
 * POST_V_WATTR (non-PSATHER1 only) uses POST_WRITE_V_IT for the non-near
 * case. TT, A and I are only forwarded.
 */
#ifdef PSATHER1
#define F_V_WATTR(T,x,y,remote_l,local,local_l,VA,TL,TT,A,I) do {SP CHKV(x)\
			VA(((T)(x))->y,TL,local); \
			} while(0)
#else
#define F_V_WATTR(T,x,y,remote_l,local,local_l,VA,TL,TT,A,I) do {SP CHKV(x)\
			if(NEAR(x)) VA(((T)(POINTER(x)))->y,TL,local); \
			else WRITE_V_IT(WHERE(x), \
			         &(((T)(POINTER(x)))->y),remote_l, \
				 sizeof(((T)(POINTER(x)))->y), \
				 &local,local_l,TL ## _tag,POINTER(x),TT,A,I); \
			} while(0)
#define POST_V_WATTR(T,x,y,remote_l,local,local_l,VA,TL,TT,A,I) do {SP CHKV(x)\
			if(NEAR(x)) VA(((T)(POINTER(x)))->y,TL,local); \
			else POST_WRITE_V_IT(WHERE(x), \
			         &(((T)(POINTER(x)))->y),remote_l, \
				 sizeof(((T)(POINTER(x)))->y), \
				 &local,local_l,TL ## _tag,POINTER(x),TT,A,I); \
			} while(0)
#endif

/*
 * Attribute wrappers. Values passed as (remote_l,local_l,VA):
 * _PP=(1,1,VASS_PP), _LP=(1,0,VASS_LP), _PL=(0,1,VASS_PL),
 * _LL=(0,0,VASS_LL). TT/A are the stringified type and attribute name,
 * I is 0.
 * NOTE(review): in the near case VA's first operand is the remote
 * attribute and its second is local, so _LP (VASS_LP) locks `local`
 * while passing remote_l=1/local_l=0, and _PL does the reverse. The
 * flags and the chosen VASS_ variant appear to disagree - confirm which
 * convention is intended.
 */
#define F_V_WATTR_PP(T,x,y,TL,local) F_V_WATTR(T,x,y,1,local,1,VASS_PP,TL,#T,#y,0)
#define F_V_WATTR_LP(T,x,y,TL,local) F_V_WATTR(T,x,y,1,local,0,VASS_LP,TL,#T,#y,0)
#define F_V_WATTR_PL(T,x,y,TL,local) F_V_WATTR(T,x,y,0,local,1,VASS_PL,TL,#T,#y,0)
#define F_V_WATTR_LL(T,x,y,TL,local) F_V_WATTR(T,x,y,0,local,0,VASS_LL,TL,#T,#y,0)

/*
 * Array element wrappers: CHKB(i,0,F_ASIZE(T,x)) first, then arr_part[i]
 * is written; A is NULL and I is the index.
 */
#define F_V_WARR_PP(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_WATTR(T,x,arr_part[i],1,local,1,VASS_PP,TL,#T,NULL,i);} while(0)
#define F_V_WARR_LP(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_WATTR(T,x,arr_part[i],1,local,0,VASS_LP,TL,#T,NULL,i);} while(0)
#define F_V_WARR_PL(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_WATTR(T,x,arr_part[i],0,local,1,VASS_PL,TL,#T,NULL,i);} while(0)
#define F_V_WARR_LL(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) F_V_WATTR(T,x,arr_part[i],0,local,0,VASS_LL,TL,#T,NULL,i);} while(0)

/*
 * POST_ wrappers around POST_V_WATTR; only available without PSATHER1.
 * Values passed as (remote_l,local_l,VA): _PP=(1,1,VASS_PP),
 * _LP=(1,0,VASS_LP), _PL=(0,1,VASS_PL), _LL=(0,0,VASS_LL).
 * TT/A are the stringified type and attribute name, I is 0.
 * The array variants bounds-check the index with
 * CHKB(i,0,F_ASIZE(T,x)) - the same size macro the F_ and POST_R_
 * variants use - then write arr_part[i] with A=NULL and I=i.
 */
#ifndef PSATHER1
#define POST_V_WATTR_PP(T,x,y,TL,local) POST_V_WATTR(T,x,y,1,local,1,VASS_PP,TL,#T,#y,0)
#define POST_V_WATTR_LP(T,x,y,TL,local) POST_V_WATTR(T,x,y,1,local,0,VASS_LP,TL,#T,#y,0)
#define POST_V_WATTR_PL(T,x,y,TL,local) POST_V_WATTR(T,x,y,0,local,1,VASS_PL,TL,#T,#y,0)
#define POST_V_WATTR_LL(T,x,y,TL,local) POST_V_WATTR(T,x,y,0,local,0,VASS_LL,TL,#T,#y,0)

#define POST_V_WARR_PP(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_V_WATTR(T,x,arr_part[i],1,local,1,VASS_PP,TL,#T,NULL,i);} while(0)
#define POST_V_WARR_LP(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_V_WATTR(T,x,arr_part[i],1,local,0,VASS_LP,TL,#T,NULL,i);} while(0)
#define POST_V_WARR_PL(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_V_WATTR(T,x,arr_part[i],0,local,1,VASS_PL,TL,#T,NULL,i);} while(0)
#define POST_V_WARR_LL(T,x,i,TL,local) do{SP CHKB(i,0,F_ASIZE(T,x)) POST_V_WATTR(T,x,arr_part[i],0,local,0,VASS_LL,TL,#T,NULL,i);} while(0)
#endif


/*
 * Object/Array copy (works only for reference objects,
 * as it does not guarantee atomicity).
 * Does not work correctly if the reference object has value
 * attributes that are not defined to be atomic for assignments.
 * Often one wants to copy complete objects or parts of arrays.
 * All the macros defined here have the suffix _xx, where x is either
 * A or N. The first x refers to the destination, the second
 * one to the origin. A means that the copy to/from this location
 * has to be atomic in the sense that all attributes of the
 * object have to be copied atomically. As mentioned above, this works
 * only for value types that can be assigned atomically.
 * Note that references in the object ARE changed when moving
 * an object from one cluster to another.
 */

/*
 * local must be a pointer to local memory (a local FOB
 * pointer will do), remote is the FOB to this object
 * Both must point to an object of type T
 * This macro may not be used for array objects
 */
/*
 * TG(T): the tag constant T_tag when compiled by sacomp (PSATHER defined),
 * otherwise 0.
 */
#ifndef PSATHER
# define TG(x) 0
#else
# define TG(x) x ## _tag
#endif

/*
 * Shared call shape of all F_x_OBJECT_yz macros: fn(dst,dst_atm,src,src_atm,
 * size of the object T points to, tag of T, -1).
 */
#define F_OBJECT_XFER_(fn,T,dst,dst_atm,src,src_atm) \
		fn(dst,dst_atm,src,src_atm,sizeof(*(T)NULL),TG(T),-1)

/*
 * R = p_r_object (remote -> local), W = p_w_object (local -> remote),
 * C = p_c_object (remote2 -> remote1). The first suffix letter is the
 * atomicity flag of the first object argument, the second that of the
 * second object argument (A=1, N=0).
 */
#define F_R_OBJECT_AA(T,local,remote)		F_OBJECT_XFER_(p_r_object,T,local,1,remote,1)
#define F_R_OBJECT_NA(T,local,remote)		F_OBJECT_XFER_(p_r_object,T,local,0,remote,1)
#define F_R_OBJECT_AN(T,local,remote)		F_OBJECT_XFER_(p_r_object,T,local,1,remote,0)
#define F_R_OBJECT_NN(T,local,remote)		F_OBJECT_XFER_(p_r_object,T,local,0,remote,0)

#define F_W_OBJECT_AA(T,remote,local)		F_OBJECT_XFER_(p_w_object,T,remote,1,local,1)
#define F_W_OBJECT_NA(T,remote,local)		F_OBJECT_XFER_(p_w_object,T,remote,0,local,1)
#define F_W_OBJECT_AN(T,remote,local)		F_OBJECT_XFER_(p_w_object,T,remote,1,local,0)
#define F_W_OBJECT_NN(T,remote,local)		F_OBJECT_XFER_(p_w_object,T,remote,0,local,0)

#define F_C_OBJECT_AA(T,remote1,remote2)	F_OBJECT_XFER_(p_c_object,T,remote1,1,remote2,1)
#define F_C_OBJECT_NA(T,remote1,remote2)	F_OBJECT_XFER_(p_c_object,T,remote1,0,remote2,1)
#define F_C_OBJECT_AN(T,remote1,remote2)	F_OBJECT_XFER_(p_c_object,T,remote1,1,remote2,0)
#define F_C_OBJECT_NN(T,remote1,remote2)	F_OBJECT_XFER_(p_c_object,T,remote1,0,remote2,0)

/*
 * To copy objects including arrays, use the following macros.
 * They don't allocate memory, but won't override it either.
 * They don't change pointers either.
 */
/*
 * F_R_ARRAY_OBJECT_xx(T,ET,local,remote): remember local's asize in _AaA,
 * call p_r_array_object(local, flag, _AaA, byte offset of arr_part within
 * local, remote, flag, F_ASIZE(T,remote), element size, TG(T), TG(ET)) and
 * afterwards store _AaA back into ((T)local)->asize.
 * `local` is used uncast for ->arr_part, so it must already be a pointer
 * to a struct with an arr_part member.
 * The two suffix letters are the flags passed after local and after
 * remote respectively (A=1, N=0).
 */
#define F_R_ARRAY_OBJECT_AA(T,ET,local,remote)	do { int _AaA=ASIZE((T)local);p_r_array_object(local,1,_AaA,(char *)local->arr_part-(char *)local,remote,1,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(T),TG(ET));((T)local)->asize=_AaA; } while(0)
#define F_R_ARRAY_OBJECT_NA(T,ET,local,remote)	do { int _AaA=ASIZE((T)local);p_r_array_object(local,0,_AaA,(char *)local->arr_part-(char *)local,remote,1,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(T),TG(ET));((T)local)->asize=_AaA; } while(0)
#define F_R_ARRAY_OBJECT_AN(T,ET,local,remote)	do { int _AaA=ASIZE((T)local);p_r_array_object(local,1,_AaA,(char *)local->arr_part-(char *)local,remote,0,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(T),TG(ET));((T)local)->asize=_AaA; } while(0)
#define F_R_ARRAY_OBJECT_NN(T,ET,local,remote)	do { int _AaA=ASIZE((T)local);p_r_array_object(local,0,_AaA,(char *)local->arr_part-(char *)local,remote,0,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(T),TG(ET));((T)local)->asize=_AaA; } while(0)
/*
 * F_W_ARRAY_OBJECT_xx(T,ET,remote,local): remember remote's size
 * (F_ASIZE) in _AaA, call p_w_array_object, then write _AaA back to
 * remote's asize attribute with the matching F_VA_WATTR_xx macro.
 */
#define F_W_ARRAY_OBJECT_AA(T,ET,remote,local)	do { int _AaA=F_ASIZE(T,remote);p_w_array_object(remote,1,_AaA,(char *)local->arr_part-(char *)local,local,1,ASIZE((T)local),sizeof(local->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_AA(T,remote,asize,_AaA); } while(0)
#define F_W_ARRAY_OBJECT_NA(T,ET,remote,local)	do { int _AaA=F_ASIZE(T,remote);p_w_array_object(remote,0,_AaA,(char *)local->arr_part-(char *)local,local,1,ASIZE((T)local),sizeof(local->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_NA(T,remote,asize,_AaA); } while(0)
#define F_W_ARRAY_OBJECT_AN(T,ET,remote,local)	do { int _AaA=F_ASIZE(T,remote);p_w_array_object(remote,1,_AaA,(char *)local->arr_part-(char *)local,local,0,ASIZE((T)local),sizeof(local->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_AN(T,remote,asize,_AaA); } while(0)
#define F_W_ARRAY_OBJECT_NN(T,ET,remote,local)	do { int _AaA=F_ASIZE(T,remote);p_w_array_object(remote,0,_AaA,(char *)local->arr_part-(char *)local,local,0,ASIZE((T)local),sizeof(local->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_NN(T,remote,asize,_AaA); } while(0)
/*
 * F_C_ARRAY_OBJECT_xx(T,ET,remote1,remote2): same scheme for two objects
 * that are both accessed through F_ASIZE; remote1's size is saved before
 * p_c_array_object and written back to remote1 afterwards.
 */
#define F_C_ARRAY_OBJECT_AA(T,ET,remote1,remote2)	do { int _AaA=F_ASIZE(T,remote1);p_c_array_object(remote1,1,_AaA,(char *)remote1->arr_part-(char *)remote1,remote2,1,F_ASIZE(T,remote2),sizeof(remote2->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_AA(T,remote1,asize,_AaA); } while(0)
/*
 * Readable layout of the definition above (kept as a comment only):
#define F_C_ARRAY_OBJECT_AA(T,ET,remote1,remote2)	
	do { 
		int _AaA=F_ASIZE(T,remote1);
		p_c_array_object(remote1,
			1,
			_AaA,
			(char *)remote1->arr_part-(char *)remote1,
			remote2,
			1,
			F_ASIZE(T,remote2),
			sizeof(remote2->arr_part[0]),
			TG(T),
			TG(ET));
		F_VA_WATTR_AA(T,remote1,asize,_AaA); } while(0)
*/
#define F_C_ARRAY_OBJECT_NA(T,ET,remote1,remote2)	do { int _AaA=F_ASIZE(T,remote1);p_c_array_object(remote1,0,_AaA,(char *)remote1->arr_part-(char *)remote1,remote2,1,F_ASIZE(T,remote2),sizeof(remote2->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_NA(T,remote1,asize,_AaA); } while(0)
#define F_C_ARRAY_OBJECT_AN(T,ET,remote1,remote2)	do { int _AaA=F_ASIZE(T,remote1);p_c_array_object(remote1,1,_AaA,(char *)remote1->arr_part-(char *)remote1,remote2,0,F_ASIZE(T,remote2),sizeof(remote2->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_AN(T,remote1,asize,_AaA); } while(0)
#define F_C_ARRAY_OBJECT_NN(T,ET,remote1,remote2)	do { int _AaA=F_ASIZE(T,remote1);p_c_array_object(remote1,0,_AaA,(char *)remote1->arr_part-(char *)remote1,remote2,0,F_ASIZE(T,remote2),sizeof(remote2->arr_part[0]),TG(T),TG(ET));F_VA_WATTR_NN(T,remote1,asize,_AaA); } while(0)

/*
 * Those two macros are similar to the above F_*_OBJECT macro,
 * but they copy only the array part of an object. They copy only
 * as many elements as there are in the smaller of the two arrays
 */
/*
 * Argument layout shared by p_r_array / p_w_array / p_c_array as used
 * here: (first object, its flag, its element count, byte offset of
 * arr_part within `local`, second object, its flag, its element count,
 * element size, TG(ET)). The two suffix letters are the two flags
 * (A=1, N=0). `local` is used uncast for ->arr_part, so it must already
 * be a pointer to a struct with an arr_part member.
 * In the F_C_ variants both sizes are obtained with F_ASIZE.
 */
#define F_R_ARRAY_AA(T,ET,local,remote)	p_r_array(local,1,ASIZE(local),(char *)local->arr_part-(char *)local,remote,1,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(ET))
#define F_R_ARRAY_NA(T,ET,local,remote)	p_r_array(local,0,ASIZE(local),(char *)local->arr_part-(char *)local,remote,1,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(ET))
#define F_R_ARRAY_AN(T,ET,local,remote)	p_r_array(local,1,ASIZE(local),(char *)local->arr_part-(char *)local,remote,0,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(ET))
#define F_R_ARRAY_NN(T,ET,local,remote)	p_r_array(local,0,ASIZE(local),(char *)local->arr_part-(char *)local,remote,0,F_ASIZE(T,remote),sizeof(local->arr_part[0]),TG(ET))
#define F_W_ARRAY_AA(T,ET,remote,local)	p_w_array(remote,1,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,1,ASIZE(local),sizeof(local->arr_part[0]),TG(ET))
#define F_W_ARRAY_NA(T,ET,remote,local)	p_w_array(remote,0,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,1,ASIZE(local),sizeof(local->arr_part[0]),TG(ET))
#define F_W_ARRAY_AN(T,ET,remote,local)	p_w_array(remote,1,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,0,ASIZE(local),sizeof(local->arr_part[0]),TG(ET))
#define F_W_ARRAY_NN(T,ET,remote,local)	p_w_array(remote,0,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,0,ASIZE(local),sizeof(local->arr_part[0]),TG(ET))
#define F_C_ARRAY_AA(T,ET,remote,local)	p_c_array(remote,1,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,1,F_ASIZE(T,local),sizeof(local->arr_part[0]),TG(ET))
#define F_C_ARRAY_NA(T,ET,remote,local)	p_c_array(remote,0,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,1,F_ASIZE(T,local),sizeof(local->arr_part[0]),TG(ET))
#define F_C_ARRAY_AN(T,ET,remote,local)	p_c_array(remote,1,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,0,F_ASIZE(T,local),sizeof(local->arr_part[0]),TG(ET))
#define F_C_ARRAY_NN(T,ET,remote,local)	p_c_array(remote,0,F_ASIZE(T,remote),(char *)local->arr_part-(char *)local,local,0,F_ASIZE(T,local),sizeof(local->arr_part[0]),TG(ET))
/*
 * Copy at most elems elements, beginning at the respective
 * array elements.
 * Compared with the whole-array macros above, each object's element
 * count is followed by its start index (begin / rbegin), and elems is
 * passed just before TG(ET).
 */
#define F_R_ARRAYPART_AA(T,ET,local,begin,remote,rbegin,elems)	p_r_array_part(local,1,ASIZE(local),begin,(char *)local->arr_part-(char *)local,remote,1,F_ASIZE(T,remote),rbegin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_R_ARRAYPART_NA(T,ET,local,begin,remote,rbegin,elems)	p_r_array_part(local,0,ASIZE(local),begin,(char *)local->arr_part-(char *)local,remote,1,F_ASIZE(T,remote),rbegin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_R_ARRAYPART_AN(T,ET,local,begin,remote,rbegin,elems)	p_r_array_part(local,1,ASIZE(local),begin,(char *)local->arr_part-(char *)local,remote,0,F_ASIZE(T,remote),rbegin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_R_ARRAYPART_NN(T,ET,local,begin,remote,rbegin,elems)	p_r_array_part(local,0,ASIZE(local),begin,(char *)local->arr_part-(char *)local,remote,0,F_ASIZE(T,remote),rbegin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_W_ARRAYPART_AA(T,ET,remote,rbegin,local,begin,elems)	p_w_array_part(remote,1,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,1,ASIZE(local),begin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_W_ARRAYPART_NA(T,ET,remote,rbegin,local,begin,elems)	p_w_array_part(remote,0,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,1,ASIZE(local),begin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_W_ARRAYPART_AN(T,ET,remote,rbegin,local,begin,elems)	p_w_array_part(remote,1,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,0,ASIZE(local),begin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_W_ARRAYPART_NN(T,ET,remote,rbegin,local,begin,elems)	p_w_array_part(remote,0,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,0,ASIZE(local),begin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_C_ARRAYPART_AA(T,ET,remote,rbegin,local,begin,elems)	p_c_array_part(remote,1,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,1,F_ASIZE(T,local),begin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_C_ARRAYPART_NA(T,ET,remote,rbegin,local,begin,elems)	p_c_array_part(remote,0,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,1,F_ASIZE(T,local),begin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_C_ARRAYPART_AN(T,ET,remote,rbegin,local,begin,elems)	p_c_array_part(remote,1,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,0,F_ASIZE(T,local),begin,sizeof(local->arr_part[0]),elems,TG(ET))
#define F_C_ARRAYPART_NN(T,ET,remote,rbegin,local,begin,elems)	p_c_array_part(remote,0,F_ASIZE(T,remote),rbegin,(char *)local->arr_part-(char *)local,local,0,F_ASIZE(T,local),begin,sizeof(local->arr_part[0]),elems,TG(ET))

/*
 * some often used reads and writes
 * Most of those functions are used in *.config
 */
/*
 * Tag and array-size accessors.
 *   READTAG(t,obj)        statement form: stores the tag of obj in t.
 *   F_TAG(obj)            expression form of the same.
 *   READASIZE(res,T,obj)  statement form: stores the asize of obj in res.
 *   F_ASIZE(T,obj)        expression form of the same.
 * With PSATHER1 these are plain TAG()/ASIZE() accesses. Otherwise a NEAR
 * object is accessed directly and any other object goes through
 * F_VA_RATTR_NN (statement forms) or read_tag()/read_asize()
 * (expression forms).
 * NOTE(review): the non-near branch of F_ASIZE takes the address of
 * ((T)(obj))->asize without POINTER(), unlike the attribute macros -
 * presumably read_asize() handles that; confirm.
 */
#ifdef PSATHER1
#define READTAG(t,obj)		(t=TAG(obj))
#define F_TAG(obj)		TAG(obj)
#define READASIZE(res,T,obj)	(res=ASIZE(((T)obj)))
#define F_ASIZE(T,obj)		(ASIZE(((T)obj)))
#else
#define READTAG(t,obj)		do { short xxTTxx; if(NEAR(obj)) t=TAG(obj); else { F_VA_RATTR_NN(xxTTxx,OB,obj,header.tag);t=xxTTxx;} } while(0)
#define F_TAG(obj)		(SPe CHKVv(obj) (NEAR(obj)?TAG(obj):read_tag(obj)))
#define READASIZE(res,T,obj)	do { if(NEAR(obj)) res=(ASIZE(((T)obj))); else F_VA_RATTR_NN(res,T,obj,asize); } while(0)
#define F_ASIZE(T,obj)		(NEAR(obj)?(ASIZE(((T)obj))):read_asize((SPe CHKVv(obj) &(((T)(obj))->asize))))
#endif
/*
 * Helpers for array clearing/copying, string copying and SYS builtins.
 *   F_AREFACLEAR_x   p_aref_aclear() over the arr_part of x (flag 1 or 0).
 *   F_AREFACOPY*     thin wrappers over F_C_ARRAY_AA / F_C_ARRAYPART_AA.
 *   F_STRACOPY* and F_FSTRACOPY*  get_memory() between the arr_parts of
 *                    STR/FSTR objects (F_FSTRACOPYF copies the whole
 *                    object with F_C_ARRAY_OBJECT_AA instead).
 *   F_FSTRACOPYS     copies as many elements as the STR source s holds
 *                    (F_ASIZE(STR,s)).
 *   F_SYSOBEQ(x,y)   true when x and y are the same pointer, both void, or
 *                    both non-void and gen_SYS_ob_eq() says so. The whole
 *                    expansion and its arguments are parenthesised so it
 *                    can be negated or combined with other operators.
 * F_STRACOPYNF and F_STRACOPYN end in a semicolon; that is kept because
 * existing callers may rely on it, so do not use them as the body of an
 * if that has an else.
 */
#define F_AREFACLEAR_A(T,x)	p_aref_aclear(((T)x)->arr_part,1,F_ASIZE(T,x),sizeof(((T)x)->arr_part[0]))
#define F_AREFACLEAR_N(T,x)	p_aref_aclear(((T)x)->arr_part,0,F_ASIZE(T,x),sizeof(((T)x)->arr_part[0]))
#define F_AREFACOPY_AA(T,ET,x,y)		F_C_ARRAY_AA(T,ET,x,y)
#define F_AREFACOPYB_AA(T,ET,x,b,y)		F_C_ARRAYPART_AA(T,ET,x,b,y,0,F_ASIZE(T,x))
#define F_AREFACOPYBN_AA(T,ET,x,b,n,y)		F_C_ARRAYPART_AA(T,ET,x,b,y,0,n)
#define F_AREFACOPYBNS_AA(T,ET,x,b,n,s,y)	F_C_ARRAYPART_AA(T,ET,x,b,y,s,n)
#define F_STRACOPYNF(s,f,n)	get_memory(((STR)s)->arr_part,((FSTR)f)->arr_part,n);
#define F_STRACOPYN(s,f,n)	get_memory(((STR)s)->arr_part,((STR)f)->arr_part,n);
#define F_FSTRACOPYF(n,o)	F_C_ARRAY_OBJECT_AA(FSTR,CHAR,n,o)
#define F_FSTRACOPYS(n,s)	get_memory(((FSTR)n)->arr_part,((STR)s)->arr_part,F_ASIZE(STR,s))
#define F_FSTRACOPYN(n,s,b)	get_memory(((FSTR)n)->arr_part,((STR)s)->arr_part,b)
#define F_FSTRACOPYNF(n,s,b)	get_memory(((FSTR)n)->arr_part,((FSTR)s)->arr_part,b)
#define F_FSTRACOPYIF(n,i,s)	get_memory(((FSTR)n)->arr_part+i,((FSTR)s)->arr_part,pmin(F_ASIZE(FSTR,n)-i,F_ASIZE(FSTR,s)))
#define F_FSTRACOPYIS(n,i,s)	get_memory(((FSTR)n)->arr_part+i,((STR)s)->arr_part,pmin(F_ASIZE(FSTR,n)-i,F_ASIZE(STR,s)))
#define F_STRISEQHELPER(T1,o1,T2,o2,s) do_memcmp(((T1)o1)->arr_part,((T2)o2)->arr_part,s)
#define F_SYSID(x)			(SPe p_sysid(x))
#define F_SYSTP(x)			F_TAG(x)
#define F_SYSOBEQ(x,y) 	   	(((x)==(y))||(FVOID(x)&&FVOID(y))||((!FVOID(x))&&(!FVOID(y))&&gen_SYS_ob_eq((OB)(x),(OB)(y))))
#define F_SYSDESTROY(x)		p_sysdestroy(x)

/*
 * THREAD_ID is a BR_thread_t.  Its void value is BR_INVALID_ID() and all
 * comparisons go through BR_SAME_THREAD().
 * (An earlier variant used a zero-initialised static BR_thread_t as the
 * void value instead of BR_INVALID_ID().)
 */
typedef BR_thread_t THREAD_ID;

#define THREAD_ID_zero		BR_INVALID_ID()
#define THREAD_ID_IS_EQ(a,b)	BR_SAME_THREAD(a,b)
#define THREAD_ID_IS_VOID(a)	THREAD_ID_IS_EQ(THREAD_ID_zero,a)

/* Boxed THREAD_ID: an object header followed by the value itself. */
typedef struct THREAD_ID_boxed_struct {
	OB_HEADER header;
	THREAD_ID immutable_part;
} *THREAD_ID_boxed;

/* A PREFETCH value is a short; the PREFETCH_* macros treat 0 as void. */
typedef short PREFETCH;

/* Boxed PREFETCH: an object header followed by the value itself. */
typedef struct PREFETCH_boxed_struct {
	OB_HEADER header;
	PREFETCH immutable_part;
} *PREFETCH_boxed;
/*
 * PREFETCH comparisons and waits.
 *
 * PREFETCH_WAIT(a) and PREFETCH_SPECUL_WAIT(a) hand the condition (a)==0 to
 * am_wait_for().  When PSATHER_STAT is defined they first report to the
 * stat_*() hooks: the *_no_wait() hook if a is already 0, otherwise the
 * *_wait() hook with the current value of a.
 *
 * The argument is parenthesised at every comparison so that compound
 * expressions are tested as a whole.  Note that the PSATHER_STAT variants
 * evaluate their argument more than once.
 */
#define PREFETCH_IS_EQ(a,b) 	((a)==(b))
#define PREFETCH_zero 		0
#define PREFETCH_IS_VOID(a) 	((a)==0)
#ifdef PSATHER_STAT
#define PREFETCH_WAIT(a)		do { if((a)==0) stat_no_wait(); else { stat_pre_wait(a);am_wait_for((a)==0); } } while(0)
#define PREFETCH_SPECUL_WAIT(a)		do { if((a)==0) stat_specul_no_wait(); else { stat_specul_wait(a);am_wait_for((a)==0); } } while(0)
#else
#define PREFETCH_WAIT(a)		am_wait_for((a)==0)
#define PREFETCH_SPECUL_WAIT(a)		am_wait_for((a)==0)
#endif

/*
 * Broadcast helpers for a global variable `global` of type T.
 *
 *   _R   passes the address of global, cast to FOB *, to
 *        broadcast_global_r(); T is not used.
 *   _V   fills a temporary through VASS_LP(xxx,T,global), then calls
 *        broadcast_global_v() with the temporary, the address of global,
 *        sizeof(T) and the identifier T_tag (built by token pasting).
 *   _VA  initialises a temporary by plain assignment, then calls
 *        broadcast_global_va() with the temporary, the address of global
 *        and sizeof(T).
 */
#define BROADCAST_GLOBAL_R(T,global)	broadcast_global_r((FOB *)&global)
#define BROADCAST_GLOBAL_V(T,global)	do { T xxx;VASS_LP(xxx,T,global);broadcast_global_v((void *)&xxx,(void *)&global,sizeof(T),T ## _tag); } while(0)
#define BROADCAST_GLOBAL_VA(T,global)	do { T xxx=global;broadcast_global_va((void *)&xxx,(void *)&global,sizeof(T)); } while(0)

/* Low-level lock object: an object header followed by a BR_lock_t. */
typedef struct LL_LOCK_struct {
	OB_HEADER header;
	BR_lock_t lck;
} *LL_LOCK;

/* Remote counterparts of lock / unlock / try for an LL_LOCK. */
void ll_lock_lck_remote(LL_LOCK lock);
void ll_lock_unlck_remote(LL_LOCK lock);
int ll_lock_try_remote(LL_LOCK lock);

void lm_get_remote_ptr_request(BR_cluster_t target,
                               BR_word_t from,
                               BR_word_t to,
                               BR_word_t spinlock);

/*
** If compiling with gcc on a sparc one can use spinlocks
** for fast and dirty locks. See am.h for more information.
*/

/*
 * Spinlock object: an object header followed by the lock storage that
 * BR_SPINLOCK_DEC declares under the name lck.
 */
typedef struct SPINLOCK_struct {
	OB_HEADER header;
	BR_SPINLOCK_DEC(lck);
} *SPINLOCK;

/* SEMA is the BR_sema_t semaphore type. */
typedef BR_sema_t SEMA;

/* Remote counterparts of lock / unlock / try for a SPINLOCK. */
void spinlock_lck_remote(SPINLOCK lock);
void spinlock_unlck_remote(SPINLOCK lock);
int spinlock_try_remote(SPINLOCK lock);
/*
 * Spinlock operations on the lck member of the object x points to; each one
 * forwards to the matching BR_SPINLOCK_* primitive.  x is parenthesised so
 * that pointer expressions such as p+1 select the intended object.
 *
 * SPINLOCK_CREATE ends in ';'.  The semicolon is kept because existing
 * callers may rely on it; do not use it inside an unbraced if/else.
 */
# define SPINLOCK_LCK(x) 		 BR_SPINLOCK_LOCK((x)->lck)
# define SPINLOCK_UNLCK(x) 		 BR_SPINLOCK_UNLOCK((x)->lck)


#define SPINLOCK_TRY(x) BR_SPINLOCK_TRY((x)->lck)
# define SPINLOCK_CREATE(x)		 BR_SPINLOCK_INIT((x)->lck);

/*
 * The rest of this header file is filled with prototype declarations
 * FORBID FORK_FUNC
 * (the other identifiers are caught by reading the .o files)
 */

/* Signature of the routines handed to p_attach() and p_fork_attach(). */
typedef void (*FORK_FUNC)(FOB self,FOB arg,FOB attach,int pos);

/*
 * Statistics hooks; called by PREFETCH_WAIT / PREFETCH_SPECUL_WAIT when
 * PSATHER_STAT is defined.
 */
void stat_no_wait(void);
void stat_pre_wait(int);
void stat_specul_no_wait(void);
void stat_specul_wait(int);

/*
 * p_ex_* routines.
 * NOTE(review): presumably these maintain the exception stack referenced by
 * LOCAL_MEM.exception_stack -- confirm against the runtime sources.
 */
void p_ex_push(void *);
void p_ex_push_first(void *);
void p_ex_pop(int);

/* Runtime entry points; psather_start() receives argc/argv. */
void psather_start(int argc,char *argv[]);
int psather_stop(void);
int psather_abort(void);

void pd_lock_v(void *);
void pd_unlock_v(void *);

/* read_tag() is what F_TAG falls back to for objects that are not NEAR. */
int read_tag(FOB ob);
void p_ex_raise(FOB);
FOB p_ex_exception(void);
void p_ex_set(void *);
/* NOTE(review): empty parameter list, i.e. not a prototype before C23. */
void *p_ex_top();

void *pd_arg_alloc(long size);
void pd_arg_free(char *ptr);
void *pd_cohort(void);

void pd_defer(void);
int am_my_proc(void);

extern int debug_psather;
/* Both take the FORK_FUNC to run together with its four arguments. */
void p_attach(FORK_FUNC func,FOB self,FOB arg,FOB attach,int pos);
void p_fork_attach(FORK_FUNC func,FOB self,FOB arg,FOB attach,int pos);
void p_import(void);
void p_export(void);
/*
 * Attribute access routines, declared in two flavours.
 *
 * When both PSATHER_TRACE and ATTR_TRACE are defined, every routine takes
 * four additional trailing arguments (void *r, char *type, char *attr,
 * int index); otherwise the same routines are declared without them.
 * The leading arguments are identical in both flavours.
 *
 * Naming: cache_* / remote_* variants exist for each access; _r_, _va_ and
 * _v_ variants differ in whether they carry FOB pointers, atm_* flags plus a
 * size, or lck_* flags plus a size and a tag; pre_* reads additionally take
 * two volatile PREFETCH pointers.
 * NOTE(review): the exact meaning of r/va/v and of pre_/post_ is not visible
 * in this section -- confirm against the runtime sources.
 */
#if defined(PSATHER_TRACE) && defined(ATTR_TRACE)
void cache_r_read(int pos,FOB *local,FOB *remote,void *r,char *type,char *attr,int index);
void remote_r_read(int pos,FOB *local,FOB *remote,void *r,char *type,char *attr,int index);
void cache_r_write(int pos,FOB *remote,FOB *local,void *r,char *type,char *attr,int index);
void remote_r_write(int pos,FOB *remote,FOB *local,void *r,char *type,char *attr,int index);
void cache_va_read(int pos,void *local,int atm_l,void *remote,int atm_r,int size,void *r,char *type,char *attr,int index);
void remote_va_read(int pos,void *local,int atm_l,void *remote,int atm_r,int size,void *r,char *type,char *attr,int index);
void cache_va_write(int pos,void *remote,int atm_r,int size,void *local,int atm_l,void *r,char *type,char *attr,int index);
void remote_va_write(int pos,void *remote,int atm_r,int size,void *local,int atm_l,void *r,char *type,char *attr,int index);
void cache_v_read(int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r,void *r,char *type,char *attr,int index);
void remote_v_read(int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r,void *r,char *type,char *attr,int index);
void cache_v_write(int pos,void *remote,int size,int lck_r,void *local,int lck_l,int tag,void *r,char *type,char *attr,int index);
void remote_v_write(int pos,void *remote,int size,int lck_r,void *local,int lck_l,int tag,void *r,char *type,char *attr,int index);
void pre_cache_r_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,FOB *local,FOB *remote,void *r,char *type,char *attr,int index);
void pre_remote_r_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,FOB *local,FOB *remote,void *r,char *type,char *attr,int index);
void post_remote_r_write(int pos,FOB *remote,FOB *local,void *r,char *type,char *attr,int index);
void pre_cache_va_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int atm_l,void *remote,int atm_r,int size,void *r,char *type,char *attr,int index);
void pre_remote_va_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int atm_l,void *remote,int atm_r,int size,void *r,char *type,char *attr,int index);
void post_remote_va_write(int pos,void *remote,int atm_r,int size,void *local,int atm_l,void *r,char *type,char *attr,int index);
void pre_cache_v_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r,void *r,char *type,char *attr,int index);
void pre_remote_v_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r,void *r,char *type,char *attr,int index);
void post_remote_v_write(int pos,void *remote,int size,int lck_r,void *local,int lck_l,int tag,void *r,char *type,char *attr,int index);
#else
void cache_r_read(int pos,FOB *local,FOB *remote);
void remote_r_read(int pos,FOB *local,FOB *remote);
void cache_r_write(int pos,FOB *remote,FOB *local);
void remote_r_write(int pos,FOB *remote,FOB *local);
void cache_va_read(int pos,void *local,int atm_l,void *remote,int atm_r,int size);
void remote_va_read(int pos,void *local,int atm_l,void *remote,int atm_r,int size);
void cache_va_write(int pos,void *remote,int atm_r,int size,void *local,int atm_l);
void remote_va_write(int pos,void *remote,int atm_r,int size,void *local,int atm_l);
void cache_v_read(int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r);
void remote_v_read(int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r);
void cache_v_write(int pos,void *remote,int size,int lck_r,void *local,int lck_l,int tag);
void remote_v_write(int pos,void *remote,int size,int lck_r,void *local,int lck_l,int tag);
void pre_cache_r_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,FOB *local,FOB *remote);
void pre_remote_r_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,FOB *local,FOB *remote);
void post_remote_r_write(int pos,FOB *remote,FOB *local);
void pre_cache_va_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int atm_l,void *remote,int atm_r,int size);
void pre_remote_va_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int atm_l,void *remote,int atm_r,int size);
void post_remote_va_write(int pos,void *remote,int atm_r,int size,void *local,int atm_l);
void pre_cache_v_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r);
void pre_remote_v_read(volatile PREFETCH *p,volatile PREFETCH *p1,int pos,void *local,int lck_l,int tag,void *remote,int size,int lck_r);
void post_remote_v_write(int pos,void *remote,int size,int lck_r,void *local,int lck_l,int tag);
#endif

/*
 * Whole-object, array and array-part transfer routines.
 * The p_r_* variants take a local void * destination first, the p_w_*
 * variants a remote FOB first followed by a local void *, and the p_c_*
 * variants take a FOB on both sides.
 * NOTE(review): r/w/c presumably stand for read, write and copy, judging by
 * the parameter order -- confirm.
 */
void p_r_object(void *local,int latm,FOB remote,int ratm,int size,int tag,int elems);
void p_w_object(FOB remote,int ratm,void *local,int latm,int size,int tag,int elems);
void p_c_object(FOB remote,int ratm,FOB local,int latm,int size,int tag,int elems);
void p_r_array_object(void *local,int latm,int lsize,int arr_offset,FOB remote,int ratm,int rsize,int elemsize,int tag,int eltag);
void p_w_array_object(FOB remote,int ratm,int rsize,int arr_offset,void *local,int latm,int lsize,int elemsize,int tag,int eltag);
void p_c_array_object(FOB remote,int ratm,int rsize,int arr_offset,FOB local,int latm,int lsize,int elemsize,int tag,int eltag);
void p_r_array(void *local,int latm,int lsize,int arr_offset,FOB remote,int ratm,int rsize,int elemsize,int eltag);
void p_w_array(FOB remote,int ratm,int rsize,int arr_offset,void *local,int latm,int lsize,int elemsize,int eltag);
void p_c_array(FOB remote,int ratm,int rsize,int arr_offset,FOB local,int latm,int lsize,int elemsize,int eltag);
void p_r_array_part(void *local,int latm,int lsize,int lbegin,int arr_offset,FOB remote,int ratm,int rsize,int rbegin,int elemsize,int elems,int eltag);
void p_w_array_part(FOB remote,int ratm,int rsize,int rbegin,int arr_offset,void *local,int latm,int lsize,int lbegin,int elemsize,int elems,int eltag);
void p_c_array_part(FOB remote,int ratm,int rsize,int rbegin,int arr_offset,FOB local,int latm,int lsize,int lbegin,int elemsize,int elems,int eltag);
/* read_asize() is what F_ASIZE falls back to for objects that are not NEAR. */
int read_asize(FOB where);
/* Called by F_AREFACLEAR_A / F_AREFACLEAR_N with atm set to 1 resp. 0. */
void p_aref_aclear(FOB arr,int atm,int asize,int elemsize);

/* Back ends of the BROADCAST_GLOBAL_R / _VA / _V macros. */
void broadcast_global_r(FOB *global);
void broadcast_global_va(void *local,void *global,int size);
void broadcast_global_v(void *local,void *global,int size,int tg);

/* p_sr_* routines; each takes a callback of type FOB (*)(FOB,int). */
void p_sr_only_ob(FOB (*sr)(FOB,int),int tag,void *pt,int cl);
void p_sr_array(FOB (*sr)(FOB,int),int tag,void **pt,int cl,int size);
void p_sr_ob(FOB (*sr)(FOB,int),int tag,void *pt,int cl);

extern int clusters;
BR_thread_t thr_ps_id(void);
/* NOTE(review): p_send_fob is declared again in the non-GCC branch further below. */
FOB p_send_fob(FOB pt,int cluster);
int p_fwrite(FOB p,int size,int elems,FILE *file);
/* Back ends of F_SYSID and F_SYSDESTROY. */
long p_sysid(FOB r);
void p_sysdestroy(FOB x);
/* get_memory() is the copy routine behind the F_STRACOPY* / F_FSTRACOPY* macros. */
void get_memory(void *local,FOB remote,int size);
void put_memory(FOB remote,void *local,int size);
/* Comparison routine behind F_STRISEQHELPER. */
int do_memcmp(FOB a,FOB b,int s);
void pd_sync(void);
void p_init_lock_header(LOCK s);
void p_remote_exec(vnn_t from,void *bnd,int size);
void lm_get_remote_locks(void *to,BR_word_t from,long size,BR_cluster_t source);
int p_rfatalv(char *s);
int p_rfatalvfl(char *s,char *f,int l);
/* NOTE(review): empty parameter list, i.e. not a prototype before C23. */
void start_gdb();
void use_statistics(int);
void use_trace(int);
int EXT_LOCK_is_dlock(void *);
/*
 * the following definitions should be private to the psather library,
 * but as they are used in some macros, we need them here.
 */
/*
 * If you add any information to the local memory structure
 * which must be passed from one cluster to another, you
 * MUST do it by hand (most of the changes will be in 
 * attach.c)
 */
/*
 * Local memory structure; LOCAL_MEM is a pointer to the volatile struct.
 * NOTE(review): presumably one instance exists per thread (see
 * INIT_THREAD_MEMORY below) -- confirm.
 */
typedef volatile struct LOCAL_MEM_struct {
	struct EXCEPT_ELEMENT_COMMON *exception_stack;
	void *info;
	char *file;
	long line;
	struct _func_frame *pFF;
	/* Do not change the ordering of the following three fields;
	 * the runtime depends on them being this way (attach,
	 * psather_tid, prev_FF).
	 * NOTE(review): the fields are actually declared in the order
	 * attach, prev_FF, psather_tid, which differs from the list
	 * above -- confirm which order the runtime expects.
	 */
	FOB attach;
	struct _func_frame *prev_FF;
	BR_thread_t psather_tid;
	vnn_t prev_cluster;
	unsigned got_tid:1;	/* one-bit flag */
	unsigned long import;
	unsigned long exports_waiting;
} *LOCAL_MEM;

/* Expands to a call of init_thread_memory(). */
#define INIT_THREAD_MEMORY() init_thread_memory()

/*
 * Small helpers.  Under GCC each one is introduced with `extern inline`:
 * the INLINED_* macros (defined outside this section) supply the rest of
 * each definition, and pmin() -- the minimum of two ints -- is spelled out
 * here.  Other compilers only see ordinary prototypes.
 *
 * NOTE(review): `extern inline` means different things under GNU89 and
 * C99 inline rules; builds using -std=gnu99 or later (the default since
 * GCC 5) may need -fgnu89-inline or __attribute__((gnu_inline)) to avoid
 * duplicate definitions -- confirm against the build flags.
 * NOTE(review): the #else branch has no visible counterpart for the
 * INLINED_PSR_* entries -- confirm whether prototypes are missing.
 */
#ifdef __GNUC__
extern inline INLINED_P_SEND_FOB
extern inline INLINED_P_RECV_FOB
extern inline INLINED_P_WHERE
extern inline INLINED_PS_WHERE
extern inline INLINED_PSR_WHERE
extern inline INLINED_PS_NEAR
extern inline INLINED_PSR_NEAR
extern inline INLINED_PS_FAR
extern inline INLINED_PSR_FAR
extern inline INLINED_PS_NEAR_OR_VOID
extern inline int pmin(int a,int b) { return a<b?a:b; }
#else
FOB p_send_fob(FOB pt,int cl);
FOB p_recv_fob(FOB pt,int cl);
int p_where(FOB x);
int ps_where(FOB x);
int ps_near_or_void(FOB);
int ps_near(FOB);
int ps_far(FOB);
int pmin(int a,int b);
#endif /* __GNUC__ */

/* IS_ITER defaults to 0 unless it was defined before this point. */
#if !defined(IS_ITER)
# define IS_ITER 0
#endif /* !IS_ITER */

/* some macros for fast translations of par/fork/at */
#include "simple-par.h"

#endif
