/*------------------------------------------------------------------------------
* Copyright 1992 by Forschungszentrum Informatik(FZI)
*
* You can use and distribute this software under the terms of the license
* you should have received along with this software.
* If not or if you want additional information, write to
* Forschungszentrum Informatik, "STONE", Haid-und-Neu-Strasse 10-14,
* D-76131 Karlsruhe , Germany.
*-------------------------------------------------------------------------------
*/
/* OBST LIBRARY MODULE */
// *****************************************************************************
// Module psm                  Emil Sekerinski, Oliver Spatscheck, Walter Zimmer
//
// *****************************************************************************
//  persistent storage manager: utility implementation 
// *****************************************************************************
 
// *****************************************************************************
// NOTES 
// *****************************************************************************
/*
*  The cpp constant LOCKD_CORRECT selects the locking mechanism:
*  if it is defined,
*  then fcntl is used for container locking,
*  else flock is used for container locking.
*
*  As discussed in README.install, the default is "flock" (LOCKD_CORRECT not
*  defined), because of a bug in the SunOS lock daemon.
*  The constant is set in obst_config.h.
*/

// *****************************************************************************
// INCLUDE FILES 
// *****************************************************************************

// ================= system and platform files =================================

#define OBST_IMP_STDCONST
#define OBST_IMP_ERROR
#define OBST_IMP_MEMOP
#define OBST_IMP_STRINGOP
#define OBST_IMP_STREAM
#define OBST_IMP_FORMATTED_IO
#define OBST_IMP_FILE
#define OBST_IMP_FILELOCK
#define OBST_IMP_RANDOM
#define OBST_IMP_TIME
#include "obst_stdinc.h"

#include "_obst_config.h"

// ================ obst files =================================================

#include "obst.h"
#include "obst_progstd.h"
#include "knl_use.h"
#include "psm.h"
#include "psm_err.h"     // err_raise, err_SYS
#include "trc_psm.h"     // trace-module; numbers 11(H) -14(VL) + psm_CHECK
#ifdef OBST_HAVE_JOYCE
#  include "sync_use.h"  // transaction handling
#  include "sync_decl.h" // declarations for transactions
#endif
#include "psm_util.h"

// *****************************************************************************
// PORTING DEFINITIONS & DECLARATIONS
// *****************************************************************************



// *****************************************************************************
// PUBLIC DECLARATIONS
// *****************************************************************************

// ================= data declaration ==========================================
// (in obst_globals.C: psm_ERR, ROOT_OFFSET, _psm_checked_cnt,
//		       [_]{TEMP,ROOT,SYNC,UNUSED}_CONTAINER)

// Paging state shared by the psm implementation files. psm_buf and
// psm_buf_tbl are allocated with B entries each in _psm_initialize().
psm_PAGE *psm_buf;               // internal buffer for paging 
psm_buf_info *psm_buf_tbl;       // table of infos for each ...
                                 // ... page of the buffer
psm_cnt_info psm_cnt_tbl[psm_C]; // `hash` table for container entries

#ifdef OBST_HAVE_JOYCE
sos_Bool psm_is_deadlock;        // used in trans.c(part of JOYCE)
sos_Bool psm_is_syncactivated;   // TRUE iff the SYNC_CONTAINER file could be
                                 // opened in _psm_final_initialize()
#endif

// The well-known containers; their values are assigned in _psm_initialize().
extern sos_Container _TEMP_CONTAINER;
extern sos_Container _ROOT_CONTAINER;
extern sos_Container _SYNC_CONTAINER;
extern sos_Container _UNUSED_CONTAINER;

// Directory holding the container files; taken from the environment
// variable named by ENVVAR_CONTAINER in _psm_initialize().
const char* psm_cntdir;


// *****************************************************************************
// PRIVATE DECLARATIONS
// *****************************************************************************

// ================= function prototypes =======================================

// Forward declarations of the helpers used only inside this file.
// (LOCAL is presumably the project's marker for file-local linkage --
// confirm against its definition.)

// ----------------- calculation functions -------------------------------------

LOCAL inline sos_Int abs_sw_in_pg_offset(sos_Bool sw, 
                                          sos_Int pg_number);
LOCAL void calculate_free_file_pgs(psm_cnt_info &ci); 
LOCAL inline sos_Offset cnt_pgs_offset(sos_Int sw);
LOCAL inline  int file_position(int file_pg); 
LOCAL inline unsigned free_pgs(psm_cnt_info &ci); 
LOCAL inline unsigned free_sz(psm_cnt_info &ci); 
LOCAL inline int highest_valid_pg_tbl_part(int cnt_pgs);
LOCAL inline int necessary_file_pgs(int used_pgs, int cnt_pgs); 
LOCAL inline int N_upround(int nr);                
LOCAL inline sos_Int pg_in_pg_tbl_buf_offset(sos_Bool in_pg_sw);
LOCAL inline sos_Offset pg_tbl_offset(int pg, sos_Int sw);
LOCAL inline sos_Int sw_in_pg_offset(sos_Bool sw);
LOCAL unsigned sz_of_free_blocks(psm_cnt_info &ci); 
LOCAL unsigned used_pgs(psm_cnt_info &ci);

// ----------------- lock and lock auxiliary functions -------------------------

LOCAL inline int status_equal_accessmode(sos_Container_status stat,
                                          sos_Access_mode am);

// ----------------- page and integer functions --------------------------------

LOCAL int find_free_file_pg(psm_cnt_info &ci);
LOCAL int preempt_pg(sos_Int id, int cnt_pg);
LOCAL void read_pg(psm_cnt_info &ci, int cnt_pg, int buf_pg);
LOCAL void write_pg(psm_cnt_info &ci, int cnt_pg, int buf_pg);

// ----------------- pagetable functions ---------------------------------------

LOCAL void read_pg_tbl(psm_cnt_info &ci); 
LOCAL void write_pg_tbl(psm_cnt_info &ci); 
LOCAL psm_cnt_info &new_cnt(sos_Int id); 
LOCAL inline void auto_squeeze(psm_cnt_info &ci);


// ================= macros ====================================================


#define number_in_pg_tbl_info(cnt_pg)((cnt_pg) / psm_N)
           // Index into the pg_tbl_info array of the page-table page in
           // which container page cnt_pg is mapped (integer division by
           // psm_N).

// ================= classes & structures ======================================

/* LOCAL */
class psm_list_item;
class psm_list;

typedef  psm_list_item* psm_list_item_ptr;

/* LOCAL */
// One node of a psm_list: an (offset, sz) pair and the link to its successor.
class psm_list_item
{
   friend class psm_list;
   public:
      int offset, sz;
      psm_list_item *next;

      // A freshly built node is not linked to anything yet.
      psm_list_item(int o, int s) : offset(o), sz(s), next(0) {}
};

/* LOCAL */
// Singly linked list of psm_list_item nodes kept in ascending offset order.
// The list owns the nodes handed to insert() and deletes them when it is
// destroyed.
class psm_list
{
   private:
      psm_list_item *header;
   public:

      psm_list() : header(0) {}

      // Releases every node that is still linked into the list.
      ~psm_list()
      {
         for (psm_list_item_ptr node = header; node != 0; )
         {
            psm_list_item_ptr following = node->next;
            delete node;
            node = following;
         }
      }

      // Links new_item in front of the first node whose offset is not
      // smaller than new_item's offset (so it also lands in front of nodes
      // with an equal offset).
      void insert(psm_list_item_ptr new_item)
      {
         // 'link' always addresses the pointer that has to be redirected to
         // new_item: first the list header, later some node's next field.
         psm_list_item_ptr *link = &header;

         while (*link != 0  &&  new_item->offset > (*link)->offset)
            link = &(*link)->next;

         new_item->next = *link;
         *link          = new_item;
      }

   // First node of the list, or 0 for an empty list.
   psm_list_item* get_header() const { return header; }

   // Non-zero iff the list holds no node.
   int empty()  const { return(header == 0);  }

};

//------------------------------------------------------------------------------

     // Initialization hook: initializing _dummy evaluates the comma
     // expression, which calls _psm_initialize() for its side effects and
     // then yields 0. This runs the psm initialization automatically at
     // program start-up; _psm_initialize() itself ignores repeated calls.
static int _dummy = (_psm_initialize(), 0);

// ================= constant declarations =====================================

#define DEFAULT_OCCUPATION_DEGREE (double)0.2
                                      // Occupation degree used for automatic
                                      // compression (MIN_OCCUPATION_DEGREE)
                                      // when the autosqueeze environment
                                      // value is not a percentage in 1..99.

// ================= data declarations =========================================

LOCAL  char pg_tbl_buf[psm_P];  // Buffer for reading pagetable; 
LOCAL  int B;                   // number of pages in buffer, ...
                                // ... 1 <= B <= Bmax
                                // (set once in _psm_initialize)
LOCAL  struct stat file_stat_buf; // for reading status information ...
                                  // ... of containerfile (filled by fstat)
LOCAL  int buf_tbl_index /* = 0 */;// index to make search for next ...
                                   // ... bufferpage more  efficient; points ...
                                   // ... to last found not referenced page ...
                                   // ... + 1(in the beginning => 0)
LOCAL  int AUTOSQUEEZE;         // 1 iff the autosqueeze environment ...
                                // ... variable is set, else 0
LOCAL  double MIN_OCCUPATION_DEGREE; // occupation threshold as a fraction;
                                     // only assigned when AUTOSQUEEZE is 1

LOCAL  char err_buf[150];  
         // err_buf is used for variable error messages ...
         // ... current use only in : lookupcontainer

#ifdef MONITOR
LOCAL  sos_Bool mon_activated;  // TRUE iff mon_initialize() has been called
#endif

 
// *****************************************************************************
// PUBLIC FUNCTION DEFINITIONS 
// *****************************************************************************

// ================= error handling functions ==================================

/*
   psm_write_err_fct is the default handler for a write to a container
   that is not opened for writing: it raises
   err_PSM_NONWRITEABLE_CONTAINER.

   The handler is installed through the function pointer
   psm_write_err_fct_ptr (assigned in _psm_initialize). The pointer was
   introduced because the STONE project partners from Dresden wanted to
   replace this behaviour in their local installation in order to
   implement another transaction scheme.
*/
psm_write_err_fct_type psm_write_err_fct_ptr;

// Default write-error handler; the container id argument is ignored.
void psm_write_err_fct(sos_Int)
{
   err_raise(err_SYS, err_PSM_NONWRITEABLE_CONTAINER, psm_ERR, FALSE);
}

// ================= initialize functions ======================================

void _psm_initialize() 
{
   // One-time set-up of the psm module:
   //  - checks the enum-cast assumption between sos_Access_mode and
   //    sos_Container_status,
   //  - installs the default write-error handler,
   //  - creates the well-known container values,
   //  - reads the container directory, autosqueeze, buffer size (and, if
   //    compiled in, monitor) settings from the environment,
   //  - seeds the random generator,
   //  - allocates the page buffer and marks all tables as unused.
   // Calls after the first one return immediately.

   static int is_initialized=0;
   if (is_initialized) return;
   is_initialized++;
 
   // check psm local assertions:
   // - the literals of sos_Access_mode must be convertible into the
   //   corresponding literals of sos_Container_status(and vice versa) by
   //   a simple cast(used in entercontainer, sos_Container::access).

   err_assert(CHECKOUT ==(sos_Access_mode)CHECKEDOUT
              && READING  ==(sos_Access_mode)READABLE
              && WRITING  ==(sos_Access_mode)WRITEABLE,
              "psm: can't cast sos_Container_status to sos_Access_mode");

   // error handling for WRITE operations on closed containers
   psm_write_err_fct_ptr=psm_write_err_fct;
 
   // initialize extern const & variables

#ifdef OBST_HAVE_JOYCE   
   psm_is_deadlock = FALSE;
#endif
   _TEMP_CONTAINER = sos_Container::make(0);
   _ROOT_CONTAINER = sos_Container::make(1);
   _SYNC_CONTAINER = sos_Container::make(2);
   _UNUSED_CONTAINER = sos_Container::make(0xffffffff); //2^32-1

#ifndef NO_TT
   _psm_checked_cnt = sos_Container::make(0);
#endif
  
   if (!(psm_cntdir = obst_getenv(ENVVAR_CONTAINER)))
      err_raise(err_SYS, err_NO_CONTAINER_PATH, psm_ERR, FALSE);

   // AUTOSQUEEZE: switched on by the mere presence of the variable. Its
   // value is a percentage; anything that is not a number in 1..99 selects
   // the default occupation degree. The sscanf result is checked so that a
   // non-numeric value can never leave the percentage uninitialized.
   
   char* as = obst_getenv(ENVVAR_AUTOSQUEEZE);
   if (as != 0)
   {
      AUTOSQUEEZE = 1;
      int percent = 0;
      if (sscanf(as, "%d", &percent) != 1 || percent <= 0 || percent >= 100)
         MIN_OCCUPATION_DEGREE = DEFAULT_OCCUPATION_DEGREE;
      else
         MIN_OCCUPATION_DEGREE = double(percent)/100.0;
   }
   else 
      AUTOSQUEEZE = 0;

   // BUFFERSIZE: number of buffer pages; unset, unparsable or non-positive
   // values fall back to DEFAULT_BUFFERSIZE.

   char* buf_sz = obst_getenv(ENVVAR_BUFFERSIZE);
   if (buf_sz == 0 || sscanf(buf_sz, "%d", &B) != 1 || B <= 0)
      B = DEFAULT_BUFFERSIZE;  // normal value of B if not monitoring 
                               //   &&  SOSBUFFERSIZE not set

#ifdef MONITOR
   // MONITOR: a positive value replaces the buffer size and activates the
   // monitor.
   // NOTE(review): a non-positive value resets B to DEFAULT_BUFFERSIZE even
   // if a buffer size was configured above, and an unparsable value keeps
   // the B chosen above while still activating the monitor -- confirm that
   // both are intended.
   mon_activated = FALSE;
   char* e = obst_getenv(ENVVAR_MONITOR);
   if (e != 0)
   {
      sscanf(e, "%d", &B);
      if (B > 0)
      {
         mon_initialize();
         mon_activated = TRUE;
      }
      else 
         B = DEFAULT_BUFFERSIZE;
   }
#endif

   // A BOOT build uses a fixed seed; all other builds seed from the clock.
#if BOOT
    srandom(1);
#else
    timeval tp;
    gettimeofday(&tp, 0);
    srandom((int)tp.tv_sec);
#endif
 
   psm_buf_tbl     = new psm_buf_info [B];   // allocation of buffers
   psm_buf         = (psm_PAGE *) new psm_PAGE [B];

   int i, j;                    
   
   // initialization of buffers and tables
   for (i = 0; i < psm_C; i++)    psm_cnt_tbl[i].id = psm_UNUSED;
   for (i = 0; i < B; i++)        psm_buf_tbl[i].id = psm_UNUSED;
   for (i = 0; i < B; i++) 
      for (j = 0; j < psm_P; j++) psm_buf[i][j]     = psm_UNUSED;

#ifdef OBST_HAVE_JOYCE
   psm_is_syncactivated = FALSE;
#endif
}

//------------------------------------------------------------------------------

void _psm_final_initialize()
{  
   // Second initialization step: determines whether transaction management
   // is switched on. Without OBST_HAVE_JOYCE only the trace calls remain.

   T_PROC("_psm_final_initialize");
   TT(psm_H, T_ENTER);

#ifdef OBST_HAVE_JOYCE
   // Transaction management counts as switched on exactly when the file of
   // the SYNC_CONTAINER can be opened for reading. The descriptor is only a
   // probe and is closed again at once.

   int probe_fd = ::open(psm_cnt_path(SYNC_CONTAINER), O_RDONLY);
   if (probe_fd == psm_UNDEF)
      psm_is_syncactivated = FALSE;
   else
   {
      ::close(probe_fd);
      psm_is_syncactivated = TRUE;
   }
#endif

   TT(psm_H, T_LEAVE);
} 

// ================= auxiliary functions =======================================


void* psm_extend_tbl(void* tbl,
                     unsigned unit, unsigned sz, unsigned new_sz)
{
   // Returns a newly allocated table of new_sz elements of 'unit' bytes
   // each. The first sz elements are copied from tbl, the remaining bytes
   // are zero.
   //
   // The result is allocated with new char[]; the caller owns it. The old
   // table is NOT released here.
   //
   // Never more than the new table can hold is copied, so a new_sz smaller
   // than sz truncates instead of writing past the allocation. An empty
   // old table (sz == 0, tbl possibly 0) is accepted.

   unsigned old_bytes = sz * unit;
   unsigned new_bytes = new_sz * unit;
   unsigned keep      = (old_bytes < new_bytes) ? old_bytes : new_bytes;

   char *new_tbl = new char[new_bytes];
   if (keep > 0)
      memcpy(new_tbl, tbl, keep);
   memset(new_tbl + keep, 0, new_bytes - keep);
   return(void*)new_tbl;
}



// ================= freelist functions ========================================

void psm_print_f_lists(psm_cnt_info &ci) 
{
   // Test aid: writes every non-empty free list of container ci to cout.
   // The head of the list for block size s is read from container offset s;
   // each list element holds the offset of its successor, 0 ends the list.

   if (ci.cnt_pgs == 0) 
   { 
      cout << "NO FREELISTS\n";
      return;
   }

   cout << 
    "\n=================  FREELISTS   ===================================\n";

   for (int list_sz = 4; list_sz <= psm_MAX_FREEBLOCK_LENGTH; list_sz += 4) 
   {
      int pos = list_sz;

      // the size label is printed for non-empty lists only
      if (psm_read_int(ci, pos) != 0)
      {
         cout << "\n"  << "size " << list_sz << " : ";
      }

      // walk the chain; for an empty list the loop body is never entered
      while (psm_read_int(ci, pos) != 0)
      {
         pos = psm_read_int(ci, pos);
         cout << "  " << pos << "  ";
      }
   }

   cout << 
    "\n==================== END OF FREELISTS ============================\n";
}

//------------------------------------------------------------------------------


void psm_insert_in_f_list(psm_cnt_info &ci, unsigned sz, sos_Offset offset)
{
   // Pushes the block at 'offset' onto the front of the free list for
   // blocks of size sz.
   // PRE: sz lies in [0, psm_MAX_FREEBLOCK_LENGTH] and is already rounded
   //      (sz == psm_four_upround(sz)).
   // A block of size 0 is ignored.

   err_assert(sz == psm_four_upround(sz), "PSM:InsertInFreelist");
   err_assert(sz <= psm_MAX_FREEBLOCK_LENGTH, "PSM:InsertInFreelist");

   if (sz != 0)
   {
      // the list head is stored at container offset sz; the new block
      // takes over the old head as its successor and becomes the head
      sos_Int old_head = psm_read_int(ci, sz);
      psm_write_int(ci, offset, old_head);
      psm_write_int(ci, sz, offset);

      ci.ub_free_sz = psm_max(ci.ub_free_sz, sz); 

      // blocks with room for a second integer are additionally flagged as
      // deleted (needed by object_exists())
      if (sz >= 8)
      {
         psm_write_int(ci, offset + psm_SZ_OF_INT, psm_UNDEF); 
      }
   }
} 

//------------------------------------------------------------------------------

unsigned psm_rm_first_from_f_list(psm_cnt_info &ci, unsigned sz)
{
   // Unlinks the first element of the free list for size sz and returns
   // the offset of that element.
   // PRE: sz lies in [0, psm_MAX_FREEBLOCK_LENGTH], is rounded
   //      (sz == psm_four_upround(sz)), and the list for sz is not empty.

   // sz is validated before it is used as an offset for reading the head
   err_assert(sz == psm_four_upround(sz), "PSM:DeleteFirstFromFreelist");
   err_assert(sz <= psm_MAX_FREEBLOCK_LENGTH, "PSM:DeleteFirstFromFreelist");

   // the list head is stored at container offset sz
   unsigned result =(unsigned) psm_read_int(ci, sz);
   err_assert(result != 0, "PSM:DeleteFirstFromFreelist");

   // the successor stored in the removed block becomes the new head
   psm_write_int(ci, sz, unsigned(psm_read_int(ci, result)));  
   return result;
}

//------------------------------------------------------------------------------

#ifndef NO_TT

   // psm_check_f_list checks whether the block [start, start+length) of
   // container ci overlaps a block that is linked into one of the free
   // lists (the free lists contain previously deallocated blocks). An
   // overlap raises err_PSM_FREELIST_CHECK_FAILED.

   // if (_psm_checked_cnt == 0) check all containers
   // else                       check only  _psm_checked_cnt
   // The container with id 0 is never checked.


   void psm_check_f_list(psm_cnt_info &ci, int start, int length)
   {  
      if ((_psm_checked_cnt  == 0  ||  _psm_checked_cnt == ci.id)
           &&  ci.id != 0)
      {
          for (int sz = 4; sz <= psm_MAX_FREEBLOCK_LENGTH; sz += 4)  
          {  
             // iterate over lists of different size; every element of the
             // list for sz is a free block of sz bytes

             int offset = psm_read_int(ci, sz);
             while (offset != 0)     
             {
                // overlap: start lies inside the free block, or the block
                // begins before the free block and reaches into it
                if ((start >= offset  &&  start <(offset + sz)) 
                    || (start < offset  && (start + length) > offset))
                   err_raise(err_SYS, err_PSM_FREELIST_CHECK_FAILED,
                             psm_ERR, FALSE);
                offset = psm_read_int(ci, offset);  // next list_element
             }
          }
      }
   }

#endif // NO_TT


// ================= psm functions =============================================

// ----------------- calculation functions -------------------------------------

unsigned psm_occupied_bytes(psm_cnt_info &ci)
{
   // Number of occupied bytes of container ci: the bytes of all used
   // pages, minus the bytes held in free blocks, minus the first page
   // (the table of free blocks). A container without pages occupies 0.

   if (ci.cnt_pgs == 0)
      return 0;

   return used_pgs(ci)*psm_P - sz_of_free_blocks(ci) - psm_P;
}


// ----------------- time functions --------------------------------------------

void psm_set_last_read_time(psm_cnt_info &ci) 
{
   // Records the current modification time of the container file as the
   // container's last read time (needed by modified_since_last_read).
   // NOTE(review): the result of fstat is not checked; if it fails, the
   // previous contents of file_stat_buf are used.
   fstat(ci.fd, &file_stat_buf);
   ci.last_read_time =(int)file_stat_buf.st_mtime; 
                                             // last_modification_time ...
                                             // ... of file
   // st_mtime could be higher(later) than the last read time but this ...
   // ... does no harm, as that time is only used to determine whether ...
   // ... anything has been changed in the container since then.
}

//------------------------------------------------------------------------------

// Overload that stores an explicitly given value as the last read time of
// container ci instead of querying the container file.
void psm_set_last_read_time(psm_cnt_info &ci, int time) 
{
   ci.last_read_time = time;
}

// ----------------- path functions --------------------------------------------

char* psm_cnt_path(sos_Int id, int path_for_lockfile /* = FALSE */)
{
   // Returns the path "<psm_cntdir>/<id>" of the file of container id, or
   // "<psm_cntdir>/<id>.lck" if path_for_lockfile is true.
   //
   // The result points into a buffer that is allocated on the first call
   // and reused afterwards: every call overwrites the path returned by the
   // previous one, and the directory prefix is the value psm_cntdir had at
   // the first call. The caller must not free the result.

   static char *path_buf    /* = NULL */,   // "<psm_cntdir>/" + variable part
	       *append_from /* = NULL */;   // position just behind the '/'

   if (!append_from)
   {  
      // upper bound for the text "%ld" can produce: at most 3 decimal
      // digits per byte of a long, plus the sign
      const unsigned max_id_chars = 3 * sizeof(long) + 1;

      // directory + '/' + id + ".lck" + '\0'
      path_buf = new char[strlen(psm_cntdir) + 1 + max_id_chars + 4 + 1];

      strcpy(path_buf, psm_cntdir);
      strcat(path_buf, "/");

      append_from = path_buf + strlen(path_buf);
   }
   sprintf(append_from, "%ld",(long)id);

   if (path_for_lockfile)
      strcat(append_from, ".lck");

   return path_buf;
}

//------------------------------------------------------------------------------

char* psm_cnt_path_lock(int id)
{
   // Returns pointer to path of lockfile for container id; shorthand for
   // psm_cnt_path(id, TRUE), so the result lives in psm_cnt_path's buffer.

   return psm_cnt_path(id, TRUE);
}

// ----------------- lock and lock auxiliary functions -------------------------

void psm_check_lock_method()
{
   // Warns once per process if the lock file of container 1 does not match
   // the locking method this program was compiled with:
   //  - built with LOCKD_CORRECT (fcntl): a warning is raised if the lock
   //    file exists,
   //  - built without it (flock): a warning is raised if the lock file
   //    cannot be opened.
   // Calls after the first one return immediately. The probing descriptor
   // is closed on every path.

   static int is_initialized=0;
   if (is_initialized) 
      return;
   is_initialized++;

   int fd = ::open(psm_cnt_path_lock(1), O_RDONLY);
#ifdef LOCKD_CORRECT
   if (fd != psm_UNDEF)
   {
      // close first: the descriptor is not needed for the warning
      ::close(fd);
      err_raise(err_WNG, err_PSM_CNT_FLOCK_PROG_FCNTL, psm_ERR, TRUE);
   }
#else
   if (fd == psm_UNDEF)
      err_raise(err_WNG, err_PSM_CNT_FCNTL_PROG_FLOCK, psm_ERR, TRUE);
   else
      ::close(fd);
#endif // LOCKD_CORRECT
   return;
}

//------------------------------------------------------------------------------

int psm_downgrading(sos_Container_status stat, sos_Access_mode am)
{
   // A transition from status stat to access mode am is a downgrade when it
   // is neither an upgrade nor a transition that keeps the current mode.

   if (psm_upgrading(stat, am))
      return 0;

   return !status_equal_accessmode(stat, am);
}

//------------------------------------------------------------------------------

sos_Open_result psm_lock(int fd, sos_Access_mode am, sos_Sync_mode sm,
                         sos_Container_status previous_stat, int id,
                         int &fd_lock) 
{
   // Locks the container file fd of container id for access mode am,
   // starting from the status previous_stat; sm selects waiting or testing.
   //
   // If necessary (because of checkout) the container's lock file is
   // opened and locked too, and fd_lock (call by reference) is updated.
   // When the status changes from CHECKEDOUT to another status, the lock
   // file is closed and fd_lock is set to psm_UNDEF (flock variant).
   // If locking fails the previous lock is maintained; if this is not
   // possible (see flock error handling below) an error is raised.
   //
   // Returns OPENED on success and LOCKED on failure; with JOYCE, DEADLOCK
   // is returned if the failure was flagged as a deadlock.
   //
   // The SYNC_CONTAINER may only be locked with WRITING/WAITING.
   // (Perhaps the order of the arguments in the signature will be changed
   // later.)


   T_PROC("psm::psm_lock");
   TT(psm_H, T_ENTER;TI((int)fd);TI((int)am);TI((int)sm);
      TI((int)previous_stat);TI((int)id);TI((int)fd_lock));

   int result;

   if ((id == SYNC_CONTAINER) && ((am != WRITING) || (sm != WAITING)))
      err_raise(err_SYS, err_PSM_SYCT_WRONG_ACCESS, psm_ERR, FALSE);

#ifdef OBST_HAVE_JOYCE
   if ((psm_is_syncactivated) && (id != SYNC_CONTAINER))
      
      // with synchronization 

      result = psm_sync_lock_a_file(am, sm, id);

   else // without synchronization
#endif
   if (status_equal_accessmode(previous_stat, am))        
      result = TRUE;               // no change of access_mode
   else if (previous_stat == WRITEABLE && am == CHECKOUT)   
   {                            
      // special case: downgrading W->C
#ifdef LOCKD_CORRECT                              
      result = psm_lock_a_file(fd, WRITING, sm, psm_LOCK, TRUE)        
               && psm_lock_a_file(fd, READING, sm, psm_LOCK, FALSE);
#else
      fd_lock = ::open(psm_cnt_path_lock(id), O_RDWR, 0644); 
      if (fd_lock == psm_UNDEF)
         result = FALSE;                             
      else 
      {  
         // order is important
         result = psm_lock_a_file(fd_lock, WRITING, sm)        
            	  && psm_lock_a_file(fd, READING, sm);
         // downgrading W->C should never fail ...
         // ... => closing of fd_lock never necessary
         err_assert(result, "PSM:lock():downgrading_failed");
      }
#endif // LOCKD_CORRECT
   }
   else    // * all other cases
   {
      // a checkout only needs a read lock on the container itself
      result = psm_lock_a_file(fd,(am == CHECKOUT ? READING : am), sm);

#ifndef LOCKD_CORRECT
      
      // FLOCK ERROR HANDLING: If upgrading for fd failed(R->W, ...
      // ... C->W), then try to get previous(READING) lock again for...
      // ... fd. Downgrading and "R->C" cannot fail for fd. If upgrading ...
      // ... from UNAVAILABLE fails no previous lock  must be reset. ...
      // ... If flock() fails and it is not possible to get the previous ...
      // ... lock back then an error is raised. 
      
      if (!result 
           && (previous_stat == READABLE  ||  previous_stat == CHECKEDOUT)
           && ! psm_lock_a_file(fd, READING, TESTING))

         err_raise(err_SYS, err_PSM_LOST_ALL_LOCKS, psm_ERR, FALSE);

#endif // LOCKD_CORRECT

      if (result  &&  am == CHECKOUT)    // lock fd_lock
      {
#ifdef LOCKD_CORRECT
         result = psm_lock_a_file(fd, WRITING, sm, psm_LOCK, TRUE);
         if (!result && previous_stat == UNAVAILABLE)
            psm_lock_a_file(fd, WRITING, TESTING, psm_UNLOCK);
#else
         fd_lock = ::open(psm_cnt_path_lock(id), O_RDWR, 0644); 
         if (fd_lock == psm_UNDEF)
            result = FALSE;  
         else   
         {
            result = psm_lock_a_file(fd_lock, WRITING, sm);
            if (!result) 
            {
               ::close(fd_lock);
               fd_lock = psm_UNDEF;
            }
            // the container was not locked before this call, so the lock
            // taken above is given back
            if (!result  &&  previous_stat == UNAVAILABLE)
               psm_lock_a_file(fd, WRITING, TESTING, psm_UNLOCK);        
         }
#endif // LOCKD_CORRECT
      }
      // previous_stat is a sos_Container_status, so it is compared with
      // the status literal CHECKEDOUT (same value as CHECKOUT, see the
      // assertion in _psm_initialize)
      if (result  &&  previous_stat == CHECKEDOUT)  // * close fd_lock
      { 
#ifdef LOCKD_CORRECT
         psm_lock_a_file(fd, WRITING, TESTING, psm_UNLOCK, TRUE);
#else
         ::close(fd_lock);
         fd_lock = psm_UNDEF;
#endif // LOCKD_CORRECT
      }
   }
   TT(psm_H, T_LEAVE; TI(result));

#ifdef OBST_HAVE_JOYCE
   if ((!result) && (psm_is_deadlock) && (id != SYNC_CONTAINER))
      return DEADLOCK;
   else
#endif
      return result ? OPENED : LOCKED;
}


//------------------------------------------------------------------------------

int psm_lock_a_file(int fd, sos_Access_mode am, sos_Sync_mode sm, 
                    int lock_mode /* = psm_LOCK */,
		    sos_Bool is_lockfile /* = FALSE */)
{
   // Tries to lock (lock_mode == psm_LOCK) or to unlock (any other
   // lock_mode, e.g. psm_UNLOCK) the file with descriptor fd and returns
   // whether the call succeeded (non-zero) or not (0).
   //
   // am: WRITING requests an exclusive lock, every other mode a shared one.
   // sm: selects between a blocking and a non-blocking request.
   //     NOTE: the two variants test different literals -- fcntl does not
   //     block only for TESTING, flock blocks only for WAITING.
   // Unlocking never blocks; am and sm are ignored for it.
   //
   // Which lock manager (fcntl or flock) is called depends on
   // LOCKD_CORRECT.
   //
   // is_lockfile is only used by the fcntl variant:
   // - if is_lockfile is TRUE  the first byte  (offset 0) is locked
   // - if is_lockfile is FALSE the second byte (offset 1) is locked

   // TRACE
   T_PROC("psm:psm_lock_a_file");TT(psm_VL, T_ENTER;
           TI(fd); TI(am); TI(sm); TI(lock_mode));

   int result;

#ifdef LOCKD_CORRECT

   // FCNTL

   struct flock fl;
   int cmd;

   if (lock_mode == psm_LOCK)
   {
      fl.l_type =(am == WRITING ? F_WRLCK : F_RDLCK);
      cmd       =(sm == TESTING ? F_SETLK : F_SETLKW);
   }
   else
   {
      fl.l_type = F_UNLCK;
      cmd       = F_SETLK;
   }
   fl.l_whence = SEEK_SET;                    // starting offset = start of file
   fl.l_start  =(is_lockfile ? 0 : 1);        // relativ_offset for lockfile  = 0
                                              //                for container = 1
   fl.l_len    = 1;                           // 1 Byte locked

   // fcntl expects the address of the flock structure; the pointer must
   // not be squeezed through an int (that loses bits where pointers are
   // wider than int)
   result      =(fcntl(fd, cmd, &fl) != -1) ;

#else

   // FLOCK

   int sync, access;
   if (lock_mode == psm_LOCK)
   {
      access    =(am == WRITING ? LOCK_EX : LOCK_SH);
      sync      =(sm == WAITING ? 0 : LOCK_NB);
   }
   else 
   {
      access    = LOCK_UN;
      sync      = LOCK_NB;
   }
   result =(flock(fd, access | sync) != -1);

   (void) is_lockfile;        // not needed for flock; suppress warning

#endif // LOCKD_CORRECT

   // TRACE
   TT(psm_VL, T_LEAVE);

   return result;
}

//------------------------------------------------------------------------------

#ifdef OBST_HAVE_JOYCE
int psm_sync_lock_a_file(sos_Access_mode am, sos_Sync_mode sm, int id, 
                         int lock_mode /* = psm_LOCK */)

{
   // Locks or releases container id through the synchronization object
   // that the SYNC_CONTAINER keeps for it, instead of locking a file.
   //
   // lock_mode == psm_LOCK : requests the object with mode am (a CHECKOUT
   //                         is requested as WRITING) and sync mode sm;
   //                         the value returned by the request is returned.
   // any other lock_mode   : releases the object and returns TRUE;
   //                         am and sm are of no interest then.
   //
   // The SYNC_CONTAINER is opened (WRITING, WAITING) for the duration of
   // the call if it is not open yet, and closed again before a normal
   // return. Errors reported by the sync layer are raised via err_raise.

  
   // TRACE
   T_PROC("psm.Sync_lock_a_file");TT(psm_VL, T_ENTER; TI(am); 
           TI(sm); TI(id); TI(lock_mode));
 
   int             result;
   SyncCont        sync_cnt;
   SyncObj         sync_object;
   sos_Open_result open_result;
   sos_Bool        is_sync_cnt_open = psm_is_cnt_open(SYNC_CONTAINER); 


   if (!is_sync_cnt_open) 
   { 
      open_result = SYNC_CONTAINER.open(WRITING, WAITING);
      if (open_result != OPENED)
      { 
         // NOTE(review): this trace call uses tsy_H while the rest of the
         // function traces with psm_VL -- confirm that this is intended.
         TT(tsy_H, TXT("cannot find SYNC_CONTAINER"); T_LEAVE);
         err_raise(err_SYS, err_PSM_NO_SYNC_CONTAINER, psm_ERR, FALSE); 
      }
   }

   sync_cnt = SyncCont::get_root();

   // synchronization object belonging to container id
   sync_object = sync_cnt.get_SO(sos_Container::make(id));
   if (sync_cnt.get_error() != SYNC_OK)
      err_raise(err_SYS, err_PSM_SYNC, psm_ERR, FALSE);

   if (lock_mode == psm_LOCK)
   { 
      result = sync_object.get((am==CHECKOUT ? WRITING : am), sm);
      // the else branches below belong to the inner if
      if ((sync_object.get_error() != SYNC_OK))
         if ((sync_object.get_error() == SYNC_TA_ABORTED) && 
             (sync_deadlock))
            // set sync_deadlock for psm intern. From now on every method ... 
            // ... call will be a dummy until the transaction is ... 
            // aborted or committed explicitly. Then psm_is_deadlock ...
            // ... is set to FALSE again.
            psm_is_deadlock = TRUE;
         else if ((sync_object.get_error() == SYNC_IMPLTA_ABORTED))
         { 
            // NOTE(review): only this error path closes the SYNC_CONTAINER
            // before raising, and it does so even if the container was
            // already open on entry -- confirm.
            SYNC_CONTAINER.close();
            err_raise(err_SYS, err_PSM_SYNC_IMPLTA_ABORTED, psm_ERR, FALSE);
         }
         else
            err_raise(err_SYS, err_PSM_SYNC, psm_ERR, FALSE);
   }
   else
   { 
      sync_object.release();
      if (sync_object.get_error() != SYNC_OK)
         err_raise(err_SYS, err_PSM_SYNC, psm_ERR, FALSE);
      else 
         result = TRUE;
   }

   // close the SYNC_CONTAINER again only if it was opened by this call
   if (!is_sync_cnt_open)
      SYNC_CONTAINER.close();

   // TRACE
   TT(psm_VL, T_LEAVE);

   return result;
}
#endif

//------------------------------------------------------------------------------

int psm_upgrading(sos_Container_status stat, sos_Access_mode am)
{
   // Tells whether going from container status stat to access mode am is
   // an upgrade:
   //   UNAVAILABLE -> any mode
   //   READABLE    -> WRITING or CHECKOUT
   //   CHECKEDOUT  -> WRITING
   // The status DESTROYED must not be passed.

   err_assert(stat != DESTROYED,
              "PSM:psm_upgrading():status DESTROYED is not allowed");

   if (stat == UNAVAILABLE)
      return 1;

   if (stat == READABLE)
      return (am == WRITING  ||  am == CHECKOUT);

   if (stat == CHECKEDOUT)
      return (am == WRITING);

   return 0;
}

// ----------------- page and integer functions --------------------------------

int psm_lookup_pg(psm_cnt_info &ci, int cnt_pg) 
{
   // Returns the index of the buffer page that holds page cnt_pg of
   // container ci.
   //
   // A page that is not in the buffer yet gets a buffer page from
   // preempt_pg() (which may have to swap out a page of another container)
   // and is read from the container file. In either case the buffer page
   // is marked as referenced for the second chance algorithm (this happens
   // only here).
   // A cnt_pg beyond the pages of the container raises
   // err_PSM_WRONG_OFFSET.

   if (cnt_pg >= ci.cnt_pgs) 
      err_raise(err_SYS, err_PSM_WRONG_OFFSET, psm_ERR, FALSE);

   int slot = ci.buf_index[cnt_pg];
   if (slot == psm_UNDEF)
   {   
      // record the new buffer page before the page contents are read
      slot = preempt_pg(ci.id, cnt_pg);
      ci.buf_index[cnt_pg] = slot;
#ifdef MONITOR
      if (mon_activated) mon_black(ci.win, cnt_pg);
#endif
      read_pg(ci, cnt_pg, slot); 
   }

   psm_buf_tbl[slot].is_ref = TRUE;
   return slot; 
}

//------------------------------------------------------------------------------

sos_Int psm_read_int(psm_cnt_info &ci, sos_Offset o)
{
   // Returns the integer stored at offset o of container ci; the page
   // containing o is brought into the buffer if necessary.
   // PRE: o%P+SIZEOF_INT<=P, i.e. data does not cross page boundary

   // The bytes are copied out of the page buffer instead of being read
   // through a casted sos_Int pointer, so the access does not depend on
   // the alignment of the position inside the byte buffer.
   sos_Int value;
   memcpy(&value, &psm_buf[psm_lookup_pg(ci, o/psm_P)][o%psm_P],
          sizeof(sos_Int));
   return value;
}

//------------------------------------------------------------------------------

void psm_write_int(psm_cnt_info &ci, sos_Offset o, sos_Int data) 
{
   // Stores data at offset o of container ci and marks the buffer page
   // holding it as modified; the page containing o is brought into the
   // buffer if necessary.
   // PRE: o%P+SIZEOF_INT<=P, i.e. data does not cross page boundary

   int buf_pg = psm_lookup_pg(ci, o/psm_P);

   // The bytes are copied into the page buffer instead of being written
   // through a casted sos_Int pointer, so the access does not depend on
   // the alignment of the position inside the byte buffer.
   memcpy(&psm_buf[buf_pg][o%psm_P], &data, sizeof(sos_Int));
   psm_buf_tbl[buf_pg].is_mod = TRUE;
}

// ----------------- pagetable functions ---------------------------------------

sos_Offset psm_alloc_pg(psm_cnt_info &ci, int n) 
{
 
   // Returns offset of n consecutive free pages in fileindex ...
   // ...(not freefilepage) and marks them as used. Extending of ...
   // ... the tables could be necessary.
   // The returned offset is the byte offset (page number * psm_P) of the
   // first of the n pages; the pages are placed in the buffer zero-filled.

   // Scan the file index for a run of n unused pages:
   //   p = next page to inspect, f = first page of the current run,
   //   s = length of the current run.
   int p = 0, f = 0, s = 0;
   while (s < n  &&  p < ci.cnt_pgs)
      if (ci.file_index[p] == psm_UNDEF) 
      {
         s++; 
         p++;
      }
      else 
      {
         s=0; 
         p++; 
         f=p;
      }   // a used page ends the run; the next run starts behind it

   if (s < n)
   {
      // If not enough free pages are found then extend the tables. ...
      // ... Afterwards rest of the pages is surely found.
      // (the run [f, f+s) ends at the end of the container, so only the
      // missing n - s pages are appended)
      ci.cnt_pgs +=  n - s;
      if (ci.cnt_tbl_sz < ci.cnt_pgs)
      {
         // ci.conttablesize is increased as necessary, but at least ... 
         // ... doubled. This should be more efficient(time, not space) ...
         // ... for large containers.
         // NOTE(review): psm_extend_tbl returns a new table and does not
         // release the old one; the old tables are not released here
         // either -- confirm whether this leak is intended.
         int new_sz = psm_max(N_upround(ci.cnt_pgs), 2*ci.cnt_tbl_sz);
         ci.buf_index      =(int*)psm_extend_tbl(ci.buf_index, sizeof(int),
                            ci.cnt_tbl_sz, new_sz);
         ci.file_index     =(int*)psm_extend_tbl(ci.file_index, sizeof(int), 
                            ci.cnt_tbl_sz, new_sz);
         ci.is_shadowed    =(sos_Bool*)psm_extend_tbl(ci.is_shadowed, 
                            sizeof(sos_Bool),  ci.cnt_tbl_sz, new_sz);
         ci.pg_tbl_info    =(psm_pg_info*)psm_extend_tbl(ci.pg_tbl_info,
                            sizeof(psm_pg_info),
                            number_in_pg_tbl_info(ci.cnt_tbl_sz)+1,
                            number_in_pg_tbl_info(new_sz)+1); 
         ci.cnt_tbl_sz     = new_sz; 
      } 
   }
   for (s = 0; s < n; s++)
   {   
      // mark rest of pages: each page gets a free file page, its page-table
      // page is flagged as modified, the page is flagged as shadowed, and
      // it gets a buffer page that is flagged as modified and cleared
      ci.file_index[f+s]                  =  find_free_file_pg(ci);
      ci.pg_tbl_info[
         number_in_pg_tbl_info(f+s)].
         is_mod                           =  TRUE;
      ci.is_shadowed[f+s]                 =  TRUE;
      ci.buf_index[f+s]                   =  preempt_pg(ci.id, f+s);
#ifdef MONITOR
      if (mon_activated) mon_black(ci.win, f+s);
#endif
      psm_buf_tbl[ci.buf_index[f+s]].is_mod = TRUE;
      memset(psm_buf[ci.buf_index[f+s]], 0, psm_P);    // initialize with zeros
   }
   return(unsigned) f * psm_P; 
}

//------------------------------------------------------------------------------

void psm_dealloc_pg(psm_cnt_info &ci, int p, int n) 
{
   // Releases the n consecutive logical pages p .. p+n-1 of container ci
   // and drops them from the buffer. When called from clear some of these
   // pages may already be unallocated; that is harmless.

   //err_assert(p > 0, "PSM:deallocatepages");  // never deallocate freelists 

   const int end = p + n;      // first page behind the released range

   // A range reaching the end of the container shrinks the container.
   // (ci.cnt_pgs is not used again below.)
   if (end == ci.cnt_pgs)
      ci.cnt_pgs -= n;

   for (int pg = p; pg < end; pg++)
   {
      // Only the file page of a shadowed page may be handed back, because
      // the container could still be reset to its previous contents.
      if (ci.is_shadowed[pg])
      {
         ci.free_file_pg[ci.file_index[pg]] = TRUE;
         ci.is_shadowed[pg] = FALSE;
      }

      ci.file_index[pg] = psm_UNDEF;
      ci.pg_tbl_info[number_in_pg_tbl_info(pg)].is_mod = TRUE;

      const int buf = ci.buf_index[pg];
      if (buf != psm_UNDEF)
      {
         // detach the page from its buffer slot
         psm_buf_tbl[buf].id = psm_UNUSED;
         ci.buf_index[pg]    = psm_UNDEF;
      }
   }
}  

// ----------------- container functions ---------------------------------------

sos_Bool psm_is_equal_cnt(psm_cnt_info ci1, psm_cnt_info ci2)
{
   // Compares the contents of two containers word by word.
   //
   // For every allocated page of ci1 (page 0, which holds the free list
   // headers, is skipped) each 4-byte word that is not part of a block on
   // one of ci1's free lists must exist in ci2 and hold the same value.
   // Returns TRUE if all such words match, FALSE otherwise. Only the pages
   // of ci1 are enumerated; pages existing solely in ci2 are not examined.

   enum { PG_BYTES = 1024,             // page size assumed by this function
          PG_WORDS = PG_BYTES / 4 };   // 4-byte words per page

   sos_Bool is_used[PG_WORDS];         // TRUE: word is not on a free list

   for (int pg = 1; pg < ci1.cnt_pgs; pg++)
   {
      if (ci1.file_index[pg] == psm_UNDEF)
         continue;                     // page not allocated in ci1

      const int base_offset = pg * PG_BYTES;
      int x;

      for (x = 0; x < PG_WORDS; x++) 
         is_used[x] = TRUE;

      // Walk every free list and clear the words of all free blocks that
      // lie inside this page.
      for (int sz = 4; sz <= psm_MAX_FREEBLOCK_LENGTH; sz += 4)
      {
         int o = sz;
         while ((o = psm_read_int(ci1, o)) != 0)
         {
            if (o < base_offset  ||  o >= base_offset + PG_BYTES)
               continue;               // block belongs to another page

            int first = o - base_offset;
            int last  = first + sz;
            if (last > PG_BYTES)       // never write past is_used[]
               last = PG_BYTES;
            for (int i = first; i < last; i += 4)
               is_used[i/4] = FALSE;
         }
      }

      // The page is comparable only if ci2 has it as well. The bound check
      // comes first so ci2.file_index is never read beyond ci2.cnt_pgs.
      const int ci2_has_pg = (pg < ci2.cnt_pgs)
                             && (ci2.file_index[pg] != psm_UNDEF);

      for (x = 0; x < PG_WORDS; x++)
      {
         if (! is_used[x])
            continue;
         if (! ci2_has_pg)
            return FALSE;              // used word in ci1, no page in ci2
         if (psm_read_int(ci1, base_offset + x*4) !=
             psm_read_int(ci2, base_offset + x*4))
            return FALSE;
      }
   }
   return TRUE;
}

// -----------------------------------------------------------------------------

// Helper for psm_check_object_exists: inspects the container file behind the
// already opened descriptor fd (container not in the container table).
// Never closes fd; a failed or short read is answered with NOT_EXISTING.
static sos_Existing_status psm_probe_closed_cnt(int fd, sos_Offset o, 
                                                sos_Int sz)
{
   sos_Int sw      = 0;     // switch selecting the valid cnt_pgs entry
   sos_Int cnt_pgs = 0;     // number of logical container pages
   sos_Int file_pg = 0;     // file page the logical page is mapped to
   sos_Int flag    = 0;     // word following the first word of the object
   char    cur_sw  = 0;     // switch selecting the valid page table half

   lseek(fd, psm_SW_OFFSET, SEEK_SET);
   if ((int)::read(fd,(char *)&sw, psm_SZ_OF_INT) != (int)psm_SZ_OF_INT)
      return NOT_EXISTING;                       // e.g. empty file

   lseek(fd, cnt_pgs_offset(sw), SEEK_SET);
   if ((int)::read(fd,(char *)&cnt_pgs, psm_SZ_OF_INT) != (int)psm_SZ_OF_INT)
      return NOT_EXISTING;

   if (cnt_pgs == 0)
      return NOT_EXISTING;                       // container file empty

   const sos_Int cnt_pg = o / psm_P;             // logical container page

   if (cnt_pg >= cnt_pgs)                        // valid pages: 0..cnt_pgs-1
      return NOT_EXISTING;

   lseek(fd, abs_sw_in_pg_offset(sos_Bool(sw),
         number_in_pg_tbl_info(cnt_pg)), SEEK_SET);   // page switch
   if ((int)::read(fd,(char *)&cur_sw, sizeof(char)) != (int)sizeof(char))
      return NOT_EXISTING;

   // pagetable_offset in containerfile  +  page_offset inside pagetable
   lseek(fd, pg_tbl_offset(cnt_pg, (sos_Int)cur_sw)
         +(cnt_pg%psm_N) * psm_SZ_OF_INT, SEEK_SET);
   if ((int)::read(fd,(char *)&file_pg, psm_SZ_OF_INT) != (int)psm_SZ_OF_INT)
      return NOT_EXISTING;

   if (file_pg == psm_UNDEF)
      return NOT_EXISTING;                       // page not allocated

   if (sz > psm_MAX_FREEBLOCK_LENGTH)
      // Large object: for efficiency only its first page is looked at.
      return PERHAPS_EXISTING;

   // file_position(file_pg) + offset inside of page + psm_SZ_OF_INT
   // (the flag word examined by this function follows the first word)
   lseek(fd, file_position(file_pg) + o%psm_P + psm_SZ_OF_INT, SEEK_SET);
   if ((int)::read(fd,(char *)&flag, psm_SZ_OF_INT) != (int)psm_SZ_OF_INT)
      return NOT_EXISTING;

   return(flag == psm_UNDEF) ? NOT_EXISTING : PERHAPS_EXISTING;
}

sos_Existing_status psm_check_object_exists(int id, sos_Offset o, sos_Int sz)
{
   // Tells whether an object of size sz can exist at offset o of container
   // id. Returns NOT_EXISTING if this can be ruled out and PERHAPS_EXISTING
   // otherwise.
   //
   // If the container is not in the container table its file is opened
   // read-only, inspected directly and closed again; otherwise the tables
   // and buffers of the open container are consulted.

   if (psm_four_upround(sz) < 8) 
      return PERHAPS_EXISTING;   // no sos_Object
   if (o < 1024) 
      return NOT_EXISTING;       // wrong offset

   if (! psm_is_cnt_open(id))  // container-file(!) not opened or not existing
   {  
      int fd = ::open(psm_cnt_path(id), O_RDONLY); 

      if (fd < 0) 
         return NOT_EXISTING;                    // file does not exist

      // single exit point so that the descriptor is released on every path
      const sos_Existing_status result = psm_probe_closed_cnt(fd, o, sz);
      ::close(fd);
      return result;
   }

   // container opened 
   psm_cnt_info &ci = psm_lookup_cnt(id, READ_PERM, TRUE, TRUE);
   psm_return_if_destroyed_no_trc(sos_Container_status(ci.stat),
                                  NOT_EXISTING);
   if (sz <= psm_MAX_FREEBLOCK_LENGTH)  
   {  
      // offset 'cannot' be wrong(see:precondition)  because never ...
      // ... merge free blocks to greater free blocks(pages) 

      if (psm_uprounded_pg_number(o + sz) > ci.cnt_pgs) 
         return NOT_EXISTING;  
      sos_Int flag = psm_read_int(ci, o + psm_SZ_OF_INT);   
      return(flag == psm_UNDEF) ? NOT_EXISTING : PERHAPS_EXISTING;  
   }

   // Large object: if one of its pages is UNDEF or lies behind the end of
   // the container (>= cnt_pgs) the object is NOT_EXISTING.

   if (o % psm_P != 0) 
      return NOT_EXISTING;       // offset must be a multiple of P

   int ub = psm_uprounded_pg_number(o + sz);
   for (int p = o/psm_P; p < ub; p++)
      // bound check first: file_index is only valid for pages < cnt_pgs
      if ((p >= ci.cnt_pgs) || (ci.file_index[p] == psm_UNDEF)) 
         return NOT_EXISTING;
   return PERHAPS_EXISTING;
}

//------------------------------------------------------------------------------

psm_cnt_info* psm_cnt_tbl_position(sos_Int id)
{
   // Returns a pointer to the containertable entry whose id field equals
   // `id` (id == UNUSED == 0 is possible, see newcontainer), or
   // (psm_cnt_info *)psm_UNDEF if there is no such entry. The miss case is
   // expensive because the whole containertable is searched, e.g. in
   // sos_Container::open.
   //
   // The search starts at slot id % psm_C, in most cases a good starting
   // point (see newcontainer), and walks downwards with wrap-around until
   // it is back at the start.

   // C++ '%' keeps the sign of the dividend; fold a negative remainder back
   // into the table so that the start slot is always a valid index.
   int start_idx = (int)(id % psm_C);
   if (start_idx < 0)
      start_idx += psm_C;

   psm_cnt_info *const start = &psm_cnt_tbl[start_idx];
   psm_cnt_info *probe       = start;

   do
   {  
      if (probe->id == id) 
         return probe;

      // step to the previous slot, wrapping from the first to the last one
      if (probe == psm_cnt_tbl)
         probe = &psm_cnt_tbl[psm_C-1];
      else
         --probe;
   } while (probe != start);

   return(psm_cnt_info *)psm_UNDEF;
}


//------------------------------------------------------------------------------


void psm_commit_cnt(psm_cnt_info &ci) 
{
   // Commits container ci.
   //
   // Status DESTROYED: the container file (and, with LOCKD_CORRECT, its
   // lock file) is unlinked and the entry is removed via psm_rm_cnt.
   // Otherwise: every modified page held in the buffer is written with
   // write_pg, its modified flag is cleared, and finally the page table
   // is written with write_pg_tbl.

   // TRACE
   T_PROC("psm.commitcontainer"); 
          TT(psm_VL, T_ENTER; TI(ci.id); TB(ci.is_cnt_open));

   if (ci.stat == DESTROYED) 
   {
      unlink(psm_cnt_path(ci.id));  
#ifdef LOCKD_CORRECT
      unlink(psm_cnt_path_lock(ci.id));   // locked or not doesn't care 
#endif // LOCKD_CORRECT
      psm_rm_cnt(ci);                     // closing of fd ...
   }
   else
   {  
      for (int i = 0; i < ci.cnt_pgs; i++)
      {
         if (ci.buf_index[i] == psm_UNDEF)
            continue;                     // page not in buffer
         if (! psm_buf_tbl[ci.buf_index[i]].is_mod)
            continue;                     // page unchanged

         write_pg(ci, i, ci.buf_index[i]);
         psm_buf_tbl[ci.buf_index[i]].is_mod = FALSE; 
      }
      write_pg_tbl(ci); 
   }

   // TRACE
   TT(psm_VL, T_LEAVE);
}

//------------------------------------------------------------------------------

void psm_compress_cnt(psm_cnt_info &ci)
{
   // FUNCTION:
   //(i)  Merges neighbouring free blocks to bigger free blocks.
   //(ii) A page consisting only of free blocks is turned into one free
   //     page (instead of many free blocks) and deallocated.
   //
   // REMARKS: 
   //(i)  Takes a lot of time. The benefit of compress cannot be estimated
   //     here; it depends on the application program.
   //(ii) The smaller the blocks are, the bigger their offsets (inside one
   //     page) are (e.g. size = 700 => offset%P <= 324). Therefore the
   //     smaller blocks with greater offsets are inserted first into the
   //     sorted lists.

   // TRACE
   T_PROC("psm:psm_compress_cnt");
          TT(psm_VL, T_ENTER; TI(ci.id); TB(ci.is_cnt_open));

   // One list per container page. Page 0 holds the free list headers, so
   // a container with at most one page has nothing to compress.
   const int number_of_lists = ci.cnt_pgs;
   if (number_of_lists <= 1)
   { 
      TT(psm_VL, T_LEAVE);        // TRACE
      return; 
   }
   psm_list *L = new psm_list[number_of_lists];

   //(1) Iterate over the freelists and move their items to the (sorted)
   //    list of the containerpage they lie in.

   for (int sz = 4; sz <= psm_MAX_FREEBLOCK_LENGTH; sz += 4)  
   {
      int cur_offset = psm_read_int(ci, sz);         // get_freelistheader 
      psm_write_int(ci, sz, psm_UNUSED);             // clear_freelistheader 

      while (cur_offset != 0)    
      {  
         // fetch the link before the block is handed over to its page list
         int next_offset = psm_read_int(ci, cur_offset);

         int pg_number = cur_offset / psm_P;
         L[pg_number].insert(new psm_list_item(cur_offset, sz));

         cur_offset = next_offset;
      }
   }
   ci.ub_free_sz = 0; 

   //(2) Merge blocks for each containerpage and 
   //    (a) move merged blocks (back) to freelists
   //    (b) pages which are free at all => new free filepage

   // no free block may lie in the page holding the free list headers
   err_assert((L[0].get_header() == 0), "PSM:compress(freelist)");

   for (int i = 1; i < number_of_lists; ++i)
   { 
      err_assert((ci.file_index[i] == psm_UNDEF ||  
                 ci.free_file_pg[ci.file_index[i]] == FALSE),
                 "PSM:compress(freefilepage)");

      if (! L[i].empty())
      {
         // [lower_bound, upper_bound) is the merged block being built
         psm_list_item_ptr cur  = L[i].get_header();
         int lower_bound        = cur->offset;
         int upper_bound        = lower_bound + cur->sz;
         cur                    = cur->next;

         while (cur != 0)
         {
            if (upper_bound == cur->offset)  // merge neighbouring blocks
               upper_bound += cur->sz;
            else                             // blocks are no neighbours
            {
               psm_insert_in_f_list(ci, upper_bound - lower_bound,
                                    lower_bound); // 2(a)
               lower_bound = cur->offset;
               upper_bound = lower_bound + cur->sz;
            }
            cur = cur->next;
         }

         if (upper_bound - lower_bound == psm_P)
         {
            // 2(b): the whole page is free. Marking it shadowed makes
            // psm_dealloc_pg flag its file page in free_file_pg as free.
            ci.is_shadowed[i] = TRUE;  
            psm_dealloc_pg(ci, i, 1);
         }
         else            
            psm_insert_in_f_list(ci, upper_bound - lower_bound, 
                                 lower_bound); // 2(a)
      }
   }

   // L was allocated with new[], so it must be released with the array
   // form of delete.
#ifdef VECTOR_DELETE_WITH_SIZE  
   delete [number_of_lists] L;
#else
   delete [] L;
#endif
   // TRACE
   TT(psm_VL, T_LEAVE);
}

//------------------------------------------------------------------------------

sos_Bool psm_is_cnt_open(sos_Int id) 
{
   // TRUE exactly if 'id' has an entry in the containertable, i.e. if its
   // status is one of READABLE, WRITEABLE, CHECKEDOUT or DESTROYED.

   psm_cnt_info *const entry = psm_cnt_tbl_position(id);
   const int found = (entry != (psm_cnt_info *)psm_UNDEF);
   return (sos_Bool)found;
}

//------------------------------------------------------------------------------

void psm_enter_cnt(sos_Int id, int fd, sos_Container_status 
                   stat, int fd_lock, sos_Bool cnt_create /* = FALSE */)
{

   // Makes the necessary initializations for the psm_cnt_info ci when ... 
   // ... opening a new container.
   //
   // id         : container id; the table entry is obtained with new_cnt(id)
   // fd         : descriptor of the container file, stored in ci.fd
   // stat       : initial status, stored in both ci.stat and ci.stat_ta
   // fd_lock    : stored in ci.fd_lock
   // cnt_create : stored in ci.is_cnt_created
   //
   // The tables of the entry are allocated here with a size derived from
   // the current length of the file, then the page table is read with
   // read_pg_tbl and auto_squeeze is called.

   // TRACE
   T_PROC("psm:psm_enter_cnt"); TT(psm_VL, T_ENTER; TI(id); TI(fd); 
           TI(stat); TI(fd_lock));

   fcntl(fd, F_SETFD, 1);    // close on exec(necessary for framework)
   psm_cnt_info &ci = new_cnt(id);
   ci.id             = id;
   ci.stat           = stat;
   ci.stat_ta        = stat;
   ci.is_cnt_open    = TRUE;
   ci.is_cnt_created = cnt_create; 
   ci.fd             = fd;
   ci.fd_lock        = fd_lock;
   int real_pgs      = psm_uprounded_pg_number(int(lseek(fd, 0, SEEK_END)));
                       // total number of file pages
   int tbl_sz        =(real_pgs == 0) ? psm_N
                       : N_upround(real_pgs);
                       // realpages rounded up to a positive multiple of N
   ci.cnt_tbl_sz     = tbl_sz;
   ci.file_tbl_sz    = tbl_sz;         // must be a multiple of N
   ci.file_index     = new int[tbl_sz];
   ci.is_shadowed    = new sos_Bool[tbl_sz];
   ci.free_file_pg   = new sos_Bool[tbl_sz];
   ci.pg_tbl_info    = new psm_pg_info[number_in_pg_tbl_info(tbl_sz)+1];
   // start with all page table info entries zeroed
   memset((char*)ci.pg_tbl_info, 0, sizeof(psm_pg_info)*
          (number_in_pg_tbl_info(tbl_sz)+1));
   ci.free_file_pg_index = -1; // = no freefilepage found(start value)

   // During the opening of a container for READING it's possible, that ...
   // ... another process updates the container(only during CHECKOUT). ...
   // ... Then it's necessary to read the pagetable once again.

   if (ci.stat == READABLE) 
   {
      fstat(fd, &file_stat_buf);
      int last_mod_time;
      // repeat until the modification time is the same before and after
      // reading the page table
      do
      {  
         last_mod_time =(int)file_stat_buf.st_mtime;  // last_modification_time
         read_pg_tbl(ci);
         fstat(fd, &file_stat_buf);
      }
      while (last_mod_time != file_stat_buf.st_mtime); 
                                                   // container has changed ...
                                                   // ... between readpagtable
      psm_set_last_read_time(ci, last_mod_time);  
   }
   else 
      read_pg_tbl(ci);


   // NOTE(review): ci.cnt_pgs is not assigned in this function; presumably
   // read_pg_tbl() sets it - confirm. Only the first ci.cnt_pgs entries of
   // buf_index are initialized here.
   ci.buf_index = new int[tbl_sz];
   for (int i = 0; i < ci.cnt_pgs; i++)
      ci.buf_index[i] = psm_UNDEF;

   ci.operation_at_end = NOP;
   auto_squeeze(ci);

#ifdef MONITOR
   if (mon_activated) 
   {  
      // The monitor window is titled "<new>" for a container with at most
      // one page, else with the name of a named root object or, failing
      // that, with the container id.
      ci.win = mon_create(ci.id, ci.cnt_pgs);
      char *title;
      if (ci.cnt_pgs <= 1) 
         title = obst_strdup("<new>");
      else
      {
         sos_Object o = sos_Container::make(ci.id).root_object();
         if (o.isa(sos_Named_type))
         {
            title = sos_Named::make(o).get_name().make_Cstring();
         } 
         else 
         {
            title = new char [32];
            sprintf(title, "%d", ci.id); 
         }
      }
      mon_open(ci.win, stat, title);
      // NOTE(review): title comes from obst_strdup, make_Cstring or
      // new char[32] but is released with scalar delete - confirm that the
      // allocators match (new char[] would need delete []).
      delete title; 
   }
#endif

   // TRACE
   TT(psm_VL, TI(ci.operation_at_end); T_LEAVE);
}

//------------------------------------------------------------------------------

psm_cnt_info &psm_lookup_cnt(int id, 
                             psm_precondition_for_lookup precondition_stat,
                             sos_Bool is_destroyed_permitted /* = FALSE */,
                             sos_Bool is_open_needed /* = FALSE */)
{
 
   // Returns a reference to the containertable entry of id( != 0). 
   // Every violated precondition is reported through err_raise or, for a
   // missing write permission, through *psm_write_err_fct_ptr.
   
   // precondition_status :

   // This flag indicates the preconditions for the status of the container.

   // NO_PERM :     If the container is not in the containertable(status...
   //               ...  is UNAVAILABLE), then it is tried to open the ... 
   //               ...  container(implicit open), i.e. there is NO ...
   //               ... PRECONDITION for the status of the container. ...
   //               ... If the container is in the containertable and ... 
   //               ... syncactivated is TRUE, then if open_needed is TRUE ...
   //               ... and the container is already closed, the container ... 
   //               ... is reopened with the  Accessmode READING. It is not ...
   //               ... allowed that the container is destroyed. ... 
   //               ... destroyed_permitted is without any effect.
   // READ_PERM :   The container must be opened(no implicit open ... 
   //               ... possible), but NO WRITE-PERMISSION is required ...
   //               ... i.e. status in {READABLE, CHECKEDOUT, WRITEABLE}. 
   // WRITE_PERM:   The container must be opened for writing, i.e. status ... 
   //               ... in {CHECKEDOUT, WRITEABLE}(and also DESTROYED ...
   //               ... if destroyed_permitted is set).

   // destroyed_permitted        : ...

   // ... If set, then status DESTROYED is allowed for the container. ... 
   // ... This flag is set only for the following operations : ...
   // ... destroy, close, commit, reset ; status, exists, object_exists. ...
   // ... The defaultvalue is therefore FALSE.

   // open_needed          : ...

   // If syncactivated, the psm_cnt_info is not deleted from the ... 
   // ... containertable when the container is closed. Only a switch ... 
   // ... 'ct_open' is set to FALSE. But the methods have sometimes to ... 
   // ... handle this container as if it is not in the containertable. ...
   // ... In this case 'open_needed' is set to TRUE. This switch has ...
   // ... no function if syncactivated is FALSE.
 

   psm_cnt_info *cnt = psm_cnt_tbl_position(id);

  
   // NOTE(review): the unbraced if/else chain below is interleaved with
   // #ifdef OBST_HAVE_JOYCE. Its error branches fall through to the checks
   // after the chain, which dereference cnt; this presumably relies on
   // err_raise not returning - confirm.

   // Case 1: no entry in the containertable.
   if (cnt ==(psm_cnt_info *)psm_UNDEF) 
      if (precondition_stat == NO_PERM)
         // implicit open for reading, then look the entry up again
         if (sos_Container::make(id).open(READING, WAITING)==OPENED)
            return psm_lookup_cnt(id, READ_PERM); 
         else
#ifdef OBST_HAVE_JOYCE
            if ((psm_is_syncactivated) && (sync_deadlock))
               err_raise(err_SYS, err_PSM_SYNC_TA_ABORTED, psm_ERR, FALSE);
            else
#endif
            {  
               // 36 characters, 20 reserved for %d's, 70 for %s <= 150 ... 
               // ... characters
               sprintf(err_buf,
                       "cannot open container %d\n\tUnix error %d : %.70s",
                       id, errno,
                      (0 <= errno && errno < sys_nerr) ?
                       sys_errlist[errno] : "***");
               err_raise(err_SYS, err_buf, psm_ERR, FALSE);
            }
      else 
         err_raise(err_SYS, err_PSM_UNOPENED_CONTAINER, psm_ERR, FALSE);
   else
   // Case 2: an entry exists.
#ifdef OBST_HAVE_JOYCE
      if ((psm_is_syncactivated) && (precondition_stat == NO_PERM)
           && (is_open_needed) && (!cnt->is_cnt_open))  // reopen of container
      { 
         if (cnt->stat == DESTROYED)
            err_raise(err_SYS, err_PSM_DESTROYED_CONTAINER, psm_ERR, FALSE);
         else
         {
            if (sos_Container::make(id).open(READING, WAITING)==OPENED)
               return psm_lookup_cnt(id, READ_PERM); 
            else
            {
               sprintf(err_buf,
                       "cannot reopen container %d\n\tUnix error %d : %.70s",
                       id, errno,(0 <= errno && errno < sys_nerr) ?
                       sys_errlist[errno] : "***");
               err_raise(err_SYS, err_buf, psm_ERR, FALSE);
            }
         }
      }
      else
#endif
      if (cnt->stat == DESTROYED)
         if (is_destroyed_permitted)
         { 
#ifdef OBST_HAVE_JOYCE
            if ((psm_is_syncactivated) && (is_open_needed) 
		&& (!cnt->is_cnt_open))
               err_raise(err_SYS, err_PSM_UNOPENED_CONTAINER, psm_ERR, FALSE);
#endif
            return *cnt;   // avoid check_if_writing
         }
         else
            err_raise(err_SYS, err_PSM_DESTROYED_CONTAINER, psm_ERR, FALSE);

   // check_if_writing: WRITE_PERM needs status WRITEABLE or CHECKEDOUT
   if (precondition_stat == WRITE_PERM  &&  cnt->stat != WRITEABLE  
        &&  cnt->stat != CHECKEDOUT)

     (*psm_write_err_fct_ptr)(id);

#ifdef OBST_HAVE_JOYCE
   // an entry that is kept in the table but closed counts as unopened
   if ((psm_is_syncactivated) && (is_open_needed) && (!cnt->is_cnt_open))
      err_raise(err_SYS, err_PSM_UNOPENED_CONTAINER, psm_ERR, FALSE);
#endif

   return *cnt;  
}

//------------------------------------------------------------------------------

int psm_is_mod_since_last_commit(psm_cnt_info &ci)
{ 
   // PRE: ci.stat == WRITEABLE  ||  ci.stat == CHECKEDOUT 
   //
   // Reports whether the container was changed after its last commit:
   // TRUE as soon as a page is found whose buffer copy is modified (write)
   // or whose page table part is flagged as modified (allocate,
   // deallocate, writepage); FALSE if no page qualifies.

   err_assert(ci.stat == WRITEABLE  ||  ci.stat == CHECKEDOUT,
              "PSM:psm_mod_since_last_commit");

   for (int pg = 0; pg < ci.cnt_pgs; pg++)
   {
      const int buf = ci.buf_index[pg];

      if ((buf != psm_UNDEF  &&  psm_buf_tbl[buf].is_mod)
          ||  ci.pg_tbl_info[number_in_pg_tbl_info(pg)].is_mod)
         return TRUE;       // at least one page is modified
   }

   return FALSE;
}

//------------------------------------------------------------------------------

int psm_is_mod_since_last_read(psm_cnt_info &ci)
{
   // PRE: ci.stat == READABLE 
   //
   // Reports whether the container file was modified since the last read,
   // i.e. since the status last became READABLE (access, open) or the
   // container was updated. A container can be modified by someone else
   // while it is checked out.
   //
   // The check compares ci.last_read_time with the current modification
   // time of the file. ci.last_read_time is set in entercontainer, in
   // access (when downgrading) and in updating()->entercontainer.
   // Comparing the page table with that of the current container would be
   // better encapsulated but much slower.

   err_assert(ci.stat == READABLE, "PSM:modified_since_last_read");

   fstat(ci.fd, &file_stat_buf);
   const int mtime_now = (int)file_stat_buf.st_mtime;

   return (ci.last_read_time != mtime_now);
}

//------------------------------------------------------------------------------

void psm_rm_cnt(psm_cnt_info &ci, int close_filedescriptors /* = TRUE */)
{ 
   // Removes container ci from the containertable:
   //  - every buffer slot belonging to the container is marked psm_UNUSED,
   //  - the entry itself is marked psm_UNUSED,
   //  - the tables of the entry are released,
   //  - if close_filedescriptors is set (the normal case when closing a
   //    container; it is not set when only updating) ci.fd is closed and,
   //    without LOCKD_CORRECT, ci.fd_lock too for status CHECKEDOUT.

   // TRACE
   T_PROC("psm:psm_rm_cnt"); TT(psm_VL, T_ENTER; TI(ci.id); 
          TB(ci.is_cnt_open));

   // must run before ci.id is reset: the slots are found by the id
   for (int i = 0; i < B; i++)
      if (psm_buf_tbl[i].id == ci.id)
      {
         psm_buf_tbl[i].id = psm_UNUSED;
#ifdef MONITOR
         if (mon_activated) 
            mon_white(ci.win, psm_buf_tbl[i].pg);
#endif
      }
   ci.id = psm_UNUSED;

   // The tables are arrays (psm_enter_cnt allocates them with new T[n]),
   // so they are released with the array form of delete.
   delete [] ci.buf_index;
   delete [] ci.file_index;
   delete [] ci.is_shadowed;
   delete [] ci.free_file_pg;
   delete [] ci.pg_tbl_info;

   if (close_filedescriptors)
   {
      ::close(ci.fd);                   // close and unlocks file
#ifndef LOCKD_CORRECT
      if (
#  ifdef OBST_HAVE_JOYCE
          (!psm_is_syncactivated) &&
#  endif
          (ci.stat == CHECKEDOUT))
         ::close(ci.fd_lock);
#endif
   }

#ifdef MONITOR
   if (mon_activated) 
      mon_close(ci.win);
#endif

   // TRACE
   TT(psm_VL, T_LEAVE);
}


//------------------------------------------------------------------------------

void psm_reset_cnt(psm_cnt_info &ci)
{
   // Throws away all uncommitted changes of container ci:
   //(1) modified pages are dropped from the buffer,
   //(2) the page table is read again from the container file.
   // A container in status DESTROYED only has its status set back.

   // TRACE
   T_PROC("psm:psm_reset_cnt"); TT(psm_VL, T_ENTER; TI(ci.id); 
          TB(ci.is_cnt_open); TI(ci.operation_at_end));

   if (ci.stat != DESTROYED)
   {
      // Drop modified pages from the buffer. (For status DESTROYED this
      // was already done in clear->deallocatepages.)
      for (int pg = 0; pg < ci.cnt_pgs; pg++)
      {
         const int buf = ci.buf_index[pg];
         if (buf == psm_UNDEF  ||  ! psm_buf_tbl[buf].is_mod)
            continue;

         psm_buf_tbl[buf].id = psm_UNUSED;
         ci.buf_index[pg]    = psm_UNDEF;
      }
   }
   else
      // destroy is only allowed with status WRITEABLE
      ci.stat = WRITEABLE;

   read_pg_tbl(ci);             // readpagetable from containerfile

   ci.operation_at_end = NOP;
   auto_squeeze(ci);

   // TRACE
   TT(psm_VL, TI(ci.operation_at_end); T_LEAVE);
}

//------------------------------------------------------------------------------

void psm_squeeze_cnt(psm_cnt_info &ci) 
{
   // Prepares a reduction of the containerfile size, if possible.
   //
   // Every page whose file page number is not below the number of used
   // pages is marked as modified. The commitcontainer that follows
   // immediately after squeezecontainer then rewrites these pages, which
   // lets them reuse the unused file pages in the first part of the file.
   // An empty container is simply cleared.

   // TRACE
   T_PROC("psm:psm_squeeze_cnt"); TT(psm_VL, T_ENTER;
          TI(ci.id); TB(ci.is_cnt_open));

   if (psm_occupied_bytes(ci) != 0)
   {
      int u = used_pgs(ci);

      for (int pg = 0; pg < ci.cnt_pgs; pg++)
      {
         if (ci.file_index[pg] < u)            // pre : psm_UNDEF < 0
            continue;
         psm_buf_tbl[psm_lookup_pg(ci, pg)].is_mod = TRUE; 
      }

      // Restart the search for free file pages. Otherwise the following
      // commit (commit -> findfreefilepage -> ...) could write the
      // modified pages to free pages at the end of the container file and
      // the file would not be truncated, or only partly.
      ci.free_file_pg_index = -1;
   }
   else
      psm_dealloc_pg(ci, 0, ci.cnt_pgs);       // clear();      

   // TRACE
   TT(psm_VL, T_LEAVE);
}

//------------------------------------------------------------------------------

sos_Bool psm_is_squeeze_possible(psm_cnt_info&)
{
   // Always answers TRUE; the container argument is not inspected.
   //
   // Background: for ci.stat_ta == CHECKEDOUT it has to be checked whether
   // the container may be squeezed, i.e. the synchronization component
   // must make sure that no other transaction works on this container.
   // No exclusive lock is requested here because the squeeze happens while
   // the SYNC_Container is locked exclusively: this function is only
   // called by write_close, and by then the SYNC_Container should already
   // be locked.

   return TRUE;
}

// -----------------------------------------------------------------------------

void psm_updating(psm_cnt_info &ci)
{
   // Brings the contents of a READABLE container to the current state of
   // its file (the container may have been modified during a checkout).
   //(1) Useful when another user, who has the container checked out, has
   //    modified it and the new contents are wanted. This only makes sense
   //    for status READABLE.
   //(2) Required when upgrading from status READABLE because the page
   //    table is not valid anymore (see open, access).

   err_assert(ci.stat == READABLE, 
              "PSM: deallocate in freelists not allowed");

   if (! psm_is_mod_since_last_read(ci)) 
      return;     // no update necessary

   // Conceptually: close & open again. To be more efficient and reliable
   // (the open could fail because of another writer, although this is very
   // unlikely) the open of the Unix file and the lock are kept: the entry
   // is removed without closing and entered again with the same
   // descriptors.

   const sos_Int id      = ci.id;
   const int     fd      = ci.fd;
   const int     fd_lock = ci.fd_lock;

   psm_rm_cnt(ci, FALSE);      // remove without closing/unlocking file
   psm_enter_cnt(id, fd, READABLE, fd_lock); 
}

 
// *****************************************************************************
// PRIVATE FUNCTION DEFINITIONS 
// *****************************************************************************

// ================= psm functions =============================================

// ----------------- calculation functions -------------------------------------

LOCAL inline sos_Int abs_sw_in_pg_offset(sos_Bool sw, sos_Int pg_number)
{ 
   // Absolute offset in the containerfile of the pagetable switch that
   // belongs to pagetable page pg_number: start of that pagetable page
   // plus the position of the switch inside the page.

   const sos_Int first_pg = (sos_Int)(pg_number * (psm_N + 1));

   return first_pg * psm_P + sw_in_pg_offset(sw);
}

//------------------------------------------------------------------------------

LOCAL  void calculate_free_file_pgs(psm_cnt_info &ci) 
{
   // Recomputes ci.free_file_pg and ci.file_pgs from ci.file_index and
   // clears ci.is_shadowed for all container pages (this function is only
   // used in readpagetable/writepagetable).
   //
   // free_file_pg[f] becomes TRUE for every file page f that no container
   // page refers to. file_pgs is taken from the file length for status
   // CHECKEDOUT, otherwise it is the highest referenced file page + 1.

   int i;   // shared by both loops; declared once so that the code does not
            // depend on the pre-standard scope of for-loop variables

   for (i = 0; i < ci.file_tbl_sz; i++) 
      ci.free_file_pg[i] = TRUE;

   ci.file_pgs =(ci.stat == CHECKEDOUT) 
                 ? psm_uprounded_pg_number(
                 int(lseek(ci.fd, 0, SEEK_END))) : 0;
 
   for (i = 0; i < ci.cnt_pgs; i++)
   {   
      if (ci.file_index[i] != psm_UNDEF)
      {   
         ci.free_file_pg[ci.file_index[i]] = FALSE;
         if (ci.stat != CHECKEDOUT)
            ci.file_pgs = psm_max(ci.file_pgs, ci.file_index[i] + 1);
            // "+1" because of "offset + 1 page"
      }
      ci.is_shadowed[i] = FALSE; 
   } 
}

//------------------------------------------------------------------------------

LOCAL inline sos_Offset cnt_pgs_offset(sos_Int sw)
{
   // Offset of the page counter selected by switch sw: integer slot 1 for
   // a non-zero switch, integer slot 2 otherwise.

   if (sw)
      return(sos_Offset)(1 * psm_SZ_OF_INT);

   return(sos_Offset)(2 * psm_SZ_OF_INT);
}

//------------------------------------------------------------------------------

LOCAL inline  int file_position(int file_pg) 
{   
   // Byte position of file page file_pg in the container file.
   //
   // Besides the data pages themselves (= file_pg) the pages of the "page
   // table", which are scattered in the container file (= file_pg/N), and
   // the rootoffset (1 = table of free blocks) have to be skipped.

   return(file_pg/psm_N + file_pg + 1) * psm_P; 
}

//------------------------------------------------------------------------------

LOCAL inline unsigned free_pgs(psm_cnt_info &ci) 
{
   // Number of free places (psm_UNDEF) in fileindex: all container pages
   // minus the used ones.

   const unsigned used = used_pgs(ci);

   return(unsigned)(ci.cnt_pgs - used); 
}

//------------------------------------------------------------------------------

LOCAL inline unsigned free_sz(psm_cnt_info &ci) 
{
   // Free size in bytes of the logical container (not of the container
   // file): the free pages plus all blocks on the free lists.

   const unsigned in_pages  = free_pgs(ci) * psm_P;
   const unsigned in_blocks = sz_of_free_blocks(ci);

   return(unsigned)(in_pages + in_blocks);
}

//------------------------------------------------------------------------------

LOCAL inline int highest_valid_pg_tbl_part(int cnt_pgs)
{
   // Returns the offset of the highest valid pagetablepart + P (=> the end
   // of this part, see writepagetable), or 0 for an empty container.
   // cnt_pgs is the number of pages in the container.
   //
   // In units of P the result is 1 for the first N container pages and
   // grows by N+1 for every further N pages:
   //    1,  1*(N+1)+1,  2*(N+1)+1, ...

   if (cnt_pgs <= 0)
      return 0;

   return((cnt_pgs - 1) / psm_N * (psm_N + 1) + 1) * psm_P;
}

//------------------------------------------------------------------------------

LOCAL inline int necessary_file_pgs(int used_pgs, int cnt_pgs) 
{ 
   // Returns the lowest number of filepages which are necessary to store
   // all contpages: the larger of
   //  - the used pages plus one pagetable page per N container pages, and
   //  - the start of the highest valid pagetable part.
   //
   // NOTE(review): the second value is derived from
   // highest_valid_pg_tbl_part(), which multiplies by psm_P, while the
   // first one counts pages - confirm the intended unit.

   return psm_max(N_upround(cnt_pgs)/psm_N + used_pgs,
                  highest_valid_pg_tbl_part(cnt_pgs) - psm_P); 
}

//------------------------------------------------------------------------------

LOCAL inline int N_upround(int nr)                
{
   // Rounds nr up to a multiple of psm_N (nr itself if it already is one).

   const int started_groups = (nr + psm_N - 1) / psm_N;

   return started_groups * psm_N;
}

//------------------------------------------------------------------------------

LOCAL inline sos_Int pg_in_pg_tbl_buf_offset(sos_Bool in_pg_sw)
{  
   // Byte offset, inside the page table buffer, of the page table ...
   // ... selected by in_pg_sw: the first table starts after 4 integers, ...
   // ... the second one a further psm_N integers later.

   if (in_pg_sw)
      return (sos_Int)((4 + psm_N) * psm_SZ_OF_INT);

   return (sos_Int)(4 * psm_SZ_OF_INT);
}

//------------------------------------------------------------------------------

LOCAL inline sos_Offset pg_tbl_offset(int pg, sos_Int sw)
{
   // Offset in the container file of the page table that holds the entry ...
   // ... for container page `pg`. It is the sum of
   //(1) the offset of the page table page responsible for `pg`, and
   //(2) the offset of the table selected by `sw` inside that page.

   const int pg_tbl_pg_start = (pg / psm_N) * (psm_N + 1) * psm_P;
   const int tbl_in_pg_start = (sw ? 4 + psm_N : 4) * psm_SZ_OF_INT;

   return (sos_Offset)(int)(pg_tbl_pg_start + tbl_in_pg_start);
}

//------------------------------------------------------------------------------

LOCAL  unsigned sz_of_free_blocks(psm_cnt_info &ci) 
{
   // Sums the sizes of all blocks in the free block lists of the ...
   // ... container; completely free pages are NOT included.
   // For every block size len (multiples of 4 up to ...
   // ... psm_MAX_FREEBLOCK_LENGTH) the list is entered at offset len and ...
   // ... followed through the integers read with psm_read_int until a 0 ...
   // ... link is found. An empty container contributes nothing.

   unsigned total = 0;

   if (ci.cnt_pgs != 0)
   {
      for (int len = 4; len <= psm_MAX_FREEBLOCK_LENGTH; len += 4)
      {
         // one more free block of size len for every non-zero link
         for (int link = len;
              psm_read_int(ci, link) != 0;
              link = psm_read_int(ci, link))
            total += len;
      }
   }
   return total;
}

//------------------------------------------------------------------------------

LOCAL inline sos_Int sw_in_pg_offset(sos_Bool sw)
{  
   // Offset (relative to the beginning of the current page table page) of ...
   // ... the page table switch: it lies behind the first three integers, ...
   // ... one position further if sw is set.

   sos_Int offset = 3 * psm_SZ_OF_INT;

   if (sw)
      offset += 1;
   return offset;
}

//------------------------------------------------------------------------------

LOCAL  unsigned used_pgs(psm_cnt_info &ci) 
{
   // Counts the container pages that occupy a file page, i.e. the ...
   // ... entries of ci.file_index that differ from psm_UNDEF. ...
   // ... (The container file itself is normally larger than that ...
   // ... because of unused and shadowed pages.)

   unsigned occupied = 0;

   for (int pg = 0; pg < ci.cnt_pgs; ++pg)
      occupied += (ci.file_index[pg] != psm_UNDEF) ? 1 : 0;

   return occupied; 
}

// ----------------- time functions --------------------------------------------
// ----------------- lock and lock auxiliary functions -------------------------

LOCAL inline int status_equal_accessmode(sos_Container_status stat,
                                          sos_Access_mode am)
{
   // Tells whether container status `stat` is the one that corresponds ...
   // ... to access mode `am` (the access mode is converted to a status ...
   // ... value and compared). Status DESTROYED must not be passed.

   err_assert(stat != DESTROYED, 
             "PSM:status_equal_accessmode():status DESTROYED is not allowed");

   const sos_Container_status wanted = sos_Container_status(am);

   return (stat == wanted);
}

// ----------------- page and integer functions --------------------------------

LOCAL  int find_free_file_pg(psm_cnt_info &ci)
{ 
   // Picks a free file page, marks it as used and returns its number. ...
   // ... If no free page exists (or the container is CHECKEDOUT) a new ...
   // ... page is appended, which increments ci.file_pgs and, if the ...
   // ... table ci.free_file_pg is exhausted, doubles that table.

   // number of probed pages; ci.file_pgs means "nothing found => append"
   int probed = ci.file_pgs;   // a p p e n d  at end, if status == CHECKEDOUT

   if (ci.stat != CHECKEDOUT)
   {
      // Start one behind the page handed out last. The modulo is needed ...
      // ... because the container may have been truncated in the meantime ...
      // ... (see write_pg_tbl), which can leave the cursor outside the file.
      ci.free_file_pg_index = (ci.file_pgs > 0)
                              ? (ci.free_file_pg_index + 1) % ci.file_pgs
                              : 0;

      // cyclic search; gives up after one full round
      for (probed = 0;
           probed < ci.file_pgs  &&  ! ci.free_file_pg[ci.free_file_pg_index];
           probed++)
         ci.free_file_pg_index = (ci.free_file_pg_index + 1) % ci.file_pgs;
   }

   if (probed == ci.file_pgs)   
   {   
      // no free page available => take the page behind the current end
      if (ci.file_tbl_sz == probed)
      {  
         // table of free file pages is full => double it
         ci.free_file_pg = (sos_Bool*)psm_extend_tbl
                           (ci.free_file_pg, sizeof(sos_Bool), probed, 2*probed);
         ci.file_tbl_sz *= 2;
      }
      ci.free_file_pg_index = probed;
      ci.file_pgs++; 
   }

   const int chosen = ci.free_file_pg_index;
   ci.free_file_pg[chosen] = FALSE;
   return chosen;
}

//------------------------------------------------------------------------------

LOCAL  int preempt_pg(sos_Int id, int cnt_pg) 
{
   // Selects a buffer page for page 'cnt_pg' of container 'id' and returns ...
   // ... its index in psm_buf_tbl. The victim is searched with the second ... 
   // ... chance algorithm, starting at the global cursor buf_tbl_index. ... 
   // ... If the victim still holds a page of some container, that page is ...
   // ... written back when it is modified and its buffer mapping is ...
   // ... removed. Finally the psm_buf_tbl entry is claimed for (id, cnt_pg) ...
   // ... and the cursor is advanced behind it.
   // The caller is responsible for filling the buffer page and for ...
   // ... updating the buf_index of the new owner; neither happens here.

   // Second chance: every entry passed with is_ref set loses the flag and ...
   // ... is skipped once (per the original note, is_ref is set in the ...
   // ... page lookup). The loop stops at the first unreferenced entry.

   while (psm_buf_tbl[buf_tbl_index].is_ref)
   { 
      psm_buf_tbl[buf_tbl_index].is_ref = FALSE;
      buf_tbl_index =(buf_tbl_index + 1) % B; 
   }
    
   if (psm_buf_tbl[buf_tbl_index].id != psm_UNUSED) 
   {    
      // The chosen buffer page is occupied: look up its owning container, ...
      // ... write the page back if it was modified and mark the page as ...
      // ... not buffered any more in the owner's buf_index.
      psm_cnt_info &ci = psm_lookup_cnt(psm_buf_tbl[buf_tbl_index].id,
                                        READ_PERM);
      if (psm_buf_tbl[buf_tbl_index].is_mod)
         write_pg(ci, psm_buf_tbl[buf_tbl_index].pg, buf_tbl_index);
      ci.buf_index[psm_buf_tbl[buf_tbl_index].pg] = psm_UNDEF;
#ifdef MONITOR
      // NOTE(review): presumably updates the monitor display for the ...
      // ... evicted page - confirm against mon_white.
      if (mon_activated) mon_white(ci.win, psm_buf_tbl[buf_tbl_index].pg);
#endif
   }

   // Claim the entry for the new page and move the cursor to the next entry.
   psm_buf_tbl[buf_tbl_index].id     = id;
   psm_buf_tbl[buf_tbl_index].pg     = cnt_pg;
   psm_buf_tbl[buf_tbl_index].is_mod = FALSE;
   int result                        = buf_tbl_index;
   buf_tbl_index                     =(buf_tbl_index + 1) % B;
   return result;
}

//------------------------------------------------------------------------------

LOCAL  void read_pg(psm_cnt_info &ci, int cnt_pg, int buf_pg)
{
   // Loads container page cnt_pg from the container file into buffer ...
   // ... page buf_pg. A container page without an assigned file page ...
   // ... (psm_UNDEF) is reported as err_PSM_WRONG_OFFSET.
   // NOTE(review): the results of lseek and read are not checked, so a ...
   // ... failed or short read goes unnoticed.

   const int src_file_pg = ci.file_index[cnt_pg];

   if (src_file_pg == psm_UNDEF) 
   {
      err_raise(err_SYS, err_PSM_WRONG_OFFSET, psm_ERR, FALSE);
   }

   // position on the file page, then transfer exactly one page
   lseek(ci.fd, file_position(src_file_pg), SEEK_SET);
   read(ci.fd, (char *)psm_buf[buf_pg], psm_P); 
}

//------------------------------------------------------------------------------

LOCAL  void write_pg(psm_cnt_info &ci, int cnt_pg, int buf_pg)
{
   // Stores buffer page buf_pg as container page cnt_pg in the container ...
   // ... file. A page that is not shadowed yet is first redirected to a ...
   // ... newly found free file page; its page table part is then marked ...
   // ... as modified. A page without a file page (psm_UNDEF) is reported ...
   // ... as err_PSM_WRITEPAGE.
   // NOTE(review): the results of lseek and write are not checked.

   int &target_file_pg = ci.file_index[cnt_pg];   // updated in place below

   if (target_file_pg == psm_UNDEF)
   {
      err_raise(err_SYS, err_PSM_WRITEPAGE, psm_ERR, FALSE);
   }

   if (! ci.is_shadowed[cnt_pg])
   {   
      // first write of this page: switch to a shadow page
      ci.is_shadowed[cnt_pg] = TRUE; 
      ci.pg_tbl_info[number_in_pg_tbl_info(cnt_pg)].is_mod = TRUE;
      target_file_pg = find_free_file_pg(ci);
   }

   lseek(ci.fd, file_position(target_file_pg), SEEK_SET);
   write(ci.fd, (char *)psm_buf[buf_pg], psm_P);
}

// ----------------- pagetable functions ---------------------------------------

LOCAL  void read_pg_tbl(psm_cnt_info &ci) 
{
   // Loads the page table of a container from its file: the global ...
   // ... switch ci.sw, the page count ci.cnt_pgs and, page table page by ...
   // ... page table page, the file index ci.file_index together with the ...
   // ... per-page switch state in ci.pg_tbl_info. Afterwards the table of ...
   // ... free file pages is recalculated (calculate_free_file_pgs) and ...
   // ... ci.ub_free_sz is reset to psm_MAX_FREEBLOCK_LENGTH.
   // An empty file (only during create) yields ci.sw = 0, ci.cnt_pgs = 0.
   // NOTE(review): none of the lseek/read results is checked; a truncated ...
   // ... or unreadable file is not detected here.

   // TRACE
   T_PROC("psm:read_pg_tbl"); TT(psm_VL, T_ENTER; TI(ci.id); 
          TB(ci.is_cnt_open));
   
   // seeking to the end returns the file size; 0 means the file is empty
   if (lseek(ci.fd, 0, SEEK_END) == 0)  
   {
      // container file empty(only during create)
      ci.sw        = 0;
      ci.cnt_pgs   = 0; 
   }
   else  
   {
      // read ci.sw, ci.cnt_pgs, and ci.file_index from file
      lseek(ci.fd, psm_SW_OFFSET, SEEK_SET);           // ci.sw
      read(ci.fd,(char *)&ci.sw, psm_SZ_OF_INT);
      // the location of the page count depends on the switch just read
      lseek(ci.fd, cnt_pgs_offset(ci.sw), SEEK_SET); // ci.cnt_pgs
      read(ci.fd,(char *)&ci.cnt_pgs, psm_SZ_OF_INT);
      int j;
      // one iteration per page table page (each covers psm_N container pages)
      for (int i = 0; i < ci.cnt_pgs; i += psm_N)             // file index
      {   
         // number of the current page table page
         j=number_in_pg_tbl_info(i);
         // bookkeeping entry of the current page table page
         psm_pg_info & cur_pg_tbl_info=ci.pg_tbl_info[j];
         // page table pages lie psm_N+1 pages apart; read the whole page
         lseek(ci.fd, j*(psm_N+1)*psm_P, SEEK_SET);
         read(ci.fd,(char *)pg_tbl_buf, psm_P);
         cur_pg_tbl_info.is_mod=FALSE;
         // cur_sel_sw: in-page switch stored for the current value of ...
         // ... ci.sw; old_sel_sw: the one stored for the opposite value
         cur_pg_tbl_info.cur_sel_sw=sos_Bool(pg_tbl_buf[
                                         sw_in_pg_offset(sos_Bool(
                                         ci.sw))]);
         cur_pg_tbl_info.old_sel_sw=sos_Bool(pg_tbl_buf[
                                         sw_in_pg_offset(sos_Bool(
                                         !ci.sw))]);
         // Copy the table selected by cur_sel_sw into the file index. ...
         // ... Always psm_N entries are copied, even for the last, ...
         // ... possibly partial group.
         // NOTE(review): assumes ci.file_index has room for a multiple ...
         // ... of psm_N entries - confirm where it is allocated.
         memcpy(&ci.file_index[i], &pg_tbl_buf[
                pg_in_pg_tbl_buf_offset(cur_pg_tbl_info.cur_sel_sw)],
                psm_N*sizeof(int));
       }
   }
   calculate_free_file_pgs(ci);
   ci.ub_free_sz = psm_MAX_FREEBLOCK_LENGTH; 

   // TRACE
   TT(psm_VL, T_LEAVE);
}

//------------------------------------------------------------------------------

LOCAL  void write_pg_tbl(psm_cnt_info &ci) 
{
   // Writes the page table of a container to its file. Steps, in order:
   //  (1) toggle ci.sw in memory and write ci.cnt_pgs to the location ...
   //      ... that belongs to the NEW switch value,
   //  (2) for every page table page write the changed file index part ...
   //      ... and/or the in-page switch that belongs to the new ci.sw,
   //  (3) fsync, write the new ci.sw to the file, fsync again,
   //  (4) recalculate the free file pages and, unless the container is ...
   //      ... CHECKEDOUT, truncate the file.
   // The global switch is written only after everything else has been ...
   // ... synced, so the statement order in this function matters.
   // NOTE(review): none of the lseek/write/fsync/ftruncate results is ...
   // ... checked.

   // TRACE
   T_PROC("psm:write_pg_tbl"); TT(psm_VL, T_ENTER; TI(ci.id); 
          TB(ci.is_cnt_open)); 

   ci.sw = !ci.sw;
   lseek(ci.fd, cnt_pgs_offset(ci.sw), SEEK_SET);
   write(ci.fd,(char *)&ci.cnt_pgs, psm_SZ_OF_INT);       // ci.cnt_pgs
   int j;
   char sw;
   // one iteration per page table page (each covers psm_N container pages)
   for (int i = 0; i < ci.cnt_pgs; i += psm_N) 
   {  
      // number of the current page table page
      j=number_in_pg_tbl_info(i);
      // bookkeeping entry of the current page table page
      psm_pg_info & cur_pg_tbl_info=ci.pg_tbl_info[j];
      if (cur_pg_tbl_info.is_mod)
      { 
         // This part of the file index changed and must be written.
         cur_pg_tbl_info.is_mod=FALSE;
         if (cur_pg_tbl_info.cur_sel_sw == 
             cur_pg_tbl_info.old_sel_sw)
         { 
            // Both in-page switches select the same table: redirect the ...
            // ... one for the new ci.sw to the other table, so that the ...
            // ... table currently in effect is not overwritten below.
            cur_pg_tbl_info.old_sel_sw= sos_Bool
                                        (!cur_pg_tbl_info.old_sel_sw);
            lseek(ci.fd, abs_sw_in_pg_offset(sos_Bool(ci.sw), j), SEEK_SET);
            sw =(char)cur_pg_tbl_info.old_sel_sw;
            write(ci.fd,(char*)&sw, sizeof(char));
         }
         // write psm_N file index entries into the table chosen by old_sel_sw
         lseek(ci.fd, pg_tbl_offset(i, cur_pg_tbl_info.old_sel_sw),
               SEEK_SET);
         write(ci.fd,(char *) &ci.file_index[i], psm_N * sizeof(int)); 
      }
      else
         if (cur_pg_tbl_info.cur_sel_sw != 
             cur_pg_tbl_info.old_sel_sw)
         { 
            // Unchanged part whose switches differ: let the switch for ...
            // ... the new ci.sw select the table currently in effect.
            cur_pg_tbl_info.old_sel_sw= cur_pg_tbl_info.cur_sel_sw;
            lseek(ci.fd, abs_sw_in_pg_offset(sos_Bool(ci.sw), j),
                  SEEK_SET);
            sw =(char) cur_pg_tbl_info.old_sel_sw;
            write(ci.fd,(char*) &sw, sizeof(char));
         }    
      // ci.sw has been toggled, so the roles of the two switches swap.
      sos_Bool tempsw=cur_pg_tbl_info.old_sel_sw;
      cur_pg_tbl_info.old_sel_sw=cur_pg_tbl_info.cur_sel_sw;
      cur_pg_tbl_info.cur_sel_sw=tempsw;
   }
   // make everything durable before the global switch is flipped on disk
   fsync(ci.fd);
   lseek(ci.fd, psm_SW_OFFSET, SEEK_SET);
   write(ci.fd,(char *) &ci.sw, psm_SZ_OF_INT);
   fsync(ci.fd);
   calculate_free_file_pgs(ci);

   if (ci.stat != CHECKEDOUT)     
   {  
      // Truncate the container file behind the last valid page. This must ...
      // ... NOT happen when the status is CHECKEDOUT.
      // The new length is the larger of the end of the highest valid ...
      // ... page table part and the file position of page ci.file_pgs.
      int hvp = highest_valid_pg_tbl_part(ci.cnt_pgs); 
      int hrp = file_position(ci.file_pgs);   // highest referenced page
      ftruncate(ci.fd, psm_max(hvp, hrp));  
   }

   // TRACE
   TT(psm_VL, T_LEAVE);
}

// ----------------- containertable functions ----------------------------------

LOCAL psm_cnt_info &new_cnt(sos_Int id) 
{
   // Returns an unused entry (id == psm_UNUSED) of the container table ...
   // ... psm_cnt_tbl for container `id`. The search starts at slot ...
   // ... id % psm_C, which in most cases is a good starting point, and ...
   // ... walks DOWNWARDS through the table, wrapping from the first slot ...
   // ... to the last one. If the whole table has been probed without ...
   // ... success, err_PSM_CONTAINER_TABLE_FULL is raised.
   // NOTE(review): a negative id would give a negative start index - ...
   // ... presumably container ids are never negative; confirm at callers.

   psm_cnt_info *const start = &psm_cnt_tbl[id % psm_C];
   psm_cnt_info *probe       = start;

   do
   {  
      if (probe->id == psm_UNUSED) 
         return *probe;

      // Step to the previous slot. "probe - 1" is used instead of ...
      // ... "-- probe": assigning the result of a pre-decrement back to ...
      // ... the same variable modifies it twice in one expression.
      probe = (probe == psm_cnt_tbl) ? &psm_cnt_tbl[psm_C-1] : probe - 1;
   } while (probe != start);

   err_raise(err_SYS, err_PSM_CONTAINER_TABLE_FULL, psm_ERR, FALSE);
   return psm_cnt_tbl[0];  // never reached! - only to avoid warnings
}

// ----------------- container functions ---------------------------------------

LOCAL inline void auto_squeeze(psm_cnt_info &ci)
{
   // Squeezes the container if the conditions for an automatic squeeze ...
   // ... are fulfilled (per the original note, called when a container ...
   // ... is entered):
   //   - AUTOSQUEEZE is switched on,
   //   - the container status is WRITEABLE or CHECKEDOUT, and
   //   - the ratio of necessary file pages to the file pages actually ...
   //     ... in use by the file is below MIN_OCCUPATION_DEGREE.
   // A WRITEABLE container is committed, squeezed and committed again. ...
   // ... A CHECKEDOUT container is only squeezed if a write lock can be ...
   // ... obtained without waiting; it is treated as WRITEABLE for the ...
   // ... duration of the squeeze and then locked for checkout again.
   //
   // The status is compared against WRITEABLE (a container status), not ...
   // ... against the access mode WRITING, so that the guard agrees with ...
   // ... the branches below and does not mix the two enumerations.

   if (AUTOSQUEEZE == 1
       && (ci.stat == WRITEABLE || ci.stat == CHECKEDOUT)
       && (double) necessary_file_pgs((int)used_pgs(ci),(int)ci.cnt_pgs)
       /((double)ci.file_pgs) < MIN_OCCUPATION_DEGREE)
   {
#ifdef OBST_HAVE_JOYCE
      // With activated synchronization the squeeze is only scheduled ...
      // ... (unless another operation is already pending); the sync ...
      // ... container itself is squeezed immediately by the code below.
      if ((psm_is_syncactivated) && (ci.id != SYNC_CONTAINER))
         // autosqueeze will be done in write_close()
         ci.operation_at_end =(ci.operation_at_end == NOP
                               ? SQUEEZE : ci.operation_at_end);
      else
#endif
         if (ci.stat == WRITEABLE) 
         {
            psm_commit_cnt(ci);
            psm_squeeze_cnt(ci);
            psm_commit_cnt(ci);
         }
         else 
            if (ci.stat == CHECKEDOUT)
            {
               // try to get a write lock without waiting
               if (OPENED == psm_lock(ci.fd, WRITING, TESTING,
                   CHECKEDOUT, ci.id, ci.fd_lock))
               {
                  ci.stat=WRITEABLE;
                  psm_commit_cnt(ci);
                  psm_squeeze_cnt(ci);
                  psm_commit_cnt(ci);
                  // back to the checkout lock; this call may wait
                  ci.stat=CHECKEDOUT;
                  psm_lock(ci.fd, CHECKOUT, WAITING,
                       WRITEABLE, ci.id, ci.fd_lock); 
               }
            }
    } 
}

//------------------------------------------------------------------------------
