From xemacs-m  Fri Jun 20 09:47:42 1997
Received: from cnri.reston.va.us (cnri.CNRI.Reston.VA.US [132.151.1.1])
	by xemacs.org (8.8.5/8.8.5) with ESMTP id JAA03679
	for <xemacs-beta@xemacs.org>; Fri, 20 Jun 1997 09:47:41 -0500 (CDT)
Received: from newcnri.CNRI.Reston.Va.US (newcnri [132.151.1.84]) by cnri.reston.va.us (8.8.5/8.7.3) with SMTPid KAA06487; Fri, 20 Jun 1997 10:50:53 -0400 (EDT)
Received: from anthem.CNRI.Reston.Va.US by newcnri.CNRI.Reston.Va.US (SMI-8.6/SMI-SVR4)
	id KAA11672; Fri, 20 Jun 1997 10:51:48 -0400
Received: by anthem.CNRI.Reston.Va.US (SMI-8.6/SMI-SVR4)
	id KAA19145; Fri, 20 Jun 1997 10:50:58 -0400
Date: Fri, 20 Jun 1997 10:50:58 -0400
Message-Id: <199706201450.KAA19145@anthem.CNRI.Reston.Va.US>
From: "Barry A. Warsaw" <bwarsaw@CNRI.Reston.Va.US>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
To: karlheg@inetarena.com (Karl M. Hegbloom)
Cc: Hrvoje Niksic <hniksic@srce.hr>,
        XEmacs Developers <xemacs-beta@xemacs.org>
Subject: regi.el
References: <87lo46iybz.fsf@bittersweet.inetarena.com>
	<kig4tau1nis.fsf@jagor.srce.hr>
	<87u3iuowc6.fsf@bittersweet.inetarena.com>
X-Mailer: VM 6.32 under 20.3 "Moscow" XEmacs Lucid (beta6)
Reply-To: bwarsaw@python.org
X-Attribution: BAW
X-Oblique-Strategy: Add a layer
X-Url: http://www.python.org/~bwarsaw


>>>>> "KMH" == Karl M Hegbloom <karlheg@inetarena.com> writes:

    KMH> I've started reading through `regi.el' too. :-)

Oh gawd, please don't!  ;-)

I wrote that stuff a *long* time ago and it's pretty crappy.  And
slow!  I'm using something much simpler and quicker in Supercite 4
(still sadly on hold due to lack of time).  I append the code below,
along with an example.

-Barry

-------------------- snip snip --------------------
;; parsing state machine
(defmacro sc-state-incr (&rest body)
  ;; execute body and go to the next state, remaining on current line
  (` (progn (,@ body) (1+ statei))))

(defmacro sc-state-if (cond &rest body)
  ;; if condition is true, execute body, then go to next line and
  ;; reset counter to state-start.  otherwise go to next state
  (` (if (not (, cond))
	 (1+ statei)
       (,@ body)
       (forward-line)
       state-start)))

(defmacro sc-state-looking-at (regexp &rest body)
  ;; if looking-at the regexp, execute body, then go to next line and
  ;; reset counter to state-start.  otherwise, go to next state
  (` (if (not (looking-at (, regexp)))
	 (1+ statei)
       (,@ body)
       (forward-line)
       state-start)))

(defmacro sc-state-looking-at-done (regexp)
  ;; if looking-at the regexp, go to state-last, otherwise go to next state
  (` (if (not (looking-at (, regexp)))
	 (1+ statei)
       state-last)))

(defmacro sc-state-do-and-exit (&rest body)
  ;; execute body and exit state machine
  (` (progn (,@ body) nil)))

(defun sc-state-execute (program)
  "State machine execution.
PROGRAM is a vector containing the states as `eval'able expressions.
Each expression is a different state and transitions are determined by
the expression's return values.  The expression can do anything it
wants, and can have any side effect, but it must return an integer or
nil.

If the expression returns an integer, it must be between zero and one
less than the length of the vector.  It indicates the next state to
transition to.  If the return value is nil, execution ends.

Three special symbols are defined during execution of the expressions:

    state-start   -- the first state.  By default this is 1 and not 0,
                     since the 0th state is typically considered an
                     initialization state and is executed only once.

    statei        -- the currently executing state index

    state-last    -- the last state, typically for clean-up-and-exit

Any expression can have a side effect of changing these variables.
Execution always begins at state zero.
"
  (let ((state-start 1)
	(statei 0)
	(state-last (1- (length program)))
	next current done)
    (while (not done)
      (setq next (eval (aref program statei)))
      (if (not next)
	  (setq done t)
	(setq statei next)
	))))
-------------------- snip snip --------------------
;; mail header processing

(defvar sc-mail-headers-start nil
  "Start of header fields.")
(defvar sc-mail-headers-end nil
  "End of header fields.")

(defvar sc-state-mail-glommer
  [;; state 0: initialization
   (sc-state-incr (setq sc-mail-headers-start (point)))
   ;; state 1: look for x-attribution: headers
   (sc-state-looking-at "x-attribution:[ \t]+.*$" (sc-scarf-line t))
   ;; state 2: look for any other header
   (sc-state-looking-at "\\S +:.*$" (sc-scarf-line))
   ;; state 3: looking for an empty line which ends header section
   (sc-state-looking-at-done "$")
   ;; state 4: looking for a continuation line
   (sc-state-looking-at "[ \t]+" (sc-append-field))
   ;; state 5: must be an error
   (sc-state-if sc-mail-warn-if-non-rfc822-p (sc-issue-bad-header-warning))
   ;; state 6: clean up on exit
   (sc-state-do-and-exit (setq sc-mail-headers-end (point)))
   ]
  "Input to `sc-state-execute' for glomming mail header information")

(defun sc-scarf-line (&optional add-to-attribs-p)
  ;; Insert a key and value into sc-information.  If optional
  ;; ADD-TO-ATTRIBS-P is non-nil, the key/value pair is placed in
  ;; sc-attributions too.
  (if (looking-at "\\(\\S *\\)\\s *:\\s +\\(.*\\)$")
      (let* ((key (downcase (match-string 1)))
	     (val (match-string 2)))
	(sc-set-information key val 'add)
	(if add-to-attribs-p
	    (setq sc-attributions (acons key val sc-attributions)))
	)))

(defun sc-append-field ()
  ;; Append a continuation line onto the last fetched mail field info
  (let* ((elt (pop sc-information))	; destructive fetch
	 (key (car elt))
	 (val (cdr elt)))
    (if (and elt (looking-at "\\s *\\(.*\\)$"))
	(setq val (concat val " " (match-string 1))))
    (sc-set-information key val 'add)
    ))

(defun sc-issue-bad-header-warning ()
  ;; Issue warning that mail headers don't conform to RFC 822
  (let* ((line (sc-current-line))
	 (len (min (length line) 10))
	 (ellipsis (if (< len (length line)) "..." ""))
	 (msg "Mail header \"%s%s\" not RFC822 conformant. Skipping."))
    (if (fboundp 'warn)
	(warn msg (substring line 0 len) ellipsis)
      (message msg (substring line 0 len) ellipsis)
      (beep)
      (sit-for 2))))

(defun sc-nuke-headers ()
  ;; Nuke or keep all headers between sc-mail-headers-start and
  ;; sc-mail-headers-end based on sc-nuke-mail-headers and
  ;; sc-nuke-mail-header-list
  (cond 
   ((eq sc-nuke-mail-headers 'none))
   ((eq sc-nuke-mail-headers 'all)
    (delete-region sc-mail-headers-start sc-mail-headers-end))
   (t
    (let ((match (mapconcat 'identity sc-nuke-mail-header-list "\\|"))
	  end nukedprev-p matched-p)
      (unwind-protect
	  (progn
	    (goto-char sc-mail-headers-end)
	    (setq end (point-marker))
	    (goto-char sc-mail-headers-start)
	    (while (< (point) end)
	      (setq matched-p (looking-at "\\S +:.*$"))
	      (if (or (and matched-p (eq sc-nuke-mail-headers 'specified))
		      (and (not matched-p) (eq sc-nuke-mail-headers 'keep))
		      (and nukedprev-p (looking-at "[ \t]+")))
		  (progn
		    (sc-delete-line)
		    (setq nukedprev-p t))
		(setq nukedprev-p nil)
		(forward-line 1))))
	(and end (set-marker end nil)))))))

;; entry defun to all header processing
(defun sc-mail-process-headers (start end)
  "Process original mail message's mail headers.
After processing, mail headers may be nuked.  Header information is
stored in `sc-information', and any old information is lost."
  (interactive "r")
  (setq sc-information nil
	sc-attributions nil)
  (goto-char start)
  (sc-state-execute sc-state-mail-glommer)
  (sc-nuke-headers)
  (delete-blank-lines)
  (if (<= 0 sc-blank-lines-after-headers)
      (sc-delete-line)
    (insert-char ?\n (1- sc-blank-lines-after-headers)))
  (run-hooks 'sc-mail-process-headers-hook))
-------------------- snip snip --------------------

