shithub: flite

ref: baaa888c574e075222ad70b002fe6a140a19098b
dir: /tools/make_lts.scm/

View raw version
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                     ;;;
;;;                  Language Technologies Institute                    ;;;
;;;                     Carnegie Mellon University                      ;;;
;;;                         Copyright (c) 1999                          ;;;
;;;                        All Rights Reserved.                         ;;;
;;;                                                                     ;;;
;;; Permission is hereby granted, free of charge, to use and distribute ;;;
;;; this software and its documentation without restriction, including  ;;;
;;; without limitation the rights to use, copy, modify, merge, publish, ;;;
;;; distribute, sublicense, and/or sell copies of this work, and to     ;;;
;;; permit persons to whom this work is furnished to do so, subject to  ;;;
;;; the following conditions:                                           ;;;
;;;  1. The code must retain the above copyright notice, this list of   ;;;
;;;     conditions and the following disclaimer.                        ;;;
;;;  2. Any modifications must be clearly marked as such.               ;;;
;;;  3. Original authors' names are not deleted.                        ;;;
;;;  4. The authors' names are not used to endorse or promote products  ;;;
;;;     derived from this software without specific prior written       ;;;
;;;     permission.                                                     ;;;
;;;                                                                     ;;;
;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK        ;;;
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     ;;;
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  ;;;
;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE     ;;;
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   ;;;
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  ;;;
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         ;;;
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      ;;;
;;; THIS SOFTWARE.                                                      ;;;
;;;                                                                     ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;             Author: Alan W Black ([email protected])                   ;;;
;;;               Date: December 1999                                   ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                     ;;;
;;; Generate a C compilable lts rules.                                  ;;;
;;;                                                                     ;;;
;;; Two modes, from decision graphs as wfsts or from CART trees         ;;;
;;;                                                                     ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; These are preordained by the LTS building process
(set! lts_context_window_size 4)
(set! lts_context_extra_feats 1)

(define (ltsregextoC name trees idir odir)
  "(ltsregextoC name idir odir)
Converts its wfsts to a C compilation structure for flite.  Assumes
$idir/[a-z].tree.wfst to compile from."
  (let 
    ((ofde (fopen (path-append odir (string-append name "_lts_rules.c")) "w"))
     (ofdm (fopen (path-append odir (string-append name "_lts_model.c")) "w"))
     (ofdh (fopen (path-append odir (string-append name "_lts_model.h")) "w"))
     (ifd)
     (rule_index nil))
    (set! lts_pos 0)
    (set! phone_table (list "epsilon"))
    (set! letter_table (list "nothing" "#" "0"))
    (set! letter_table (append letter_table (mapcar car trees)))

    (format ofde "/*******************************************************/\n")
    (format ofde "/**  Autogenerated lts rules (regex) for %s     */\n" name)
    (format ofde "/**  from %s    */\n" idir)
    (format ofde "/*******************************************************/\n")
    (format ofde "\n")
    (format ofde "#include \"cst_string.h\"\n")
    (format ofde "#include \"cst_lts.h\"\n")
    (format ofde "#include \"cst_lexicon.h\"\n")

    (format ofdm "/*******************************************************/\n")
    (format ofdm "/**  Autogenerated lts rules (regex) for %s     */\n" name)
    (format ofdm "/**  from %s    */\n" idir)
    (format ofdm "/*******************************************************/\n")
    (format ofdm "\n")
    (format ofdm "#include \"cst_string.h\"\n")
    (format ofdm "#include \"cst_lts.h\"\n")
    (format ofdm "#include \"cst_lexicon.h\"\n")
    (format ofdm "#include \"%s_lts_model.h\"\n\n" name)
    (format ofdm "const cst_lts_model %s_lts_model[] = \n" name)
    (format ofdm "{\n")

    (set! ln 0)
    (mapcar
     (lambda (l)
       (let ((ifd (fopen (path-append idir 
			  (string-append l ".tree.wfst")) "r")))
	 (format t "doing: %s\n" l)
	 (format ofdm "   /** letter \"%s\" **/\n" l)
	 (format ofdh "   /** letter \"%s\" **/\n" l)
	 (set! rule_index (cons (list l lts_pos ln) rule_index))
	 (set! lts_pos (dump_lts_wfst ln ifd ofdm ofdh lts_pos))
	 (fclose ifd)
         (set! ln (+ 1 ln))
         ))
     (cdr (cddr letter_table))
     )
    (format ofdm "    0, 0, 0,0, 0,0\n")
    (format ofdm "};\n")

    ;; Make the letter table be the same order as the rule inde (+2)

    ;; The phone table (bytes to phone names)
    (format ofde "\n")
    (format ofde "const char * const %s_lts_phone_table[%d] = \n" 
	    name (+ 1 (length phone_table)))
    (format ofde "{\n")
    (mapcar (lambda (p) (format ofde "    \"%s\",\n" p)) phone_table)
    (format ofde "    NULL\n")
    (format ofde "};\n")

    ;; The letter_table (bytes to letter names)
    (format ofde "\n")
    (format ofde "const char * const %s_lts_letter_table[%d] = \n" 
	    name (+ 1 (length letter_table)))
    (format ofde "{\n")
    (mapcar (lambda (p) (format ofde "    \"%s\",\n" p)) letter_table)
    (format ofde "    NULL\n")
    (format ofde "};\n")

    ;; Which rule starts where
    (format ofde "\n")
    (format ofde "const cst_lts_addr %s_lts_letter_index[%d] = \n" 
	    name (+ 1 (length rule_index)) )
    (format ofde "{\n")
    (mapcar 
     (lambda (p) (format ofde "    %d, /* %s */\n" (car (cdr p)) (car p)))
     (reverse rule_index))
    (format ofde "    0\n")
    (format ofde "};\n")

    (format ofde "\n")

;     (format ofde "const cst_lts_rules %s_lts_rules = {\n" name)
;     (format ofde "   \"%s\",\n" name)
;     (format ofde "   %s_lts_letter_index,\n" name)
;     (format ofde "   %s_lts_model,\n" name)
;     (format ofde "   %s_lts_phone_table,\n" name)
;     (format ofde "   4, /* context_window_size */\n")
;     (format ofde "   1,  /* context_extra_feats */\n")
;     (format ofde "   %s_lts_letter_table\n" name)
;     (format ofde "};\n")
;     (format ofde "\n")

    (fclose ofde)
    (fclose ofdh)
    (fclose ofdm)
    ))

(define (dump_lts_wfst ln ifd ofde ofdh lts_pos)
  "(dump_lts_wfst ifd ofde ofdh lts_pos)
Dump the WFST as a byte table to ifd.  Jumps are dumped as
#define's to ofdh so forward references work.  lts_pos is the 
rule position.  Each state is saves as
    feature  value  true_addr  false_addr
Feature and value are single bytes, which addrs are double bytes."
  (let ((state))
    ;; Skip WFST header
    (while (not (string-equal (set! state (readfp ifd)) "EST_Header_End"))
       (if (equal? state (eof-val))
	   (error "eof in lts regex file")))
    (while (not (equal? (set! state (readfp ifd)) (eof-val)))
      (format ofdh "#define LTS_STATE_%d_%d %s\n" 
	      ln (car (car state)) 
	      (lts_bytify lts_pos))
      (cond 
       ((string-equal "final" (car (cdr (car state))))
	(set! lts_pos (- lts_pos 1))
	t) ;; do nothing
       ((string-matches (car (car (cdr state))) ".*_.*")
	(format ofde "   %s, %d, %s , %s , \n"
		(lts_feat (car (car (cdr state))))
;		(lts_val (car (car (cdr state))))
		(lts_let_num (lts_letter (car (car (cdr state)))) 0 letter_table)
		(format nil "LTS_STATE_%d_%d" ln
			(car (cdr (cdr (car (cdr (cdr state)))))))
		(format nil "LTS_STATE_%d_%d" ln 
			(car (cdr (cdr (car (cdr state))))))))
       (t ;; its a letter output state
	(format ofde "   255, %s, 0,0 , 0,0 , \n"
		(lts_phone (car (car (cdr state))) 0 phone_table))))
      (set! lts_pos (+ 1 lts_pos)))
    lts_pos))

(define (lts_feat trans)
  "(lts_feat trans)
Returns the feature number represented in this transition name."
  (let ((fname (substring trans 5 (- (length trans) 11))))
    (if (string-matches fname ".*_i?")
	(set! fname (string-before fname "_")))
    (cond
     ((string-equal fname "p.p.p.p.name") 0)
     ((string-equal fname "p.p.p.name") 1)
     ((string-equal fname "p.p.name") 2)
     ((string-equal fname "p.name") 3)
     ((string-equal fname "n.name") 4)
     ((string-equal fname "n.n.name") 5)
     ((string-equal fname "n.n.n.name") 6)
     ((string-equal fname "n.n.n.n.name") 7)
     (t (error (format nil "ltsregex2C: unknown feat %s %s\n" fname trans ))))))

(define (lts_letter trans)
  "(lts_val trans)
The letter being tested."
  (string-before (string-after trans "is_") "_"))

(define (lts_val trans)
  "(lts_val trans)
The letter being tested."
  (substring trans (- (length trans) 2) 1))

(define (lts_phone p n table)
  (cond
   ((string-equal p (car table))
    n)
   ((not (cdr table))  ;; new p
    (set-cdr! table (list p))
    (+ 1 n))
   (t
    (lts_phone p (+ 1 n) (cdr table)))))

(define (lts_let_num p n table)
  (cond
   ((null table)
    (format t "no entry %s %d\n" p n)
    (errrr))
   ((string-equal p (car table))
    n)
   (t
    (lts_let_num p (+ 1 n) (cdr table)))))
  
(define (lts_bytify n)
  "(lts_bytify n)
Return this short as a two byte comma separated string."
  (let ((xx (format nil "%04x" n)))
    ;; This is unfortunately byte order specific
    (format nil "0x%s,0x%s"
	    (substring xx 2 2)
	    (substring xx 0 2))))

(provide 'make_lts)