;  CZ2 mbrola: czech voice for festival. Requires festival, mbrola and
;  cz2 database.
;
;  Master copy is located at http://atrey.karlin.mff.cuni.cz/~pavel/cz2_mbrola.scm.
;  Pasky's copy is located at http://pasky.ji.cz/~pasky/dev/festival/.
;  Please send patches to pavel@ucw.cz.
;
;  Put this file into lib/voices/czech/cz2_mbrola/festvox/ and put the cz2
;  database into lib/voices/czech/cz2_mbrola. (You can get the cz2 database
;  by going to http://tcts.fpms.ac.be/synthesis/, clicking "download",
;  "MBROLA binary and voices", "cz2". You'll also need the mbrola binary,
;  which can be obtained nearby.)
;
;  Mbrola binary for i386 linux is at http://tcts.fpms.ac.be/synthesis/mbrola/bin/pclinux/mbr301h.zip.
;  cz2 voice is at http://tcts.fpms.ac.be/synthesis/mbrola/dba/cz2/cz2-001009.zip.
;
;  Then do (set! voice_default 'voice_cz2_mbrola) to use it or put
;  (set! voice_default 'voice_cz2_mbrola) into your .festivalrc.
;  
;
;  Copyright 2000,2003 Pavel Machek <pavel@ucw.cz>
;  Copyright 2002 Petr Baudis <pasky@ucw.cz>
;
;  Version 0.6.5
;
;  You MAY use this software under terms of GNU GPL, or under following license:
;
;  Permission is hereby granted, free of charge, to use and distribute  
;  this software and its documentation without restriction, including   
;  without limitation the rights to use, copy, modify, merge, publish,  
;  distribute, sublicense, and/or sell copies of this work, and to      
;  permit persons to whom this work is furnished to do so, subject to   
;  the following conditions:                                            
;   1. The code must retain the above copyright notice, this list of    
;      conditions and the following disclaimer.                         
;   2. Original authors' names are not deleted.                         
;                                                                       
;  EVERYONE
;  DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      
;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   
;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     
;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    
;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   
;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          
;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       
;  THIS SOFTWARE.                                                       
;
; Festival is buggy, and ignores the current locale setting. It also has
; lowercase letters hardcoded to a-z, and similar fatal bugs
; w.r.t. localization. I try to work around them. --pavel
;
; FIXME: should create my own _accent_cart_tree and use it
;
; FIXME: when we stumble upon <, > or - (surrounded by spaces), festival
; dumps core... with the english voice, it doesn't though :-(
;
; There is an annoying problem with @ phonemes inserted at some places - I don't
; know how to prevent that, so we work around it by accepting them patiently
; and filtering them on their way to mbrola.
;
; TODO:
;   * Better phoneme set (less spanish, more czech) and durations (ditto) - I
;     tried to improve them, but I'm not a linguistic expert and the durations
;     may (very very ;-) slightly resemble a moravian accent
;   * Better accents
;   * Better voice (without confined nose)
;   * Better intonation (it is not real enough yet and it's not 100% correct
;     for ! and ?)
;   * Sometimes the pauses around ',' and '.' aren't long enough. Sometimes
;     they are. They appear to be variable to some degree, needs to be
;     investigated.
;   * Reading of multi-digit numbers
;   * x should be sometimes transcribed as 'ks' and sometimes as 'gz'. 
;   * "Dub, zpěv, sjezd, sběr, když" should be pronounced "Dup, sjest, zbjer, 
;     gdiš". Both are pronounced horribly.
;     (see Psutka: Komunikace s pocitacem mluvenou reci)
;   * "K bráně" should be "G bráně".
;

; Manual test corpus. `foo` is bound to one string holding shell command
; lines that pipe Czech sentences to `festival --tts`. Nothing in this file
; reads `foo`, so the string appears to serve only as a note for testing the
; letter-to-sound rules by hand. The `#` lines below are part of the string,
; not Scheme comments.
(set! foo "
# SOME DIFFICULT WORDS
     echo 'Běží liška k Táboru. Věští že dostane pěstí.' | festival --tts
     echo 'Oběd, opěra, štěně, měchýř. Dítě, nikdy.' | festival --tts
     echo 'Exhumace, sex, xylofon, boxer, existovat.' | festival --tts
# (It should be handled up to here. Those below are transcribed wrong.)
     echo 'Dub, zpěv, sjezd, sběr, když.' | festival --tts
     echo 'Ukaž zub, ukaž chrup, ukaž účet, ukaž mlže.' | festival --tts
     echo 'Vlak dojel, máš řetěz.' | festival --tts
     echo 'Pod bukem, bez masa.' | festival --tts
     echo 'Z domu, v lese.' | festival --tts
     echo 'K mámě, k vozu, k bráně.' | festival --tts
     echo 'Však, švarný, shora.' | festival --tts
     echo 'Shnilý, nashledanou, hřbet, kuř, mařka, hříbě, švarný, shora.' | festival --tts
     echo 'Banka, tango, tramvaj, nymfa, punťa, pindík, odpovědně, sto dní, vodník, hutník.' | festival --tts
     echo 'Dětský, tlustší, lidský.' | festival --tts
     echo 'Odsát, podšívka, odžhavit, odzbrojit, pod sklepem.' | festival --tts
     echo 'Babiččin, měkký, nižší, trojjediný, dvacetdevět.' | festival --tts
     echo 'Využil, poodešel, pravoúhlí.' | festival --tts
     echo 'Loudat, auto, eunuch.' | festival --tts
     echo 'Asi vrčí medvědi. Odvšivil postupně všechny.' | festival --tts
     echo 'V uchu, z oka, s ocasem, k odstřelu.' | festival --tts
# (Even humans have problems with these)
     echo 'Strč prst skrz krk!' | festival --tts
# (Causes crash?!)
     echo '.Ahoj,' | festival --tts
")


; Directory of this voice: the value stored under the key cz2_mbrola in the
; voice-locations association list (voice-locations is defined outside this
; file). voice_cz2_mbrola uses it to build the mbrola database path.
(set! cz2_mbrola_dir (cdr (assoc 'cz2_mbrola voice-locations)))

; Load the mbrola module. voice_cz2_mbrola selects MBROLA_Synth as the
; synthesis method -- presumably provided by this module (confirm).
(require 'mbrola)

;; Phone set "czech", referenced by the lexicon and the letter-to-sound
;; rules in this file. The first list declares eight features and their
;; allowed values; every member row is a phone name followed by one value
;; per feature, in the declared order:
;;   vc vlng vheight vfront vrnd ctype cplace cvox
;; NOTE(review): per the original author the feature values were copied from
;; a Spanish voice and only roughly corrected, so do not treat them as
;; phonetically authoritative. For example i: carries vlng d (diphthong)
;; while the other long vowels (a: e: o: u:) carry l -- confirm whether that
;; is intended before relying on these features.
(defPhoneSet
  czech
  ;;;  Phone Features
  (;; vowel or consonant
   (vc + -)  

   ;; vowel length: short long diphthong schwa
   (vlng s l d a 0)
   ;; vowel height: high mid low
   (vheight 1 2 3 -)
   ;; vowel frontness: front mid back
   (vfront 1 2 3 -)
   ;; lip rounding
   (vrnd + -)
   ;; consonant type: stop fricative affricate nasal liquid
   (ctype s f a n l 0)
   ;; place of articulation: labial alveolar palatal labio-dental
   ;;                         dental velar
   (cplace l a p b d v 0)
   ;; consonant voicing
   (cvox + -)
   )
  ;; Phone set members (features not set properly: copied from spanish; roughly corrected by pasky)
  ;; Both _ and @ are declared as silences later in this file.
  (
   (_    - 0 - - - 0 0 +)
   (@    - 0 - - - 0 0 +) ;; ???? this needs to be here :/
   (a    + s 3 2 - 0 0 -)
   (a:   + l 3 2 - 0 0 -)
   (b    - 0 - - + s l +)
   (ts   - 0 - - + s v +)
   (d    - 0 - - + s a +)
   ("d'" - 0 - - + s a +)
   (e    + s 2 1 - 0 0 -)
   (e:   + l 2 1 - 0 0 -)
   (f    - 0 - - + f b -)
   (g    - 0 - - + s p +)
   (h\   - 0 - - + a a -)
   (i    + s 1 1 - 0 0 -)
   (i:   + d 1 1 - 0 0 -)
   (j    - 0 - - - l a +)
   (k    - 0 - - - s p -)
   (l    - 0 - - - l d +)
   (m    - 0 - - + n l +)
   (n    - 0 - - + n d +)
   ("n'" - 0 - - - n d +)
   (o    + s 3 3 + 0 0 -)
   (o:   + l 3 3 + 0 0 -)
   (p    - 0 - - + s l -)
   (r    - 0 - - + f p +)
   ("r'" - 0 - - + f p +)
   (s    - 0 - - - f a +)
   (S    - 0 - - + f a +)
   (t    - 0 - - + s v -)
   ("t'" - 0 - - + s v +)
   (u    + s 2 3 + 0 0 -)
   (u:   + l 2 3 + 0 0 -)
   (v    - 0 - - - n l +)
   (x    - 0 - - + a a -)
   (z    - 0 - - - f a +)
   (Z    - 0 - - + f a +)
   (dz    - 0 - - + f a +)
   (dZ    - 0 - - + f a +)
   (tS   - 0 - - + s v +)
  )
)

;; Per-phone duration data, installed as duration_ph_info by
;; voice_cz2_mbrola. Each entry is (phone number number); the first number
;; is 0.0 for every phone.
;; NOTE(review): for a z-score style duration method these columns are
;; presumably (phone mean stddev), which would make the leaf values of
;; czech_dur_tree act as multipliers of the third column -- confirm against
;; the Festival duration documentation.
(set! czech_el_phone_data
; The original author notes that these values are also not right
'(
   (_ 0.0 0.250)
   (a 0.0 0.090)
   (a: 0.0 0.170)
   (b 0.0 0.065)
   (ts 0.0 0.065)
   (d 0.0 0.060)
   ("d'" 0.0 0.060)
   (e 0.0 0.090)
   (e: 0.0 0.170)
   (f 0.0 0.100)
   (g 0.0 0.080)
   (h\ 0.0 0.090)
   (i 0.0 0.080)
   (i: 0.0 0.190)
   (j 0.0 0.100)
   (k 0.0 0.100)
   (l 0.0 0.060)
   (m 0.0 0.100)
   (n 0.0 0.080)
   ("n'" 0.0 0.080)
   (o 0.0 0.090)
   (o: 0.0 0.180)
   (p 0.0 0.100)
   (r 0.0 0.060)
   ("r'" 0.0 0.050)
   (s 0.0 0.110)
   (S 0.0 0.110)
   (t 0.0 0.055)
   ("t'" 0.0 0.065)
   (u 0.0 0.080)
   (u: 0.0 0.170)
   (v 0.0 0.100)
   (x 0.0 0.135)
   (z 0.0 0.110)
   (Z 0.0 0.110)
   (dz 0.0 0.110)
   (dZ 0.0 0.110)
   (tS 0.0 0.110)
   (@ 0.0 0.100) ;; ???? this needs to be here :/
))

;; Duration tree, installed as duration_cart_tree by voice_cz2_mbrola.
;; Every leaf is a single number selected by two questions about the
;; syllable that owns the segment (reached through R:SylStructure.parent):
;;   previous syllable has syl_break > 1 (clause initial):
;;       stress 1 -> 1.5, otherwise 1.2
;;   this syllable has syl_break > 1 (clause final):
;;       stress 1 -> 2.0, otherwise 1.5
;;   neither:
;;       stress 1 -> 1.2, otherwise 1.0
(set! czech_dur_tree
 '
   ((R:SylStructure.parent.R:Syllable.p.syl_break > 1 ) ;; clause initial
    ((R:SylStructure.parent.stress is 1)
     ((1.5))
     ((1.2))
    )
    ((R:SylStructure.parent.syl_break > 1)   ;; clause final
     ((R:SylStructure.parent.stress is 1)
      ((2.0))
      ((1.5))
     )
     ((R:SylStructure.parent.stress is 1)
      ((1.2))
      ((1.0))
     )
    )
   )
)

;; Declare _ and @ as the silence phones of the phone set.
(PhoneSet.silences '(_ @))

;; Create the "czech" lexicon and attach the "czech" phone set to it.
(lex.create "czech")
(lex.set.phoneset "czech")

;; Three explicit lexicon entries. Each entry is (word nil syllables), where
;; every syllable is (phones stress). The three entries share the same
;; phones and differ only in which syllable carries stress 1.
;; NOTE(review): these look like stress experiments rather than real
;; vocabulary -- confirm before removing or extending them.
(lex.add.entry
  '("pocitac" nil ( ((p o) 1) ((tS i:) 0)  ((t a tS) 0) ))
)

(lex.add.entry
  '("pocitaca" nil ( ((p o) 0) ((tS i:) 1)  ((t a tS) 0) ))
)

(lex.add.entry
  '("pocitacb" nil ( ((p o) 0) ((tS i:) 0)  ((t a tS) 1) ))
)

;; Letter-to-sound rules for Czech, applied by czech_lts.
;; Each rule has the form ( LEFT-CONTEXT [ LETTERS ] RIGHT-CONTEXT = PHONES ).
;; More specific rules for a letter are listed before the general rule for
;; that letter, so the order of the rules matters. Every phone on a
;; right-hand side must be a member of the czech phone set.
(lts.ruleset
;  Name of rule set
 czech_ruleset
;  Sets used in the rules
(
; Softening vowels: a preceding d, t or n is palatalized
  (MEKCIDLO i í ě I Í Ě)
; Labials after which ě is read as "j e"
  (MEKCENO b p f v)
; Vowels
  (SA a e i o u á é í ó ú ů Á É Í Ó Ů Ú)
; Voiced paired consonants.
; The original list started with the voiceless c (also listed in NPS) and
; had no b; that made x before c come out as "g z". It now starts with b.
  (ZPS b d ď Ď g v z ž Ž h)
; Voiceless paired consonants (not referenced by any rule below)
  (NPS p t ť Ť k f s š Š c č Č)
; Unpaired consonants
  (JS m n ň Ň l r j)
)
;  Rules
(
 ;; Whole-word exceptions: without them d/t before i would be palatalized
 ( [ f e s t i v ] = f e s t i v )
 ( [ r á d i o ] = r a: d i o )
 ( [ r a d i o ] = r a d i o )
 ;; Letters
 ( [ a ] = a )
 ( [ á ] = a: )
 ( [ Á ] = a: )
 ( [ b ] = b )
 ( [ c h ] = x )
 ( [ c ] = ts )
 ( [ Č ] = tS )
 ( [ č ] = tS )
 ( [ d ] MEKCIDLO = "d'" )
 ( [ d ] = d )
 ( [ ď ] = "d'" )
 ( [ Ď ] = "d'" )
 ( [ e ] = e )
 ( [ é ] = e: )
 ( [ É ] = e: )
 ( MEKCENO [ ě ] = j e )
 ( MEKCENO [ Ě ] = j e )
 ( [ ě ] = e )
 ( [ Ě ] = e )
 ( [ f ] = f )
 ( [ g ] = g )
 ( [ h ] = h\ )
 ( [ i ] = i )
 ( [ í ] = i: )
 ( [ Í ] = i: )
 ( [ j ] = j )
 ( [ k ] = k )
 ( [ l ] = l )
 ( [ m ě ] = m "n'" e )
 ( [ m Ě ] = m "n'" e )
 ( [ m ] = m )
 ( [ n ] MEKCIDLO = "n'" )
 ( [ n ] = n )
 ( [ ň ] = "n'" )
 ( [ Ň ] = "n'" )
 ( [ o ] = o )
 ( [ ó ] = o: )
 ( [ Ó ] = o: )
 ( [ p ] = p )
 ( [ q ] = k v )
 ( [ r ] = r )
 ( [ ř ] = "r'" )
 ( [ Ř ] = "r'" )
 ( [ s ] = s )
 ( [ š ] = S )
 ( [ Š ] = S )
 ( [ t ] MEKCIDLO = "t'" )
 ( [ t ] = t )
 ( [ ť ] = "t'" )
 ;; Was "T", which is not a member of the czech phone set; upper-case Ť
 ;; must produce the same phone as lower-case ť.
 ( [ Ť ] = "t'" )
 ( [ u ] = u )
 ( [ ú ] = u: )
 ( [ Ú ] = u: )
 ( [ ů ] = u: )
 ( [ Ů ] = u: )
 ( [ v ] = v )
 ( [ w ] = v )
 ( e [ x ] SA = g z )   ; FIXME: this should only apply at word beginning
 ( [ x ] ZPS = g z )
 ( [ x ] JS = g z )
 ( [ x ] = k s )
 ( [ y ] = i )
 ( [ ý ] = i: )
 ( [ Ý ] = i: )
 ( [ z ] = z )
 ( [ ž ] = Z )
 ( [ Ž ] = Z )
 ;; Digits are read one by one, each followed by a pause
 ( [ 1 ] = j e d n a _ )
 ( [ 2 ] = d v a _ )
 ( [ 3 ] = t "r'" i _ )
 ( [ 4 ] = tS t i "r'" i _ )
 ( [ 5 ] = p j e t _ )
 ( [ 6 ] = S e s t _ )
 ( [ 7 ] = s e d m _ )
 ( [ 8 ] = o s m _ )
 ( [ 9 ] = d e v j e t _ )
 ( [ 0 ] = n u l a _ )
 ;; Punctuation and symbols: either a pause or a spoken name plus a pause
 ( [ "." ] = _ )
 ( [ "," ] = _ )
 ( [ "?" ] = _ )
 ( [ "!" ] = _ )
 ( [ ":" ] = _ )
 ( [ ";" ] = _ )
 ( [ "+" ] = p l u s _ )
 ( [ "-" ] = _ )
 ( [ "_" ] = _ )
 ( [ ">" ] = _ )
 ( [ "<" ] = _ )
 ( [ "(" ] = _ )
 ( [ ")" ] = _ )
 ( [ "[" ] = _ )
; ( [ "]" ] = _ ) FIXME: festival complains about this rule
 ( [ "{" ] = _ )
 ( [ "}" ] = _ )
 ( [ "\"" ] = _ )
 ( [ "'" ] = _ )
 ( [ "`" ] = _ )
 ( [ "@" ] = z a v i n a: tS _ )
 ( [ "*" ] = h\ v j e z d i tS k a _ )
 ( [ "~" ] = t i l d a _ )
 ( [ "#" ] = h e S _ )
 ( [ "$" ] = d o l a r _ )
 ( [ "%" ] = p r o ts e n t _ )
 ( [ "^" ] = _ ) ;; ???
 ( [ "&" ] = a _ )
 ( [ "|" ] = _ _ )
 ( [ "\\" ] = b e k s l e S _ )
))

(define (czech_lts word features)
  "(czech_lts WORD FEATURES)
Build a Czech pronunciation of WORD with the czech_ruleset letter-to-sound
rules.  Returns a lexical entry of the form (WORD nil SYLLABLES).  FEATURES
is accepted but not used."
  (let ((phones (lts.apply (downcase word) 'czech_ruleset)))
    (list word nil (lex.syllabify.phstress phones))))

;; Register czech_lts as the letter-to-sound method of the lexicon.
(lex.set.lts.method 'czech_lts)

;; Phrase-break tree, installed as phrase_cart_tree by voice_cz2_mbrola.
;; It labels each item from the punctuation that ends its token:
;;   ? . : !                          -> BB
;;   ' " , ;                          -> B
;;   no next item (n.name is 0)       -> BB
;;   anything else                    -> NB
;; (In Festival's convention BB / B / NB stand for big break, break and
;; no break.)
(set! czech_phrase_cart_tree
'
((lisp_token_end_punc in ("?" "." ":" "!"))
  ((BB))
  ((lisp_token_end_punc in ("'" "\"" "," ";"))
   ((B))
   ((n.name is 0)  ;; end of utterance
    ((BB))
    ((NB))))))

(define (czech_token_to_words token name)
  "(czech_token_to_words TOKEN NAME)
Return NAME unchanged as a one-element word list; TOKEN is not inspected.
Works around iso-8859-2 handling bugs in core festival."
  (cons name '()))

; Something like ((= R:SylStructure.parent.daughter1 R:SylStructure) ((Accented))) ?


;; Accent tree, installed as int_accent_cart_tree by voice_cz2_mbrola.
;; A syllable is labelled Accented when its parent word has gpos "content"
;; and either the syllable has stress 1 or its position_type is "single".
;; Every other syllable is labelled NONE.
(set! czech_accent_cart_tree
 '
  ((R:SylStructure.parent.gpos is content)
   ((stress is 1)
    ((Accented))
    ((position_type is single)
     ((Accented))
     ((NONE))))
   ((NONE))))

(define (int_simple)
  "(int_simple)
Switch intonation to the Simple method: set int_simple_params to
f0_mean 120 and f0_std 10, then select Simple for both Int_Target_Method
and Int_Method."
  (set! int_simple_params
        (list '(f0_mean 120)
              '(f0_std 10)))
  (Parameter.set 'Int_Target_Method 'Simple)
  (Parameter.set 'Int_Method 'Simple))

(define (int_tree)
  "(int_tree)
Switch intonation to the tree method with linear-regression targets, using
the f2b F0 models loaded from f2bf0lr.  Alternative to int_simple."
  ;; The f2b_f0_lr_* models referenced below come from this module.
  (require 'f2bf0lr)

  ;; Method selection
  (Parameter.set 'Int_Target_Method 'Int_Targets_LR)
  (Parameter.set 'Int_Method 'Intonation_Tree)

  ;; Regression models for syllable start, middle and end
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid   f2b_f0_lr_mid)
  (set! f0_lr_end   f2b_f0_lr_end)

  ;; Target and model F0 statistics
  (set! int_lr_params
        '((target_f0_mean 105) (target_f0_std 15)
          (model_f0_mean 170) (model_f0_std 34))))


(define (voice_cz2_mbrola)
  "(voice_cz2_mbrola)
Select the male Czech MBROLA voice cz2 as the current voice."
  (voice_reset)

  ;; --- Language and phone set ---
  (Parameter.set 'Language 'czech)
  (Parameter.set 'PhoneSet 'czech)
  (PhoneSet.select 'czech)

  ;; --- Tokenization: tokens are passed through as-is
  ;;     (see czech_token_to_words) ---
  (set! token_to_words czech_token_to_words)

  ;; --- Part of speech: there is no Czech tagger ---
  ;; History: (require 'pos) was left disabled here, and setting
  ;; pos_lex_name to nil made the voice say nothing at all.
  (set! pos_supported nil)
  (set! pos_ngram_name 'english_pos_ngram)

  ;; --- Lexicon ---
  (lex.select "czech")

  ;; --- Phrase breaks ---
  (require 'phrase)
  (set! phrase_cart_tree czech_phrase_cart_tree)
  (Parameter.set 'Phrase_Method 'cart_tree)

  ;; --- Accents and tones ---
  ;; The author was unsure whether f2b_int_accent_cart_tree gives better
  ;; results than the Czech accent tree; it can be tried instead.
  (require 'tobi)
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree czech_accent_cart_tree)
  (set! postlex_vowel_reduce_cart_tree postlex_vowel_reduce_cart_data)

  ;; --- F0: int_tree is the available alternative ---
  (int_simple)

  ;; --- Durations ---
  (set! duration_cart_tree czech_dur_tree)
  (set! duration_ph_info czech_el_phone_data)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.1)

  ;; --- Waveform synthesis ---
  (set! us_abs_offset 0.0)
  (set! window_factor 1.0)
  (set! us_rel_offset 0.0)
  (set! us_gain 0.9)

  (Parameter.set 'Synth_Method 'MBROLA_Synth)
  (set! mbrola_progname "mbrola")
  ;; The trailing space in the format string is deliberate in the original;
  ;; presumably it separates the database path from the arguments the mbrola
  ;; module appends -- confirm in mbrola.scm before changing it.
  (set! mbrola_database
        (format nil "%s%s " cz2_mbrola_dir "cz2/cz2"))

  (set! current-voice 'cz2_mbrola))

;; Announce the voice to Festival under the name cz2_mbrola, with its
;; language, gender, dialect and a short description.
(proclaim_voice
 'cz2_mbrola
 '((language czech)
   (gender male)
   (dialect none)
   (description
    "This is test czech voice using mbrola.")))

;; Mark this file as loaded so that (require 'cz2_mbrola) does not load it
;; again.
(provide 'cz2_mbrola)