From 6202ee62d0f34509d7a652c30a1d5c19efec70ae Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Sun, 8 May 2016 16:09:05 +0200 Subject: done implementing 'easy' rules set --- arabluatex.dtx | 19 ++- arabluatex.lua | 451 +++++++++++++++++++++++++++++-------------------- arabluatex_fullvoc.lua | 305 ++++++++++++++++++++++++++++++++- arabluatex_trans.lua | 55 ++++++ arabluatex_voc.lua | 176 +++++++++++++++++++ 5 files changed, 810 insertions(+), 196 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index f44b309..2661ad4 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -2061,7 +2061,7 @@ wa-ya.sIru ta.hta 'l-jildi % \end{macrocode} % \end{macro} % \begin{macro}{\SetArbEasy} -% \begin{macro}{\SetArbDeflt} +% \begin{macro}{\SetArbDflt} % By default, \package{arabluatex} applies complex rules to generate % euphonic \arb[trans]{ta^sdId}, \arb[trans]{'alif mamdUdaT} % \linebreak and \arb[trans]{sukUn} depending on the modes which are @@ -2073,7 +2073,7 @@ wa-ya.sIru ta.hta 'l-jildi % \begin{macrocode} \def\al@arb@rules{dflt} \NewDocumentCommand{\SetArbEasy}{}{\def\al@arb@rules{easy}} -\NewDocumentCommand{\SetArbDflt}{}{\def\al@arb@rules{default}} +\NewDocumentCommand{\SetArbDflt}{}{\def\al@arb@rules{dflt}} % \end{macrocode} % \end{macro} % \end{macro} @@ -2130,7 +2130,8 @@ wa-ya.sIru ta.hta 'l-jildi \else% \ifx\@tempa\al@mode@fullvoc% \bgroup\textdir TRT\arabicfont% - \luadirect{tex.sprint(processfullvoc(\luastringN{#2}))}\egroup% + \luadirect{tex.sprint(processfullvoc(\luastringN{#2}, + \luastringO{\al@arb@rules}))}\egroup% \else% \ifx\@tempa\al@mode@novoc% \bgroup\textdir TRT\arabicfont% @@ -2139,7 +2140,8 @@ wa-ya.sIru ta.hta 'l-jildi \ifx\@tempa\al@mode@trans% \bgroup\textdir TLT\al@trans@style% \luadirect{tex.sprint(processtrans(\luastringN{#2}, - \luastringO{\al@trans@convention}))}\egroup% + \luastringO{\al@trans@convention}, + \luastringO{\al@arb@rules}))}\egroup% \else% \fi\fi\fi\fi} % \end{macrocode} @@ -2152,11 +2154,13 @@ wa-ya.sIru ta.hta 'l-jildi {\par\edef\@tempa{#1}% \ifx\@tempa\al@mode@voc% \bgroup\pardir TRT\textdir TRT\arabicfont% - \luadirect{tex.sprint(processvoc(\luastringO{\BODY}))}\egroup% + \luadirect{tex.sprint(processvoc(\luastringO{\BODY}, + \luastringO{\al@arb@rules}))}\egroup% \else% \ifx\@tempa\al@mode@fullvoc% \bgroup\pardir TRT\textdir TRT\arabicfont% - \luadirect{tex.sprint(processfullvoc(\luastringO{\BODY}))}\egroup% + \luadirect{tex.sprint(processfullvoc(\luastringO{\BODY}, + \luastringO{\al@arb@rules}))}\egroup% \else% \ifx\@tempa\al@mode@novoc% \bgroup\pardir TRT\textdir TRT\arabicfont% @@ -2164,7 +2168,8 @@ wa-ya.sIru ta.hta 'l-jildi \else \ifx\@tempa\al@mode@trans% \bgroup\pardir TLT\textdir TLT\al@trans@style% \luadirect{tex.sprint(processtrans(\luastringO{\BODY}, - \luastringO{\al@trans@convention}))}\egroup% + \luastringO{\al@trans@convention}, + \luastringO{\al@arb@rules}))}\egroup% \else \fi\fi\fi\fi}[\par] % \end{macrocode} % \end{environment} diff --git a/arabluatex.lua b/arabluatex.lua index d999548..0bb4756 100644 --- a/arabluatex.lua +++ b/arabluatex.lua @@ -134,217 +134,288 @@ end local function voc(str) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) - for i = 1,#hamza do - inside = string.gsub(inside, hamza[i].a, hamza[i].b) - end - for i = 1,#tanwin do - inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) - end - for i = 1,#trigraphs do - inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b) - end - for i = 1,#digraphs do - inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) - end - for i = 1,#single do - inside = string.gsub(inside, single[i].a, single[i].b) - end - for i = 1,#longv do - inside = string.gsub(inside, longv[i].a, longv[i].b) - end - for i = 1,#shortv do - inside = string.gsub(inside, shortv[i].a, shortv[i].b) - end - for i = 1,#punctuation do - inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) - end - for i = 1,#null do - inside = string.gsub(inside, null[i].a, null[i].b) - end - inside = indnum(inside) - return string.format("\\txarb{%s}", inside) - end) + for i = 1,#hamza do + inside = string.gsub(inside, hamza[i].a, hamza[i].b) + end + for i = 1,#tanwin do + inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) + end + for i = 1,#trigraphs do + inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b) + end + for i = 1,#digraphs do + inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) + end + for i = 1,#single do + inside = string.gsub(inside, single[i].a, single[i].b) + end + for i = 1,#longv do + inside = string.gsub(inside, longv[i].a, longv[i].b) + end + for i = 1,#shortv do + inside = string.gsub(inside, shortv[i].a, shortv[i].b) + end + for i = 1,#punctuation do + inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) + end + for i = 1,#null do + inside = string.gsub(inside, null[i].a, null[i].b) + end + inside = indnum(inside) + return string.format("\\txarb{%s}", inside) + end) return str end local function voceasy(str) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) - for i = 1,#hamza do - inside = string.gsub(inside, hamza[i].a, hamza[i].b) - end - for i = 1,#tanwin do - inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) - end - for i = 1,#trigraphs do - inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b) - end - for i = 1,#digraphs do - inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) - end - for i = 1,#single do - inside = string.gsub(inside, single[i].a, single[i].b) - end - for i = 1,#longv do - inside = string.gsub(inside, longv[i].a, longv[i].b) - end - for i = 1,#shortv do - inside = string.gsub(inside, shortv[i].a, shortv[i].b) - end - for i = 1,#punctuation do - inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) - end - for i = 1,#null do - inside = string.gsub(inside, null[i].a, null[i].b) - end - inside = indnum(inside) - return string.format("\\txarb{%s}", inside) - end) + for i = 1,#hamzaeasy do + inside = string.gsub(inside, hamzaeasy[i].a, hamzaeasy[i].b) + end + for i = 1,#tanwineasy do + inside = string.gsub(inside, tanwineasy[i].a, tanwineasy[i].b) + end + for i = 1,#trigraphseasy do + inside = string.gsub(inside, trigraphseasy[i].a, trigraphseasy[i].b) + end + for i = 1,#digraphs do + inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) + end + for i = 1,#single do + inside = string.gsub(inside, single[i].a, single[i].b) + end + for i = 1,#longv do + inside = string.gsub(inside, longv[i].a, longv[i].b) + end + for i = 1,#shortv do + inside = string.gsub(inside, shortv[i].a, shortv[i].b) + end + for i = 1,#punctuation do + inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) + end + for i = 1,#null do + inside = string.gsub(inside, null[i].a, null[i].b) + end + inside = indnum(inside) + return string.format("\\txarb{%s}", inside) + end) return str end local function fullvoc(str) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) - for i = 1,#hamzafv do - inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b) - end - for i = 1,#tanwinfv do - inside = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b) - end - for i = 1,#trigraphsfv do - inside = string.gsub(inside, trigraphsfv[i].a, trigraphsfv[i].b) - end - for i = 1,#digraphsfv do - inside = string.gsub(inside, digraphsfv[i].a, digraphsfv[i].b) - end - for i = 1,#singlefv do - inside = string.gsub(inside, singlefv[i].a, singlefv[i].b) - end - for i = 1,#longv do - inside = string.gsub(inside, longv[i].a, longv[i].b) - end - for i = 1,#shortv do - inside = string.gsub(inside, shortv[i].a, shortv[i].b) - end - for i = 1,#punctuation do - inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) - end - for i = 1,#null do - inside = string.gsub(inside, null[i].a, null[i].b) - end - inside = indnum(inside) - return string.format("\\txarb{%s}", inside) - end) + for i = 1,#hamzafv do + inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b) + end + for i = 1,#tanwinfv do + inside = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b) + end + for i = 1,#trigraphsfv do + inside = string.gsub(inside, trigraphsfv[i].a, trigraphsfv[i].b) + end + for i = 1,#digraphsfv do + inside = string.gsub(inside, digraphsfv[i].a, digraphsfv[i].b) + end + for i = 1,#singlefv do + inside = string.gsub(inside, singlefv[i].a, singlefv[i].b) + end + for i = 1,#longv do + inside = string.gsub(inside, longv[i].a, longv[i].b) + end + for i = 1,#shortv do + inside = string.gsub(inside, shortv[i].a, shortv[i].b) + end + for i = 1,#punctuation do + inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) + end + for i = 1,#null do + inside = string.gsub(inside, null[i].a, null[i].b) + end + inside = indnum(inside) + return string.format("\\txarb{%s}", inside) + end) +return str +end + +local function fullvoceasy(str) + str = string.gsub(str, "\\arb(%b{})", function(inside) + inside = string.sub(inside, 2, -2) + for i = 1,#hamzafveasy do + inside = string.gsub(inside, hamzafveasy[i].a, hamzafveasy[i].b) + end + for i = 1,#tanwinfveasy do + inside = string.gsub(inside, tanwinfveasy[i].a, tanwinfveasy[i].b) + end + for i = 1,#trigraphsfveasy do + inside = string.gsub(inside, trigraphsfveasy[i].a, trigraphsfveasy[i].b) + end + for i = 1,#digraphsfveasy do + inside = string.gsub(inside, digraphsfveasy[i].a, digraphsfveasy[i].b) + end + for i = 1,#singlefveasy do + inside = string.gsub(inside, singlefveasy[i].a, singlefveasy[i].b) + end + for i = 1,#longv do + inside = string.gsub(inside, longv[i].a, longv[i].b) + end + for i = 1,#shortv do + inside = string.gsub(inside, shortv[i].a, shortv[i].b) + end + for i = 1,#punctuation do + inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) + end + for i = 1,#null do + inside = string.gsub(inside, null[i].a, null[i].b) + end + inside = indnum(inside) + return string.format("\\txarb{%s}", inside) + end) return str end local function novoc(str) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) - for i = 1,#hamza do - inside = string.gsub(inside, hamza[i].a, hamza[i].b) - end - for i = 1,#tanwinnv do - inside = string.gsub(inside, tanwinnv[i].a, tanwinnv[i].b) - end - for i = 1,#trigraphsnv do - inside = string.gsub(inside, trigraphsnv[i].a, trigraphsnv[i].b) - end - for i = 1,#digraphs do - inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) - end - for i = 1,#single do - inside = string.gsub(inside, single[i].a, single[i].b) - end - for i = 1,#longvnv do - inside = string.gsub(inside, longvnv[i].a, longvnv[i].b) - end - for i = 1,#shortvnv do - inside = string.gsub(inside, shortvnv[i].a, shortvnv[i].b) - end - for i = 1,#punctuation do - inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) - end - for i = 1,#null do - inside = string.gsub(inside, null[i].a, null[i].b) - end - inside = indnum(inside) - return string.format("\\txarb{%s}", inside) - end) + for i = 1,#hamza do + inside = string.gsub(inside, hamza[i].a, hamza[i].b) + end + for i = 1,#tanwinnv do + inside = string.gsub(inside, tanwinnv[i].a, tanwinnv[i].b) + end + for i = 1,#trigraphsnv do + inside = string.gsub(inside, trigraphsnv[i].a, trigraphsnv[i].b) + end + for i = 1,#digraphs do + inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) + end + for i = 1,#single do + inside = string.gsub(inside, single[i].a, single[i].b) + end + for i = 1,#longvnv do + inside = string.gsub(inside, longvnv[i].a, longvnv[i].b) + end + for i = 1,#shortvnv do + inside = string.gsub(inside, shortvnv[i].a, shortvnv[i].b) + end + for i = 1,#punctuation do + inside = string.gsub(inside, punctuation[i].a, punctuation[i].b) + end + for i = 1,#null do + inside = string.gsub(inside, null[i].a, null[i].b) + end + inside = indnum(inside) + return string.format("\\txarb{%s}", inside) + end) return str end local function transdmg(str) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) - for i = 1,#hamzatrdmg do - inside = string.gsub(inside, hamzatrdmg[i].a, hamzatrdmg[i].b) - end - for i = 1,#tanwintrdmg do - inside = string.gsub(inside, tanwintrdmg[i].a, tanwintrdmg[i].b) - end - for i = 1,#trigraphstrdmg do - inside = string.gsub(inside, trigraphstrdmg[i].a, trigraphstrdmg[i].b) - end - for i = 1,#digraphstrdmg do - inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b) - end - for i = 1,#singletrdmg do - inside = string.gsub(inside, singletrdmg[i].a, singletrdmg[i].b) - end - for i = 1,#longvtrdmg do - inside = string.gsub(inside, longvtrdmg[i].a, longvtrdmg[i].b) - end - for i = 1,#shortvtrdmg do - inside = string.gsub(inside, shortvtrdmg[i].a, shortvtrdmg[i].b) - end - for i = 1,#punctuationtr do - inside = string.gsub(inside, punctuationtr[i].a, punctuationtr[i].b) - end - for i = 1,#nulltr do - inside = string.gsub(inside, nulltr[i].a, nulltr[i].b) - end - return string.format("\\txtrans{%s}", inside) - end) + for i = 1,#hamzatrdmg do + inside = string.gsub(inside, hamzatrdmg[i].a, hamzatrdmg[i].b) + end + for i = 1,#tanwintrdmg do + inside = string.gsub(inside, tanwintrdmg[i].a, tanwintrdmg[i].b) + end + for i = 1,#trigraphstrdmg do + inside = string.gsub(inside, trigraphstrdmg[i].a, trigraphstrdmg[i].b) + end + for i = 1,#digraphstrdmg do + inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b) + end + for i = 1,#singletrdmg do + inside = string.gsub(inside, singletrdmg[i].a, singletrdmg[i].b) + end + for i = 1,#longvtrdmg do + inside = string.gsub(inside, longvtrdmg[i].a, longvtrdmg[i].b) + end + for i = 1,#shortvtrdmg do + inside = string.gsub(inside, shortvtrdmg[i].a, shortvtrdmg[i].b) + end + for i = 1,#punctuationtr do + inside = string.gsub(inside, punctuationtr[i].a, punctuationtr[i].b) + end + for i = 1,#nulltr do + inside = string.gsub(inside, nulltr[i].a, nulltr[i].b) + end + return string.format("\\txtrans{%s}", inside) + end) +return str +end + +local function transdmgeasy(str) + str = string.gsub(str, "\\arb(%b{})", function(inside) + inside = string.sub(inside, 2, -2) + for i = 1,#hamzatrdmg do + inside = string.gsub(inside, hamzatrdmg[i].a, hamzatrdmg[i].b) + end + for i = 1,#tanwintrdmg do + inside = string.gsub(inside, tanwintrdmg[i].a, tanwintrdmg[i].b) + end + for i = 1,#trigraphstrdmgeasy do + inside = string.gsub(inside, trigraphstrdmgeasy[i].a, trigraphstrdmgeasy[i].b) + end + for i = 1,#digraphstrdmg do + inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b) + end + for i = 1,#singletrdmg do + inside = string.gsub(inside, singletrdmg[i].a, singletrdmg[i].b) + end + for i = 1,#longvtrdmg do + inside = string.gsub(inside, longvtrdmg[i].a, longvtrdmg[i].b) + end + for i = 1,#shortvtrdmg do + inside = string.gsub(inside, shortvtrdmg[i].a, shortvtrdmg[i].b) + end + for i = 1,#punctuationtr do + inside = string.gsub(inside, punctuationtr[i].a, punctuationtr[i].b) + end + for i = 1,#nulltr do + inside = string.gsub(inside, nulltr[i].a, nulltr[i].b) + end + return string.format("\\txtrans{%s}", inside) + end) return str end local function transloc(str) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) - for i = 1,#hamzatrloc do - inside = string.gsub(inside, hamzatrloc[i].a, hamzatrloc[i].b) - end - for i = 1,#tanwintrloc do - inside = string.gsub(inside, tanwintrloc[i].a, tanwintrloc[i].b) - end - for i = 1,#trigraphstrloc do - inside = string.gsub(inside, trigraphstrloc[i].a, trigraphstrloc[i].b) - end - for i = 1,#digraphstrloc do - inside = string.gsub(inside, digraphstrloc[i].a, digraphstrloc[i].b) - end - for i = 1,#singletrloc do - inside = string.gsub(inside, singletrloc[i].a, singletrloc[i].b) - end - for i = 1,#longvtrloc do - inside = string.gsub(inside, longvtrloc[i].a, longvtrloc[i].b) - end - for i = 1,#shortvtrloc do - inside = string.gsub(inside, shortvtrloc[i].a, shortvtrloc[i].b) - end - for i = 1,#finaltrloc do - inside = string.gsub(inside, finaltrloc[i].a, finaltrloc[i].b) - end - for i = 1,#punctuationtr do - inside = string.gsub(inside, punctuationtr[i].a, punctuationtr[i].b) - end - for i = 1,#nulltr do - inside = string.gsub(inside, nulltr[i].a, nulltr[i].b) - end - return string.format("\\txtrans{%s}", inside) - end) + for i = 1,#hamzatrloc do + inside = string.gsub(inside, hamzatrloc[i].a, hamzatrloc[i].b) + end + for i = 1,#tanwintrloc do + inside = string.gsub(inside, tanwintrloc[i].a, tanwintrloc[i].b) + end + for i = 1,#trigraphstrloc do + inside = string.gsub(inside, trigraphstrloc[i].a, trigraphstrloc[i].b) + end + for i = 1,#digraphstrloc do + inside = string.gsub(inside, digraphstrloc[i].a, digraphstrloc[i].b) + end + for i = 1,#singletrloc do + inside = string.gsub(inside, singletrloc[i].a, singletrloc[i].b) + end + for i = 1,#longvtrloc do + inside = string.gsub(inside, longvtrloc[i].a, longvtrloc[i].b) + end + for i = 1,#shortvtrloc do + inside = string.gsub(inside, shortvtrloc[i].a, shortvtrloc[i].b) + end + for i = 1,#finaltrloc do + inside = string.gsub(inside, finaltrloc[i].a, finaltrloc[i].b) + end + for i = 1,#punctuationtr do + inside = string.gsub(inside, punctuationtr[i].a, punctuationtr[i].b) + end + for i = 1,#nulltr do + inside = string.gsub(inside, nulltr[i].a, nulltr[i].b) + end + return string.format("\\txtrans{%s}", inside) + end) return str end @@ -356,20 +427,24 @@ function processvoc(str, rules) str = holdcmd(str) if rules == "easy" then str = voceasy(str) - elseif mode == "dflt" then + elseif rules == "dflt" then str = voc(str) else end str = unprotectarb(str) return str end -function processfullvoc(str) +function processfullvoc(str, rules) str = "\\arb{".. str.."}" str = takeoutcap(str) str = protectarb(str) str = breakcmd(str) str = holdcmd(str) - str = fullvoc(str) + if rules == "easy" then + str = fullvoceasy(str) + elseif rules == "dflt" then + str = fullvoc(str) + else end str = unprotectarb(str) return str end @@ -385,14 +460,18 @@ function processnovoc(str) return str end -function processtrans(str, mode) +function processtrans(str, mode, rules) str = "\\arb{".. str.."}" str = takeoutabjad(str) str = protectarb(str) str = breakcmd(str) str = holdcmd(str) if mode == "dmg" then - str = transdmg(str) + if rules == "easy" then + str = transdmgeasy(str) + elseif rules == "dflt" then + str = transdmg(str) + else end elseif mode == "loc" then str = transloc(str) else end diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index 8263734..a2676e0 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua @@ -22,7 +22,6 @@ along with this program. If not, see . --]] --- this is new hamzafv = { -- hard coded hamza {a="|\"'", b="ء"}, @@ -115,6 +114,98 @@ hamzafv = { {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} } +hamzafveasy = { -- differences marked below with 'easy' + -- hard coded hamza + {a="|\"'", b="ء"}, + {a="A\"'", b="آ"}, + {a="[au]\"'", b="أ"}, + {a="w\"'", b="ؤ"}, + {a="i\"'", b="إ"}, + {a="y\"'", b="ئ"}, + {a="ؤ([^uaiUAI])", b="ؤْ%1"}, + {a="ؤ$", b="ؤْ"}, + {a="ؤ(%s)", b="ؤْ%1"}, + {a="أ([^uaiUAI])", b="أْ%1"}, + {a="أ$", b="أْ"}, + {a="أ(%s)", b="أْ%1"}, + {a="ئ([^uaiUAI])", b="ئْ%1"}, + {a="ئ$", b="ئْ"}, + {a="ئ(%s)", b="ئْ%1"}, + -- hamza takes tašdīd too + {a="''([Uu])", b="ؤؤ%1"}, + {a="''([Aa])", b="أأ%1"}, + {a="''([Ii])", b="ئئ%1"}, + -- initial long u and i (for a, see below) + {a="%'%_U", b="أU"}, + {a="%'%_I", b="إI"}, + -- taḫfīfu 'l-hamza + {a="'u'([^uaiUAI])", b="أU%1"}, + {a="'i'([^uaiUAI])", b="إI%1"}, + -- madda (historic writing below) + {a="'a'([^uaiUAI])", b="آ%1"}, + {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, +--easy {a="(A)(')(uN?)$", b="aآء%3"}, +--easy {a="(A)(')(uN?)(%W)", b="aآء%3%4"}, +--easy {a="(A)(')(iN?)$", b="aآء%3"}, +--easy {a="(A)(')(iN?)(%W)", b="aآء%3%4"}, +--easy {a="(A)(')(i)", b="aآئ%3"}, -- historic madda +--easy {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda +--easy {a="(A)(')", b="aآء"}, -- historic madda + -- initial (needs both ^ and %W patterns) + {a="^(')([ua])", b="أ%2"}, + {a="^(')(i)", b="إ%2"}, + {a="(%W)(')([ua])", b="%1أ%3"}, + {a="(%W)(')(i)", b="%1إ%3"}, + -- final + -- ^say'aN and .zim'aN are special orthographies + {a="(%^say)(%')(aN)", b="%1ئ%3"}, + {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, + {a="([^uai])(')([uai]N?)$", b="%1ء%3"}, + {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"}, +-- u + {a="(u)(')([uai]?N)$", b="%1ؤ%3"}, + {a="(u)(')([uai]N?)(%W)", b="%1ؤ%3%4"}, + {a="(u)(')$", b="%1ؤْ"}, + {a="(u)(')(%W)", b="%1ؤْ%3"}, +-- a + {a="(a)(')(A)$", b="%1آ"}, + {a="(a)(')(A)(%W)", b="%1آ%4"}, + {a="(a)(')([u]N?)$", b="%1أ%3"}, + {a="(a)(')([u]N?)(%W)", b="%1أ%3%4"}, + {a="(a)(')(a)$", b="%1أ%3"}, + {a="(a)(')(a)(%W)", b="%1أ%3%4"}, + {a="(a)(')(aN)$", b="%1أً"}, + {a="(a)(')(aN)(%W)", b="%1أً%4"}, + {a="(a)(')([i]N?)$", b="%1إ%3"}, + {a="(a)(')([i]N?)(%W)", b="%1إ%3%4"}, + {a="(a)(')$", b="%1أْ"}, + {a="(a)(')(%W)", b="%1أْ%3"}, +-- i + {a="(i)(')([uai]N?)$", b="%1ئ%3"}, + {a="(i)(')([uai]N?)(%W)", b="%1ئ%3%4"}, + {a="(i)(')$", b="%1ئْ"}, + {a="(i)(')(%W)", b="%1ئْ%3"}, +-- + -- middle + {a="(U)(')", b="%1ء"}, + {a="([Iy])(')", b="%1ئ"}, + {a="([^uai])(')([uU])", b="%1ؤ%3"}, + {a="([^uai])(')([aA])", b="%1أ%3"}, + {a="([^uai])(')([iI])", b="%1ئ%3"}, + {a="(u)(')([uU])", b="%1ؤ%3"}, + {a="(u)(')([aA])", b="%1ؤ%3"}, + {a="(u)(')([iI])", b="%1ئ%3"}, + {a="(a)(')([aA])", b="%1أ%3"}, + {a="(a)(')([uU])", b="%1ؤ%3"}, + {a="(a)(')([iI])", b="%1ئ%3"}, + {a="(i)(')([aA])", b="%1ئ%3"}, + {a="(i)(')([uU])", b="%1ئ%3"}, + {a="(i)(')([iI])", b="%1ئ%3"}, + {a="(a)(')([^uaiUAI])", b="%1أْ%3"}, + {a="(u)(')([^uaiUAI])", b="%1ؤْ%3"}, + {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} +} + tanwinfv = { {a="uNU", b="ٌو"}, {a="aNU", b="ًوا"}, @@ -150,7 +241,41 @@ tanwinfv = { {a="(iN)", b="ٍ"} } --- this is new +tanwinfveasy = { -- no assimilations (see below) + {a="uNU", b="ٌو"}, + {a="aNU", b="ًوا"}, + {a="iNU", b="ٍو"}, + {a="([uai]N)(%s)([uai])", b="%1%2ٱ"}, + {a="(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"}, + -- assimilations (begin) +--easy {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, +--easy {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, +--easy {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, +--easy {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +--easy {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +--easy {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, +--easy {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, + -- assimilations (end) + -- quoted tanwīn (begin) + {a="(\"uN)", b=""}, + {a="(B)(\"aN)", b="%1"}, + {a="(\"aN)(_A)", b="ى"}, + {a="(\"aN)(Y)", b="ى"}, + {a="(T)(\"aN)", b="%1"}, + {a="(ء)(\"aN)", b="%1"}, + {a="([^TA])(\"aN)", b="%1ا"}, + {a="(\"iN)", b=""}, + -- quoted tanwīn (end) + {a="(uN)", b="ٌ"}, + {a="(B)(aN)", b="%1ً"}, + {a="(aN)(_A)", b="ًى"}, + {a="(aN)(Y)", b="ًى"}, + {a="(T)(aN)", b="%1ً"}, + {a="(ء)(aN)", b="%1ً"}, + {a="([^TA])(aN)", b="%1ًا"}, + {a="(iN)", b="ٍ"} +} + trigraphsfv = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, @@ -207,7 +332,62 @@ trigraphsfv = { -- trigraphs or more {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} } --- this is new +trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs) + -- 'llatI / 'llad_I + {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, + {a="([%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, + -- al- + lām + {a="^(a)l%-(l)", b="ا%1ل%2%2"}, + {a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, + -- al- + solar consonant + {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, + {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, + -- assim. art. + solar consonant + {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, + {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, + -- al- + initial unstable hamza + {a="^(a)l%-(\"?[uai])", b="ا%1لٱ%2"}, + {a="([%s%-])(a)l%-(\"?[uai])", b="%1ا%2لٱ%3"}, + -- li-/la + art. + initial unstable hamza is a special orthography + {a="l([ai])%-l%-(\"?[uai])", b="ل%1لٱ%2"}, + -- al- + lunar consonant (i.e. what remains) + {a="^(a)l%-", b="ا%1لْ"}, + {a="([%s%-])(a)l%-", b="%1ا%2لْ"}, + -- diphthongs to be resolved before ʾalif conjunctionis + {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, + {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, + {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + -- art. with waṣla + lām + {a="'l%-(l)", b="ٱل%1%1"}, + -- art. with waṣla + solar consonant + {a="'l%-([%_%^%.]?[tdrzsn])", b="ٱل%1%1"}, + -- li-/la- + art. + lām + {a="l([ai])%-l%-(l)", b="ل%1%2%2"}, + -- assim. art. with waṣla + solar consonant + {a="'([%_%^%.]?[tdrzsn])%-", b="ٱل%1"}, + -- li-/la- + art. + solar consonant is a special orthography + {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2%2"}, + -- li-/la- + assim. art. + solar consonant is a special orthography + {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3%3"}, + -- art. with waṣla + initial unstable hamza + {a="'l%-(\"?[uai])", b="ٱلٱ%1"}, + -- art. with waṣla + lunar consonant (i.e. what remains) + {a="'l%-", b="ٱلْ"}, + -- the silent wāw + {a="uU$", b="uو"}, + {a="uU(%W)", b="uو%1"}, + {a="aU$", b="aو"}, + {a="aU(%W)", b="aو%1"}, + {a="iU$", b="iو"}, + {a="iU(%W)", b="iو%1"}, + -- words ending in -āT with silent wāw/yāʾ + {a="(_a)UA", b="%1وا"}, + {a="(_a)U", b="%1و"}, + {a="(_a)I", b="%1ي"}, + -- assimilations +--easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} +} + digraphsfv = { -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, @@ -293,6 +473,94 @@ digraphsfv = { {a="%^d", b="ڊ"} } +digraphsfveasy = { -- see the diffenrences under 'easy' marker below + -- initial straight double quote gives a connective ʾalif + {a="^\"[uai]", b="ٱ"}, + {a="([%s%-])\"[uai]", b="%1ٱ"}, + -- diphthongs to be resolved before ʾalif conjunctionis + {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, + {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, + {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, + {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza + -- initial alif without hamza + {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, + {a="^([uai])", b="ا%1"}, -- initial alif without hamza + {a="(%s)([uai])", b="%1ا%2"}, -- initial alif without hamza + {a="%-%-", b="ـ"}, + {a="ؤؤ", b="ؤّ"}, + {a="أأ", b="أّ"}, + {a="ئئ", b="ئّ"}, + {a="bb", b="بّ"}, + {a="BB", b="ـّ"}, + {a="(%_)([thd])([thd])", b="%1%2|%3"}, + {a="tt", b="تّ"}, + {a="%_t%_t", b="ثّ"}, + {a="jj", b="جّ"}, + {a="%^g%^g", b="جّ"}, + {a="xx", b="خّ"}, + {a="%_h%_h", b="خّ"}, + {a="dd", b="دّ"}, + {a="%_d%_d", b="ذّ"}, + {a="rr", b="رّ"}, + {a="zz", b="زّ"}, + {a="ss", b="سّ"}, + {a="%^s%^s", b="شّ"}, + {a="%.s%.s", b="صّ"}, + {a="%.d%.d", b="ضّ"}, + {a="%.t%.t", b="طّ"}, + {a="%.z%.z", b="ظّ"}, + {a="%`%`", b="عّ"}, + {a="%.g%.g", b="غّ"}, + {a="ff", b="فّ"}, + {a="qq", b="قّ"}, + {a="kk", b="كّ"}, + {a="ll", b="لّ"}, + {a="mm", b="مّ"}, + {a="nn", b="نّ"}, + {a="hh", b="هّ"}, + {a="ww", b="وّ"}, + {a="yy", b="يّ"}, + -- sukūn begin ('easy' needs these rules to be taken out); but + -- first take out every previously generated sukūn by hamza rules, + -- so there be no need to edit them: + {a="ْ", b=""}, +-- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])$", b="%1ْ"}, +-- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, +-- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIًٌٍ])", b="%1ْ%2"}, + -- take out sukūn in cases of assimilation +-- {a="(n)(ْ)(%s)([روي])", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)([ل])", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)([م])", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)([ن])", b="%1%3%4"}, +-- {a="ْ\"", b="\""}, + -- sukūn end + {a="_t", b="ث"}, + {a="%^g", b="ج"}, + {a="%.h", b="ح"}, + {a="_h", b="خ"}, + {a="_d", b="ذ"}, + {a="%^s", b="ش"}, + {a="%.s", b="ص"}, + {a="%.d", b="ض"}, + {a="%.t", b="ط"}, + {a="%.z", b="ظ"}, + {a="%.g", b="غ"}, + {a="(U)(A)", b="%1ا"}, + {a="WA", b="وْا"}, + {a="(a)W\"", b="%1وا"}, + {a="(a)W", b="%1وْا"}, + {a="_A", b="aى"}, + {a="_u", b="ٗ"}, + {a="_a", b="ٰ"}, + {a="_i", b="ٖ"}, + {a="%.b", b="ٮ"}, + {a="%.f", b="ڡ"}, + {a="%.q", b="ٯ"}, + {a="%.k", b="ک"}, + {a="%.n", b="ں"}, + {a="%^d", b="ڊ"} +} + singlefv = { {a="b", b="ب"}, {a="t", b="ت"}, @@ -320,3 +588,34 @@ singlefv = { {a="([^0-9])%-([^0-9])", b="%1%2"}, {a="B", b="ـ"}, } + +singlefveasy = { -- see the differences under 'easy' tag below + {a="b", b="ب"}, + {a="t", b="ت"}, + {a="j", b="ج"}, + {a="x", b="خ"}, + {a="d", b="د"}, + {a="r", b="ر"}, + {a="z", b="ز"}, + {a="s", b="س"}, + {a="f", b="ف"}, + {a="`", b="ع"}, + {a="f", b="ف"}, + {a="q", b="ق"}, + {a="k", b="ك"}, + {a="l", b="ل"}, + {a="m", b="م"}, + {a="n", b="ن"}, + {a="h", b="ه"}, + {a="w", b="و"}, + {a="y", b="ي"}, + {a="T", b="ة"}, + -- easy (begin): \" needs to put back the sukūn + {a="\"$", b="ْ"}, + {a="\"(%W)", b="ْ%1"}, + {a="\"([^uaiUAI])", b="ْ%1"}, + -- easy (end) + {a="([^0-9])%-([^0-9])", b="%1%2"}, + {a="B", b="ـ"}, +} + diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index 6b3f1bc..c96a9da 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -201,6 +201,61 @@ trigraphstrdmg = { -- trigraphs or more {a="(n)(%s)([rlmnwy])", b="%3%2%3"} } +trigraphstrdmgeasy = { -- see the differences below under 'easy' tag + -- 'llatI / 'llad_I + {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"}, + {a="(%s)'ll(a)([%_]?[dt])", b="%1'll%2%3"}, + -- al- + lām + {a="^(a)l%-(l)", b="%1l-%2"}, + {a="([%s%-])(a)l%-(l)", b="%1%2l-%3"}, + -- al- + solar consonant + {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2-%2"}, + {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, + -- assim. art. + solar consonant + {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1%2-"}, + {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, + -- al- + initial unstable hamza + {a="^(a)l%-([uai])", b="%1l-%2"}, + {a="([%s%-])(a)l%-([uai])", b="%1%2l-%3"}, + -- li-/la- + art. + initial unstable hamza is a special orthography + {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, + -- al- + lunar consonant (i.e. what remains) + {a="^(a)l%-", b="%1l-"}, + {a="([%s%-])(a)l%-", b="%1%2l-"}, + -- diphthongs to be resolved before ʾalif conjunctionis + {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, + {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + -- art. with waṣla + lām + {a="'l%-(l)", b="'l-%1"}, + -- art. with waṣla + solar consonant + {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"}, + -- li-/la- + art. + lām + {a="l([ai])%-l%-(l)", b="l%1-%2%2"}, + -- assim. art. with waṣla + solar consonant + {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"}, + -- li-/la- + art. + solar consonant is a special orthography + {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, + -- li-/la- + assim. art. + solar consonant is a special orthography + {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%3"}, + -- art. with waṣla + initial unstable hamza + {a="'l%-([uai])", b="'l-%1"}, + -- art. with waṣla + lunar consonant (i.e. what remains) + {a="'l%-", b="'l-"}, + -- the silent wāw + {a="uU$", b="u"}, + {a="uU(%W)", b="u%1"}, + {a="aU$", b="a"}, + {a="aU(%W)", b="a%1"}, + {a="iU$", b="i"}, + {a="iU(%W)", b="i%1"}, + -- words ending in -āT with silent wāw/yāʾ + {a="(_a)UA", b="A"}, + {a="(_a)U", b="A"}, + {a="(_a)I", b="A"} + -- assimilations +--easy {a="(n)(%s)([rlmnwy])", b="%3%2%3"} +} + digraphstrdmg = { {a="([uai]%-)(\"?[uai])", b="%1'"}, -- hyphen + initial alif without hamza {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 5eafca5..4f5399f 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua @@ -154,6 +154,89 @@ hamza = { {a="(i)(')([^uaiUAI])", b="%1ئ%3"} } +hamzaeasy = { -- differences marked below with 'easy' + -- hard coded hamza + {a="|\"'", b="ء"}, + {a="A\"'", b="آ"}, + {a="[au]\"'", b="أ"}, + {a="w\"'", b="ؤ"}, + {a="i\"'", b="إ"}, + {a="y\"'", b="ئ"}, + -- hamza takes tašdīd too + {a="''([Uu])", b="ؤؤ%1"}, + {a="''([Aa])", b="أأ%1"}, + {a="''([Ii])", b="ئئ%1"}, + -- initial long u and i (for a, see below) + {a="%'%_U", b="أU"}, + {a="%'%_I", b="إI"}, + -- taḫfīfu 'l-hamza + {a="'u'([^uaiUAI])", b="أU%1"}, + {a="'i'([^uaiUAI])", b="إI%1"}, + -- madda (historic writing below) + {a="'a'([^uaiUAI])", b="آ%1"}, + {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, +--easy {a="(A)(')(uN?)$", b="aآء%3"}, +--easy {a="(A)(')(uN?)(%W)", b="aآء%3%4"}, +--easy {a="(A)(')(iN?)$", b="aآء%3"}, +--easy {a="(A)(')(iN?)(%W)", b="aآء%3%4"}, +--easy {a="(A)(')(i)", b="aآئ%3"}, -- historic madda +--easy {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda +--easy {a="(A)(')", b="aآء"}, -- historic madda + -- initial (needs both ^ and %W patterns) + {a="^(')([ua])", b="أ%2"}, + {a="^(')(i)", b="إ%2"}, + {a="(%W)(')([ua])", b="%1أ%3"}, + {a="(%W)(')(i)", b="%1إ%3"}, + -- final + -- ^say'aN and .zim'aN are special orthographies + {a="(%^say)(%')(aN)", b="%1ئ%3"}, + {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, + {a="([^uai])(')([uai]N?)$", b="%1ء%3"}, + {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"}, +-- u + {a="(u)(')([uai]?N)$", b="%1ؤ%3"}, + {a="(u)(')([uai]N?)(%W)", b="%1ؤ%3%4"}, + {a="(u)(')$", b="%1ؤ"}, + {a="(u)(')(%W)", b="%1ؤ%3"}, +-- a + {a="(a)(')(A)$", b="%1آ"}, + {a="(a)(')(A)(%W)", b="%1آ%4"}, + {a="(a)(')([u]N?)$", b="%1أ%3"}, + {a="(a)(')([u]N?)(%W)", b="%1أ%3%4"}, + {a="(a)(')(a)$", b="%1أ%3"}, + {a="(a)(')(a)(%W)", b="%1أ%3%4"}, + {a="(a)(')(aN)$", b="%1أً"}, + {a="(a)(')(aN)(%W)", b="%1أً%4"}, + {a="(a)(')([i]N?)$", b="%1إ%3"}, + {a="(a)(')([i]N?)(%W)", b="%1إ%3%4"}, + {a="(a)(')$", b="%1أ"}, + {a="(a)(')(%W)", b="%1أ%3"}, +-- i + {a="(i)(')([uai]N?)$", b="%1ئ%3"}, + {a="(i)(')([uai]N?)(%W)", b="%1ئ%3%4"}, + {a="(i)(')$", b="%1ئ"}, + {a="(i)(')(%W)", b="%1ئ%3"}, +-- + -- middle + {a="(U)(')", b="%1ء"}, + {a="([Iy])(')", b="%1ئ"}, + {a="([^uai])(')([uU])", b="%1ؤ%3"}, + {a="([^uai])(')([aA])", b="%1أ%3"}, + {a="([^uai])(')([iI])", b="%1ئ%3"}, + {a="(u)(')([uU])", b="%1ؤ%3"}, + {a="(u)(')([aA])", b="%1ؤ%3"}, + {a="(u)(')([iI])", b="%1ئ%3"}, + {a="(a)(')([aA])", b="%1أ%3"}, + {a="(a)(')([uU])", b="%1ؤ%3"}, + {a="(a)(')([iI])", b="%1ئ%3"}, + {a="(i)(')([aA])", b="%1ئ%3"}, + {a="(i)(')([uU])", b="%1ئ%3"}, + {a="(i)(')([iI])", b="%1ئ%3"}, + {a="(a)(')([^uaiUAI])", b="%1أ%3"}, + {a="(u)(')([^uaiUAI])", b="%1ؤ%3"}, + {a="(i)(')([^uaiUAI])", b="%1ئ%3"} +} + tanwin = { {a="uNU", b="ٌو"}, {a="aNU", b="ًوا"}, @@ -187,6 +270,39 @@ tanwin = { {a="(iN)", b="ٍ"} } +tanwineasy = { -- 'easy' requires some lines to be taken out: + {a="uNU", b="ٌو"}, + {a="aNU", b="ًوا"}, + {a="iNU", b="ٍو"}, + -- assimilations (begin) +-- {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, +-- {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, +-- {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, +-- {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +-- {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +-- {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, +-- {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, + -- assimilations (end) + -- quoted tanwīn (begin) + {a="(\"uN)", b=""}, + {a="(B)(\"aN)", b="%1"}, + {a="(\"aN)(_A)", b="ى"}, + {a="(\"aN)(Y)", b="ى"}, + {a="(T)(\"aN)", b="%1"}, + {a="(ء)(\"aN)", b="%1"}, + {a="([^TA])(\"aN)", b="%1ا"}, + {a="(\"iN)", b=""}, + -- quoted tanwīn (end) + {a="(uN)", b="ٌ"}, + {a="(B)(aN)", b="%1ً"}, + {a="(aN)(_A)", b="ًى"}, + {a="(aN)(Y)", b="ًى"}, + {a="(T)(aN)", b="%1ً"}, + {a="(ء)(aN)", b="%1ً"}, + {a="([^TA])(aN)", b="%1ًا"}, + {a="(iN)", b="ٍ"} +} + trigraphs = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, @@ -247,6 +363,66 @@ trigraphs = { -- trigraphs or more {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} } +trigraphseasy = { -- differences marked below with 'easy' + -- 'llatI / 'llad_I + {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, + {a="([%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, + -- al- + lām (easy) + {a="^(a)l%-(l)", b="ا%1ل%2"}, + {a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, + -- al- + solar consonant (easy) + {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"}, + {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, + -- assim. art. + solar consonant (easy) + {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"}, + {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, + -- al- + initial unstable hamza + {a="^(a)l%-(\")([uai])", b="ا%1لٱ%3"}, + {a="([%s%-])(a)l%-(\")([uai])", b="%1ا%2لٱ%4"}, + {a="^(a)l%-([uai])", b="ا%1لا%2"}, + {a="([%s%-])(a)l%-([uai])", b="%1ا%2لا%3"}, + -- li-/la- + art. + initial unstable hamza is a special orthography + {a="l([ai])%-l%-(\")([uai])", b="ل%1لٱ%3"}, + {a="l([ai])%-l%-([uai])", b="ل%1لا%2"}, + -- al- + lunar consonant (i.e. what remains) + {a="^(a)l%-", b="ا%1ل"}, + {a="([%s%-])(a)l%-", b="%1ا%2ل"}, + -- diphthongs to be resolved before ʾalif conjunctionis + {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, + {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, + {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + -- art. with waṣla + lām (easy) + {a="'l%-(l)", b="ال%1"}, + -- art. with waṣla + solar consonant (easy) + {a="'l%-([%_%^%.]?[tdrzsn])", b="ال%1"}, + -- li-/la- + art. + lām (easy) + {a="l([ai])%-l%-(l)", b="ل%1%2"}, + -- assim. art. with waṣla + solar consonant (easy) + {a="'([%_%^%.]?[tdrzsn])%-", b="ال"}, + -- li-/la- + art. + solar consonant is a special orthography (easy) + {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2"}, + -- li-/la + assim. art. + solar consonant is a special orthography (easy) + {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3"}, + -- art. with waṣla + initial unstable hamza + {a="'l%-(\")([uai])", b="الٱ%2"}, + {a="'l%-([uai])", b="الا%1"}, + -- art. with waṣla + lunar consonant (i.e. what remains) + {a="'l%-", b="ال"}, + -- the silent wāw + {a="uU$", b="uو"}, + {a="uU(%W)", b="uو%1"}, + {a="aU$", b="aو"}, + {a="aU(%W)", b="aو%1"}, + {a="iU$", b="iو"}, + {a="iU(%W)", b="iو%1"}, + -- words ending in -āT with silent wāw/yāʾ + {a="(_a)UA", b="%1وا"}, + {a="(_a)U", b="%1و"}, + {a="(_a)I", b="%1ي"}, + -- assimilations +--easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} +} + digraphs = { -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, -- cgit v1.2.3