From 07ec135217f9cf9c2cf4117ec5a6d65b8e1a4236 Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 30 Nov 2016 17:32:18 +0100 Subject: new option \SetArbDflt* for applying the assimilation rules for these are not applied by default anymore --- arabluatex.dtx | 13 +++++- arabluatex.lua | 108 +++++++++++++++++++++----------------------- arabluatex_fullvoc.lua | 119 +++++++++++++++++++++++++++++++++++++++++++++---- arabluatex_trans.lua | 60 ++----------------------- arabluatex_voc.lua | 9 ++-- 5 files changed, 183 insertions(+), 126 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index 0c5d2a1..4bd9057 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -2751,17 +2751,28 @@ wa-ya.sIru ta.hta 'l-jild-i % \arb[trans]{sukUn} that is generated, while the starred version % \cs{SetArbEasy*} takes it away. Default complex rules can be set % back at any point of the document with \cs{SetArbDflt}. +% \begin{macro}{\SetArbDflt*} +% \changes{v1.6}{2016/11/30}{This starred version applies the +% assimilation rules (\arb[trans]{al-'id.gAm}) in addition to what +% \cs{SetArbDflt} already does.} As of v1.6, \package{arabluatex} +% no longer applies the assimilation rules that are laid out on +% \vref{ref:assimilation}; a new starred version \cs{SetArbDflt*} is +% now available to the user should he wish to apply them. 
% \begin{macrocode} \def\al@arb@rules{dflt} \NewDocumentCommand{\SetArbEasy}{s}{% \IfBooleanTF{#1} {\def\al@arb@rules{easynosukun}} {\def\al@arb@rules{easy}}} -\NewDocumentCommand{\SetArbDflt}{}{\def\al@arb@rules{dflt}} +\NewDocumentCommand{\SetArbDflt}{s}{% + \IfBooleanTF{#1} + {\def\al@arb@rules{idgham}} + {\def\al@arb@rules{dflt}}} % \end{macrocode} % \end{macro} % \end{macro} % \end{macro} +% \end{macro} % \begin{macro}{\SetTranslitFont} % \changes{v1.4}{2016/07/05}{For selecting a specific font for % transliterated texts} By default, the font that is used for diff --git a/arabluatex.lua b/arabluatex.lua index 38fe87b..07968c0 100644 --- a/arabluatex.lua +++ b/arabluatex.lua @@ -146,18 +146,29 @@ local function takeoutcapetc(str) return str end -local function voc(str) +local function voc(str, rules) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) for i = 1,#hamza do inside = string.gsub(inside, hamza[i].a, hamza[i].b) end - for i = 1,#tanwin do - inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) + if rules == "idgham" then + for i = 1,#tanwin do + inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) + end + else + for i = 1,#tanwineasy do + inside = string.gsub(inside, tanwineasy[i].a, tanwineasy[i].b) + end end for i = 1,#trigraphs do inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b) end + if rules == "idgham" then + for i = 1,#idgham do + inside = string.gsub(inside, idgham[i].a, idgham[i].b) + end + end for i = 1,#digraphs do inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) end @@ -218,20 +229,37 @@ local function voceasy(str) return str end -local function fullvoc(str) +local function fullvoc(str, rules) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) for i = 1,#hamzafv do inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b) end - for i = 1,#tanwinfv do - inside = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b) + if rules == "idgham" then + 
for i = 1,#tanwinfv do + inside = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b) + end + else + for i = 1,#tanwinfveasy do + inside = string.gsub(inside, tanwinfveasy[i].a, tanwinfveasy[i].b) + end end for i = 1,#trigraphsfv do inside = string.gsub(inside, trigraphsfv[i].a, trigraphsfv[i].b) end - for i = 1,#digraphsfv do - inside = string.gsub(inside, digraphsfv[i].a, digraphsfv[i].b) + if rules == "idgham" then + for i = 1,#idgham do + inside = string.gsub(inside, idgham[i].a, idgham[i].b) + end + end + if rules == "idgham" then + for i = 1,#digraphsfvidgham do + inside = string.gsub(inside, digraphsfvidgham[i].a, digraphsfvidgham[i].b) + end + else + for i = 1,#digraphsfv do + inside = string.gsub(inside, digraphsfv[i].a, digraphsfv[i].b) + end end for i = 1,#singlefv do inside = string.gsub(inside, singlefv[i].a, singlefv[i].b) @@ -254,7 +282,7 @@ local function fullvoc(str) return str end -local function fullvoceasy(str, opt) +local function fullvoceasy(str, rules) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) for i = 1,#hamzafveasy do @@ -266,7 +294,7 @@ local function fullvoceasy(str, opt) for i = 1,#trigraphsfveasy do inside = string.gsub(inside, trigraphsfveasy[i].a, trigraphsfveasy[i].b) end - if opt == "nosukun" then + if rules == "nosukun" then for i = 1,#digraphsfveasy do inside = string.gsub(inside, digraphsfveasy[i].a, digraphsfveasy[i].b) end @@ -368,7 +396,7 @@ local function novoceasy(str) return str end -local function transdmg(str) +local function transdmg(str, rules) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) for i = 1,#hamzatrdmg do @@ -380,40 +408,10 @@ local function transdmg(str) for i = 1,#trigraphstrdmg do inside = string.gsub(inside, trigraphstrdmg[i].a, trigraphstrdmg[i].b) end - for i = 1,#digraphstrdmg do - inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b) - end - for i = 1,#singletrdmg do - inside = string.gsub(inside, 
singletrdmg[i].a, singletrdmg[i].b) - end - for i = 1,#longvtrdmg do - inside = string.gsub(inside, longvtrdmg[i].a, longvtrdmg[i].b) - end - for i = 1,#shortvtrdmg do - inside = string.gsub(inside, shortvtrdmg[i].a, shortvtrdmg[i].b) - end - for i = 1,#punctuationtr do - inside = string.gsub(inside, punctuationtr[i].a, punctuationtr[i].b) - end - for i = 1,#nulltr do - inside = string.gsub(inside, nulltr[i].a, nulltr[i].b) - end - return string.format("\\txtrans{%s}", inside) - end) -return str -end - -local function transdmgeasy(str) - str = string.gsub(str, "\\arb(%b{})", function(inside) - inside = string.sub(inside, 2, -2) - for i = 1,#hamzatrdmg do - inside = string.gsub(inside, hamzatrdmg[i].a, hamzatrdmg[i].b) - end - for i = 1,#tanwintrdmg do - inside = string.gsub(inside, tanwintrdmg[i].a, tanwintrdmg[i].b) - end - for i = 1,#trigraphstrdmgeasy do - inside = string.gsub(inside, trigraphstrdmgeasy[i].a, trigraphstrdmgeasy[i].b) + if rules == "idgham" then + for i = 1,#idghamtrdmg do + inside = string.gsub(inside, idghamtrdmg[i].a, idghamtrdmg[i].b) + end end for i = 1,#digraphstrdmg do inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b) @@ -498,8 +496,8 @@ function processvoc(str, rules, scheme) else end if rules == "easy" or rules == "easynosukun" then str = voceasy(str) - elseif rules == "dflt" then - str = voc(str) + elseif rules == "dflt" or rules == "idgham" then + str = voc(str, rules) else end str = unprotectarb(str) return str @@ -518,8 +516,8 @@ function processfullvoc(str, rules, scheme) str = fullvoceasy(str, "sukun") elseif rules == "easynosukun" then str = fullvoceasy(str, "nosukun") - elseif rules == "dflt" then - str = fullvoc(str) + elseif rules == "dflt" or rules == "idgham" then + str = fullvoc(str, rules) else end str = unprotectarb(str) return str @@ -536,7 +534,7 @@ function processnovoc(str, rules, scheme) else end if rules == "easy" or rules == "easynosukun" then str = novoceasy(str) - elseif rules == "dflt" then + 
elseif rules == "dflt" or rules == "idgham" then str = novoc(str) else end str = unprotectarb(str) @@ -551,16 +549,12 @@ function processtrans(str, mode, rules, scheme) str = holdcmd(str) if scheme == "buckwalter" then str = processbuckw(str) - else end + end if mode == "dmg" then - if rules == "easy" or rules == "easynosukun" then - str = transdmgeasy(str) - elseif rules == "dflt" then - str = transdmg(str) - else end + str = transdmg(str, rules) elseif mode == "loc" then str = transloc(str) - else end + end str = unprotectarb(str) return str end diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index ff2ef35..898aecd 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua @@ -341,11 +341,12 @@ trigraphsfv = { -- trigraphs or more -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="%1وا"}, {a="(_a)U", b="%1و"}, - {a="(_a)I", b="%1ي"}, - -- assimilations - {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} + {a="(_a)I", b="%1ي"} } +-- idgham/assimilation rules for trigraphs also apply here: see the +-- 'idgham' table in arabluatex_voc.lua. 
+ trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs) -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, @@ -397,12 +398,10 @@ trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs) -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="%1وا"}, {a="(_a)U", b="%1و"}, - {a="(_a)I", b="%1ي"}, - -- assimilations ---easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} + {a="(_a)I", b="%1ي"} } -digraphsfv = { +digraphsfvidgham = { -- iʿrāb: straight double quote must be discarded {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"}, {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"}, @@ -504,6 +503,108 @@ digraphsfv = { {a="%^d", b="ڊ"} } +digraphsfv = { + -- iʿrāb: straight double quote must be discarded + {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"}, + {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"}, + {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"}, + {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"}, + {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"}, + {a="(%-)(\"?Ani)(%p?)$", b="%2%3"}, + {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"}, + {a="(%-)(\"?ayni)(%p?)$", b="%2%3"}, + {a="(%-)(\"?[uai])(%p?%s)", b="%2%3"}, + {a="(%-)(\"?[uai])(%p?)$", b="%2%3"}, + -- ʾiʿrāb (end) + -- initial straight double quote gives a connective ʾalif + {a="^\"[uai]", b="ٱ"}, + {a="([%s%-])\"[uai]", b="%1ٱ"}, + -- diphthongs to be resolved before ʾalif conjunctionis + {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, + {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, + {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, + {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza + -- initial alif without hamza + {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, + {a="^([uai])", b="ا%1"}, -- initial alif without hamza + {a="(%s)([uai])", b="%1ا%2"}, -- initial alif without hamza + {a="%-%-", b="ـ"}, + {a="ؤؤ", b="ؤّ"}, + {a="أأ", b="أّ"}, + {a="ئئ", b="ئّ"}, + {a="bb", b="بّ"}, + {a="BB", b="ـّ"}, + {a="(%_)([thd])([thd])", b="%1%2|%3"}, + {a="tt", b="تّ"}, + {a="%_t%_t", b="ثّ"}, + {a="jj", b="جّ"}, + {a="%^g%^g", b="جّ"}, + {a="%.h%.h", 
b="حّ"}, + {a="xx", b="خّ"}, + {a="%_h%_h", b="خّ"}, + {a="dd", b="دّ"}, + {a="%_d%_d", b="ذّ"}, + {a="rr", b="رّ"}, + {a="zz", b="زّ"}, + {a="ss", b="سّ"}, + {a="%^s%^s", b="شّ"}, + {a="%.s%.s", b="صّ"}, + {a="%.d%.d", b="ضّ"}, + {a="%.t%.t", b="طّ"}, + {a="%.z%.z", b="ظّ"}, + {a="%`%`", b="عّ"}, + {a="%.g%.g", b="غّ"}, + {a="ff", b="فّ"}, + {a="qq", b="قّ"}, + {a="kk", b="كّ"}, + {a="ll", b="لّ"}, + {a="mm", b="مّ"}, + {a="nn", b="نّ"}, + {a="hh", b="هّ"}, + {a="ww", b="وّ"}, + {a="yy", b="يّ"}, + -- sukūn begin + -- first, take out hyphen if any: + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-([uaiUAI])", b="%1%2"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])$", b="%1ْ"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"}, + -- take out sukūn in cases of assimilation +-- {a="(n)(ْ)(%s)(ر)", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(و)", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(ي)", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(ل)", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(م)", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(ن)", b="%1%3%4"}, +-- {a="ْ\"", b="\""}, + -- sukūn end + {a="_t", b="ث"}, + {a="%^g", b="ج"}, + {a="%.h", b="ح"}, + {a="_h", b="خ"}, + {a="_d", b="ذ"}, + {a="%^s", b="ش"}, + {a="%.s", b="ص"}, + {a="%.d", b="ض"}, + {a="%.t", b="ط"}, + {a="%.z", b="ظ"}, + {a="%.g", b="غ"}, + {a="(U)(A)", b="%1ا"}, + {a="WA", b="وْا"}, + {a="(a)W\"", b="%1وا"}, + {a="(a)W", b="%1وْا"}, + {a="_A", b="aى"}, + {a="_u", b="ٗ"}, + {a="_a", b="ٰ"}, + {a="_i", b="ٖ"}, + {a="%.b", b="ٮ"}, + {a="%.f", b="ڡ"}, + {a="%.q", b="ٯ"}, + {a="%.k", b="ک"}, + {a="%.n", b="ں"}, + {a="%^d", b="ڊ"} +} + digraphsfveasy = { -- see the differences under 'easy' marker below -- iʿrāb: straight double quote must be discarded {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"}, @@ -574,7 +675,9 @@ digraphsfveasy = { -- see the differences under 'easy' marker below -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, -- 
{a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIًٌٍ])", b="%1ْ%2"}, -- take out sukūn in cases of assimilation --- {a="(n)(ْ)(%s)([روي])", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(ر)", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(و)", b="%1%3%4"}, +-- {a="(n)(ْ)(%s)(ي)", b="%1%3%4"}, -- {a="(n)(ْ)(%s)([ل])", b="%1%3%4"}, -- {a="(n)(ْ)(%s)([م])", b="%1%3%4"}, -- {a="(n)(ْ)(%s)([ن])", b="%1%3%4"}, diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index 87c85e9..981810f 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -205,65 +205,13 @@ trigraphstrdmg = { -- trigraphs or more -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="A"}, {a="(_a)U", b="A"}, - {a="(_a)I", b="A"}, - -- assimilations - {a="(n)(}?)(%s)([rlmnwy])", b="%4%2%3%4"} + {a="(_a)I", b="A"} } -trigraphstrdmgeasy = { -- see the differences below under 'easy' tag - -- 'llatI / 'llad_I - {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"}, - {a="(%s)'ll(a)([%_]?[dt])", b="%1'll%2%3"}, - -- al- + lām - {a="^(a)l%-(l)", b="%1l-%2"}, - {a="([%s%-])(a)l%-(l)", b="%1%2l-%3"}, - -- al- + solar consonant - {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2-%2"}, - {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, - -- assim. art. + solar consonant - {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1%2-"}, - {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, - -- al- + initial unstable hamza - {a="^(a)l%-([uai])", b="%1l-%2"}, - {a="([%s%-])(a)l%-([uai])", b="%1%2l-%3"}, - -- li-/la- + art. + initial unstable hamza is a special orthography - {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, - -- al- + lunar consonant (i.e. what remains) - {a="^(a)l%-", b="%1l-"}, - {a="([%s%-])(a)l%-", b="%1%2l-"}, - -- diphthongs to be resolved before ʾalif conjunctionis - {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, - {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, - -- art. with waṣla + lām - {a="'l%-(l)", b="'l-%1"}, - -- art. 
with waṣla + solar consonant - {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"}, - -- li-/la- + art. + lām - {a="l([ai])%-l%-(l)", b="l%1-%2%2"}, - -- assim. art. with waṣla + solar consonant - {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"}, - -- li-/la- + art. + solar consonant is a special orthography - {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, - -- li-/la- + assim. art. + solar consonant is a special orthography - {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%3"}, - -- art. with waṣla + initial unstable hamza - {a="'l%-([uai])", b="'l-%1"}, - -- art. with waṣla + lunar consonant (i.e. what remains) - {a="'l%-", b="'l-"}, - -- the silent wāw - {a="uU$", b="u"}, - {a="uU(%W)", b="u%1"}, - {a="aU$", b="a"}, - {a="aU(%W)", b="a%1"}, - {a="iU$", b="i"}, - {a="iU(%W)", b="i%1"}, - -- words ending in -āT with silent wāw/yāʾ - {a="(_a)UA", b="A"}, - {a="(_a)U", b="A"}, - {a="(_a)I", b="A"} +idghamtrdmg = { -- assimilations ---easy {a="(n)(%s)([rlmnwy])", b="%3%2%3"} -} + {a="(n)(}?)(%s)([rlmnwy])", b="%4%2%3%4"} +} digraphstrdmg = { {a="([uai]%-)(\"?[uai])", b="%1'"}, -- hyphen + initial alif without hamza diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 8f71e52..ddd8fda 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua @@ -467,7 +467,10 @@ trigraphs = { -- trigraphs or more -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="%1وا"}, {a="(_a)U", b="%1و"}, - {a="(_a)I", b="%1ي"}, + {a="(_a)I", b="%1ي"} +} + +idgham = { -- assimilations {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} } @@ -529,9 +532,7 @@ trigraphseasy = { -- differences marked below with 'easy' -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="%1وا"}, {a="(_a)U", b="%1و"}, - {a="(_a)I", b="%1ي"}, - -- assimilations ---easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} + {a="(_a)I", b="%1ي"} } digraphs = { -- cgit v1.2.3