From b4ea0b56577c3ffa721b180f70c4c708a6560071 Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Thu, 22 Dec 2016 18:13:48 +0100 Subject: arbnulls inserted in trans table --- arabluatex.dtx | 11 +++++++- arabluatex.lua | 6 ++++ arabluatex_trans.lua | 78 +++++++++++++++++++++++++++++++++++++++++----------- arabluatex_voc.lua | 4 +-- 4 files changed, 80 insertions(+), 19 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index 5ec4cbe..efac2c3 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -3324,7 +3324,6 @@ wa-ya.sIru ta.hta 'l-jild-i \ifdefined\spewnotes\spewnotes\else\fi% } % \end{macrocode} -% % \end{macro} % \end{environment} % \begin{macro}{\abjad} \cs{abjad}\marg{number} expresses its argument @@ -3345,6 +3344,16 @@ wa-ya.sIru ta.hta 'l-jild-i \fi} % \end{macrocode} % \end{macro} +% \begin{macro}{\arbnull} +% \changes{v1.7}{2016/12/23}{New \cs{arbnull} command for putting +% back on any contextual analysis broken by other commands.} The +% \cs{arbnull} command does nothing by itself. It is processed only +% if it is found in Arabic context so as to put back on contextual +% analysis in case it has been broken by other commands. +% \begin{macrocode} +\NewDocumentCommand{\arbnull}{m}{\relax} +% \end{macrocode} +% \end{macro} % \begin{macro}{\abraces} % \cs{abraces}\marg{Arabic text} puts its argument between % braces. This macro is written in Lua and is dependent on the diff --git a/arabluatex.lua b/arabluatex.lua index 527be3b..294e75f 100644 --- a/arabluatex.lua +++ b/arabluatex.lua @@ -128,6 +128,11 @@ local function indnum(str) return str end +local function processdiscretionary(str) + str = string.gsub(str, "\\%-", "\\-{}") + return str +end + local function processarbnull(str, scheme) if scheme == "buckwalter" then str = string.gsub(str, "(\\arbnull.?)(%b{})", function(tag, body) @@ -558,6 +563,7 @@ end function processtrans(str, mode, rules, scheme) str = "\\arb{".. str.."}" + str = processdiscretionary(str) str = processarbnull(str, scheme) str = takeoutabjad(str) str = protectarb(str) diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index bc44229..0868262 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -31,7 +31,7 @@ punctuationtr = { nulltr = { {a="%|", b=""}, - {a="o.-o", b=""} + {a="o[%S]-o", b=""} } -- cap @@ -97,14 +97,14 @@ hamzatrdmg = { {a="(A)(')", b="%1ʾ"}, -- historic madda -- initial (needs both ^ and %W patterns) -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines) - {a="^('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)(.-o)", b="%1i"}, - {a="(%W)('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)(.-o)", b="%1%2i"}, - {a="^('aw)(o)([%(%[%|%<]?\"?[uai])(.-o)", b="%1i"}, - {a="(%W)('aw)(o)([%(%[%|%<]?\"?[uai])(.-o)", b="%1%2i"}, + {a="^('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"}, + {a="(%W)('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"}, + {a="^('aw)(o)(\"?[uai])([%S]-o)", b="%1i"}, + {a="(%W)('aw)(o)(\"?[uai])([%S]-o)", b="%1%2i"}, {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, - {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, - {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="ʾ%2"}, {a="^(')(i)", b="ʾ%2"}, @@ -138,12 +138,19 @@ hamzatrdmg = { } tanwintrdmg = { + {a="%-?([uai]NU)(o)([ui])([%S]-o)", b="\\arbup{un%3}"}, {a="%-?([uai]NU)(%s)([ui])", b="\\arbup{un%3}%2'"}, {a="%-?uNU", b="\\arbup{un}"}, {a="%-?aNU", b="\\arbup{an}"}, {a="%-?iNU", b="\\arbup{in}"}, {a="%-?iNI", b="i\\arbup{n}"}, -- tanwīn preceding ʾalif conjunctionis + {a="%-?(uN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="\\arbup{uni}"}, + {a="%-?(aN)(_A)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ\\arbup{ni}"}, + {a="%-?(aN)(Y)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ\\arbup{ni}"}, + {a="(T)%-?(aN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="t\\arbup{ani}"}, + {a="([^TA])%-?(aN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1\\arbup{ani}"}, + {a="%-?(iN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="\\arbup{ini}"}, {a="%-?(uN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{uni}%2%3"}, {a="%-?(aN)(_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"}, {a="%-?(aN)(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"}, @@ -151,6 +158,12 @@ tanwintrdmg = { {a="([^TA])%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1\\arbup{ani}%3%4"}, {a="%-?(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{ini}%2%3"}, -- tanwīn preceding 'lla_dI/'llatI + {a="%-?(uN)(o)('lla[%_]?[dt])([%S]-o)", b="\\arbup{uni}"}, + {a="%-?(aN)(_A)(o)('lla[%_]?[dt])([%S]-o)", b="ạ\\arbup{ni}"}, + {a="%-?(aN)(Y)(o)('lla[%_]?[dt])([%S]-o)", b="ạ\\arbup{ni}"}, + {a="(T)%-?(aN)(o)('lla[%_]?[dt])([%S]-o)", b="t\\arbup{ani}"}, + {a="([^TA])%-?(aN)(o)('lla[%_]?[dt])([%S]-o)", b="%1\\arbup{ani}"}, + {a="%-?(iN)(o)('lla[%_]?[dt])([%S]-o)", b="\\arbup{ini}"}, {a="%-?(uN)(%s)('lla[%_]?[dt])", b="\\arbup{uni}%2%3"}, {a="%-?(aN)(_A)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"}, {a="%-?(aN)(Y)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"}, @@ -158,12 +171,19 @@ tanwintrdmg = { {a="([^TA])%-?(aN)(%s)('lla[%_]?[dt])", b="%1\\arbup{ani}%3%4"}, {a="%-?(iN)(%s)('lla[%_]?[dt])", b="\\arbup{ini}%2%3"}, -- tanwīn + alif without hamza and kasra (ibn) or dhamma (uhrub) + {a="%-?(uN)(o)([ui])([%S]-o)", b="\\arbup{un%3}"}, + {a="%-?(aN)(_A)(o)([ui])([%S]-o)", b="ạ\\arbup{n%4}"}, + {a="%-?(aN)(Y)(o)([ui])([%S]-o)", b="ạ\\arbup{n%4}"}, + {a="(T)%-?(aN)(o)([ui])([%S]-o)", b="t\\arbup{an%4}"}, + {a="([^TA])%-?(aN)(o)([ui])([%S]-o)", b="%1\\arbup{an%4}"}, + {a="%-?(iN)(o)([ui])([%S]-o)", b="\\arbup{in%3}"}, + {a="(o[%S]-)([uai]N)(o)(\"?[ui])", b="'"}, {a="%-?(uN)(%s)([ui])", b="\\arbup{un%3}%2'"}, - {a="%-?(aN)(_A)(%s)([ui])", b="ạ\\arbup{n%4}%3%'"}, - {a="%-?(aN)(Y)(%s)([ui])", b="ạ\\arbup{n%4}%3%'"}, - {a="(T)%-?(aN)(%s)([ui])", b="t\\arbup{an%4}%3%'"}, - {a="([^TA])%-?(aN)(%s)([ui])", b="%1\\arbup{an%4}%3%'"}, - {a="%-?(iN)(%s)([ui])", b="\\arbup{in%3}%2%'"}, + {a="%-?(aN)(_A)(%s)([ui])", b="ạ\\arbup{n%4}%3'"}, + {a="%-?(aN)(Y)(%s)([ui])", b="ạ\\arbup{n%4}%3'"}, + {a="(T)%-?(aN)(%s)([ui])", b="t\\arbup{an%4}%3'"}, + {a="([^TA])%-?(aN)(%s)([ui])", b="%1\\arbup{an%4}%3'"}, + {a="%-?(iN)(%s)([ui])", b="\\arbup{in%3}%2'"}, -- -- {a="uN", b="\\arbup{un}"}, (now included in the last line of this table) {a="%-?(\"?At)%-?([ui])N", b="\\arbup{%1%2n}"}, @@ -178,11 +198,15 @@ trigraphstrdmg = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"}, {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p - -- law: the diphthong is to be resoved into 'awi' (next 4 lines) + -- law: the diphthong is to be resolved into 'awi' (next 8 lines) + {a="^(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"}, + {a="(%W)(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"}, + {a="^(law)(o)(\"?[uai])([%S]-o)", b="%1i"}, + {a="(%W)(law)(o)(\"?[uai])([%S]-o)", b="%1%2i"}, {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, - {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, - {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p -- al- + lām {a="^(a)l%-(l)", b="%1l-%2"}, {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1%2l-%3"}, --p @@ -201,8 +225,14 @@ trigraphstrdmg = { -- trigraphs or more {a="^(a)l%-", b="%1l-"}, {a="([%(%[%|%<%s%-])(a)l%-", b="%1%2l-"}, --p -- diphthongs to be resolved before ʾalif conjunctionis + {a="(aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1u"}, + {a="(ay)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"}, + {a="(aw)(o)(\"?[uai])([%S]-o)", b="%1u"}, + {a="(ay)(o)(\"?[uai])([%S]-o)", b="%1i"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1u%2%3"}, --p + {a="(ay)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p -- art. with waṣla + lām {a="'l%-(l)", b="'l-%1"}, -- art. with waṣla + solar consonant @@ -235,7 +265,7 @@ trigraphstrdmg = { -- trigraphs or more idghamtrdmg = { -- assimilations {a="(n)(}?)(%s)([rlmnwy])", b="%4%2%3%4"}, - {a="(n)(o)([rlmnwy])(.-o)", b="%3"} + {a="(n)(o)([rlmnwy])([%S]-o)", b="%3"} } digraphstrdmg = { @@ -249,6 +279,15 @@ digraphstrdmg = { {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza -- this is not necessary, take out for now: -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza + {a="(aW)(o)(\"?[uai])([%S]-o)", b="awu"}, + {a="(UA)(o)(\"?[uai])([%S]-o)", b="u"}, + {a="(%_A)(o)(\"?[uai])([%S]-o)", b="ạ"}, + {a="(Y)(o)(\"?[uai])([%S]-o)", b="ạ"}, + {a="(%_a)(o)(\"?[uai])([%S]-o)", b="a"}, + {a="(A)(o)(\"?[uai])([%S]-o)", b="a"}, + {a="([%_]?[Uu])(o)(\"?[uai])([%S]-o)", b="u"}, + {a="([%_]?[Ii])(o)(\"?[uai])([%S]-o)", b="i"}, + {a="(o[%S]-)([UAIYWuaiyw])(o)(\"?[uai])", b="'"}, {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awu%2%3%4"}, --p {a="(UA)(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p {a="([^%_][uai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3'"}, --p @@ -272,6 +311,13 @@ digraphstrdmg = { -- ʾiʿrāb hyphen (end) -- shorten long vowels preceding ʾalif conjunctionis {a="(U)(A)", b="U"}, + {a="(aW)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="awu"}, + {a="(%_a)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="a"}, + {a="(%_A)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ"}, + {a="(A)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="a"}, + {a="(Y)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ"}, + {a="([%_]?[Uu])(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="u"}, + {a="([%_]?[Ii])(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="i"}, --p (next 7 lines, just after %s) {a="(aW)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awu%2%3"}, {a="(%_a)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index c856818..2f5e445 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua @@ -205,8 +205,8 @@ hamza = { -- 'aw: the diphthong is to be resoved into 'awi' (next 4 lines) {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, - {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, - {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, -- cgit v1.2.3