From d4299f53e1abe77ba50210f2345ccfcb33abf79c Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 7 Dec 2016 17:21:54 +0100 Subject: =?UTF-8?q?rules=20related=20to=20punctuation=20characters=20at=20?= =?UTF-8?q?word=20boundaries=20needed=20much=20more=20work=20(see=20'--p'?= =?UTF-8?q?=20tag);=20new=20rules=20for=20the=20resolution=20of=20the=20di?= =?UTF-8?q?phthong=20in=20=CA=BEaw=20and=20law?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arabluatex.dtx | 9 ++++-- arabluatex_fullvoc.lua | 78 ++++++++++++++++++++++++++++++++++---------------- arabluatex_novoc.lua | 14 ++++----- arabluatex_trans.lua | 56 ++++++++++++++++++++++-------------- arabluatex_voc.lua | 56 +++++++++++++++++++++++++----------- 5 files changed, 140 insertions(+), 73 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index 7128adf..c1633da 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -2973,17 +2973,20 @@ wa-ya.sIru ta.hta 'l-jild-i \newlength{\al@bayt@width} \setlength{\al@bayt@width}{.3\textwidth} \define@key[al]{verse}{width}{\setlength{\al@bayt@width}{#1}} -\define@key[al]{verse}{metre}{#1} +\define@key[al]{verse}{metre}{\arb{#1}} \define@boolkey[al]{verse}{utf}[true]{} \define@choicekey[al]{verse}{mode}{fullvoc, voc, novoc, trans}{\def\al@mode{#1}} \presetkeys[al]{verse}{utf=false}{} \NewDocumentEnvironment{arbverse}{O{} O{}}% -{\par\bgroup\setkeys[al]{verse}[metre]{#1}% +{\bgroup\setkeys[al]{verse}[metre]{#1}% \ifx\al@mode\al@mode@trans% \ifal@verse@utf\setRL\else\setLR\fi% \else\setRL\fi% -}{\newline\null\hfill\setkeys[al]{verse}[width,utf,mode]{#1}\egroup\par} + \begin{center} +}% +{\end{center}% +\hfill\setkeys[al]{verse}[width,utf,mode]{#1}\egroup\par} % \end{macrocode} % \end{environment} % \begin{macro}{\abjad} \cs{abjad}\marg{number} expresses its argument diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index bb928e4..efe26c0 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua @@ -66,6 
+66,12 @@ hamzafv = { {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda {a="(A)(')", b="aآء"}, -- historic madda -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -166,6 +172,12 @@ hamzafveasy = { -- differences marked below with 'easy' {a="(A)(')", b="aاء"}, -- historic madda --easy (end) -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -220,7 +232,10 @@ hamzafveasy = { -- differences marked below with 'easy' {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} } -tanwinfv = { +tanwinfv = { -- with assimilations (\SetArbDflt*) + {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"}, {a="%-?uNU", b="ٌو"}, {a="%-?aNU", b="ًوا"}, {a="%-?iNU", b="ٍو"}, @@ -256,6 +271,9 @@ tanwinfv = { } tanwinfveasy = { -- no assimilations (see below) + {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"}, {a="%-?uNU", b="ٌو"}, {a="%-?aNU", b="ًوا"}, {a="%-?iNU", b="ٍو"}, @@ -293,24 +311,29 @@ tanwinfveasy = { -- no assimilations (see below) trigraphsfv = { -- trigraphs or more
-- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"}, - {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p -- li-/la + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"}, -- al- + lunar consonant (i.e.
what remains) {a="^(a)l%-", b="ا%1لْ"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2لْ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2لْ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -350,24 +373,29 @@ trigraphsfv = { -- trigraphs or more trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs) -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"}, - {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p -- li-/la + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"}, -- al- + lunar consonant (i.e.
what remains) {a="^(a)l%-", b="ا%1لْ"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2لْ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2لْ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -416,16 +444,16 @@ digraphsfvidgham = { -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza -- initial alif without hamza - {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, @@ -518,16 +546,16 @@ digraphsfv = { -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza -- initial alif without hamza - {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p + 
{a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, @@ -620,16 +648,16 @@ digraphsfveasy = { -- see the differences under 'easy' marker below -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza -- initial alif without hamza - {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, diff --git a/arabluatex_novoc.lua b/arabluatex_novoc.lua index b88eaf2..f2b6a6a 100644 --- a/arabluatex_novoc.lua +++ b/arabluatex_novoc.lua @@ -64,27 +64,27 @@ trigraphsnv = { -- trigraphs or more {a="l%-l_ah", b="l-ll_ah"}, -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="ال%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1ال%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ال%2%3"}, --p -- al- + lām {a="^(a)l%-(l)", b="ا%1ل%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p + 
{a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\")([uai])", b="ا%1لٱ%3"}, - {a="([%p%s%-])(a)l%-(\")([uai])", b="%1ا%2لٱ%4"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\")([uai])", b="%1ا%2لٱ%4"}, --p {a="^(a)l%-([uai])", b="ا%1لا%2"}, - {a="([%p%s%-])(a)l%-([uai])", b="%1ا%2لا%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([uai])", b="%1ا%2لا%3"}, --p -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\")([uai])", b="ل%1لٱ%3"}, {a="l([ai])%-l%-([uai])", b="ل%1لا%2"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="ا%1ل"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2ل"}, --p -- art. with waṣla + lām {a="'l%-(l)", b="ال%1"}, -- art. with waṣla + solar consonant diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index e7087c0..ef55a2b 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -95,6 +95,12 @@ hamzatrdmg = { {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda {a="(A)(')", b="%1ʾ"}, -- historic madda -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="ʾ%2"}, {a="^(')(i)", b="ʾ%2"}, {a="(%W)(')([ua])", b="%1ʾ%3"}, @@ -127,6 +133,7 @@ hamzatrdmg = { } tanwintrdmg = { + {a="%-?([uai]NU)(%s)([ui])", b="\\arbup{un%3}%2'"}, {a="%-?uNU", b="\\arbup{un}"}, {a="%-?aNU", b="\\arbup{an}"}, {a="%-?iNU", b="\\arbup{in}"}, @@ -165,24 +172,29 @@
tanwintrdmg = { trigraphstrdmg = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"}, - {a="([%p%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p + {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="%1l-%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1%2l-%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1%2l-%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2-%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, --p -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1%2-"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\"?[uai])", b="%1l-%2"}, - {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1%2l-%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1%2l-%3"}, --p -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, -- al- + lunar consonant (i.e.
what remains) {a="^(a)l%-", b="%1l-"}, - {a="([%p%s%-])(a)l%-", b="%1%2l-"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1%2l-"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, @@ -231,14 +243,15 @@ digraphstrdmg = { {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza -- this is not necessary, take out for now: -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza - {a="(aW)(%s)(\"?[uai])", b="awu%2%3"}, - {a="([^%_][uai])(%s)(\"?[uai])", b="%1%2'"}, - {a="(%_A)(%s)(\"?[uai])", b="ạ%2'"}, - {a="(Y)(%s)(\"?[uai])", b="ạ%2'"}, - {a="(%_a)(%s)(\"?[uai])", b="a%2'"}, - {a="(A)(%s)(\"?[uai])", b="a%2'"}, - {a="([%_]?[Uu])(%s)(\"?[uai])", b="u%2'"}, - {a="([%_]?[Ii])(%s)(\"?[uai])", b="i%2'"}, + {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awu%2%3%4"}, --p + {a="(UA)(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p + {a="([^%_][uai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3'"}, --p + {a="(%_A)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p + {a="(Y)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p + {a="(%_a)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p + {a="(A)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p + {a="([%_]?[Uu])(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p + {a="([%_]?[Ii])(%s)([%(%[%|%<]?)(\"?[uai])", b="i%2%3'"}, --p -- ʾiʿrāb hyphen (begin) {a="(%-)(\"?[UI]na)(%p?%s)", b="\\arbup{%2}%3"}, {a="(%-)(\"?[UI]na)(%p?)$", b="\\arbup{%2}%3"}, @@ -253,13 +266,14 @@ digraphstrdmg = { -- ʾiʿrāb hyphen (end) -- shorten long vowels preceding ʾalif conjunctionis {a="(U)(A)", b="U"}, - {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awu%2%3"}, - {a="(%_a)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, - {a="(%_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, - {a="(A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, - {a="(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", 
b="ạ%2%3"}, - {a="([%_]?[Uu])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="u%2%3"}, - {a="([%_]?[Ii])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="i%2%3"}, + --p (next 7 lines, just after %s) + {a="(aW)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awu%2%3"}, + {a="(%_a)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, + {a="(%_A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, + {a="(A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, + {a="(Y)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, + {a="([%_]?[Uu])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="u%2%3"}, + {a="([%_]?[Ii])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="i%2%3"}, {a="%-%-", b=""}, -- {a="T([^uai])", b="%1"}, {a="T(%p?%s)", b="h%1"}, diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 61e4c4a..02c3feb 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua @@ -201,6 +201,12 @@ hamza = { {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda {a="(A)(')", b="aآء"}, -- historic madda -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -292,6 +298,12 @@ hamzaeasy = { -- differences marked below with 'easy' {a="(A)(')", b="aاء"}, -- historic madda --easy (end) -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, +
{a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -415,27 +427,32 @@ tanwineasy = { -- 'easy' requires some lines to be taken out: trigraphs = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"}, - {a="([%p%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p {a="^(a)l%-([uai])", b="ا%1ل%2ا"}, - {a="([%p%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"}, {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"}, -- al- + lunar consonant (i.e.
what remains) {a="^(a)l%-", b="ا%1ل"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2ل"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -480,27 +497,32 @@ trigraphseasy = { -- differences marked below with 'easy' {a="l%-l_ah", b="l-ll_ah"}, -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām (easy) {a="^(a)l%-(l)", b="ا%1ل%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p -- al- + solar consonant (easy) {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p -- assim. art. + solar consonant (easy) {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"}, - {a="([%p%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p {a="^(a)l%-([uai])", b="ا%1ل%2ا"}, - {a="([%p%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p -- li-/la- + art.
+ initial unstable hamza is a special orthography {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"}, {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="ا%1ل"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2ل"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -550,12 +572,12 @@ digraphs = { -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p {a="(aW)(%s)([uai])", b="awuا%2%3"}, -- hyphen + initial alif without hamza: {a="(%-)([uai])([%^%_%.%`]?)(%a)", b="%1ا%3%4"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا"}, -- initial alif without hamza --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, -- cgit v1.2.3