From d4299f53e1abe77ba50210f2345ccfcb33abf79c Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 7 Dec 2016 17:21:54 +0100 Subject: =?UTF-8?q?rules=20related=20to=20punctuation=20characters=20at=20?= =?UTF-8?q?word=20boundaries=20needed=20much=20more=20work=20(see=20'--p'?= =?UTF-8?q?=20tag);=20new=20rules=20for=20the=20resolution=20of=20the=20di?= =?UTF-8?q?phthong=20in=20=CA=BEaw=20and=20law?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arabluatex_fullvoc.lua | 78 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 25 deletions(-) (limited to 'arabluatex_fullvoc.lua') diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index bb928e4..efe26c0 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua @@ -66,6 +66,12 @@ hamzafv = { {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda {a="(A)(')", b="aآء"}, -- historic madda -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -166,6 +172,12 @@ hamzafveasy = { -- differences marked below with 'easy' {a="(A)(')", b="aاء"}, -- historic madda --easy (end) -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases 
{a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -220,7 +232,10 @@ hamzafveasy = { -- differences marked below with 'easy' {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} } -tanwinfv = { +tanwinfv = { -- with assimilations (\SetArbDflt*) + {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"}, {a="%-?uNU", b="ٌو"}, {a="%-?aNU", b="ًوا"}, {a="%-?iNU", b="ٍو"}, @@ -256,6 +271,9 @@ tanwinfv = { } tanwinfveasy = { -- no assimilations (see below) + {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"}, {a="%-?uNU", b="ٌو"}, {a="%-?aNU", b="ًوا"}, {a="%-?iNU", b="ٍو"}, @@ -293,24 +311,29 @@ tanwinfveasy = { -- no assimilations (see below) trigraphsfv = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'lawi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p -- assim. art. 
+ solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"}, - {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p -- li-/la + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="ا%1لْ"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2لْ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2لْ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -350,24 +373,29 @@ trigraphsfv = { -- trigraphs or more trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs) -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'lawi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p -- assim. art. 
+ solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"}, - {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p -- li-/la + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="ا%1لْ"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2لْ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2لْ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -416,16 +444,16 @@ digraphsfvidgham = { -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza -- initial alif without hamza - {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, @@ -518,16 +546,16 @@ digraphsfv = { -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + 
{a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza -- initial alif without hamza - {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, @@ -620,16 +648,16 @@ digraphsfveasy = { -- see the differences under 'easy' marker below -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza -- initial alif without hamza - {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, -- cgit v1.2.3