From d4299f53e1abe77ba50210f2345ccfcb33abf79c Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 7 Dec 2016 17:21:54 +0100 Subject: =?UTF-8?q?rules=20related=20to=20punctuation=20characters=20at=20?= =?UTF-8?q?word=20boundaries=20needed=20much=20more=20work=20(see=20'--p'?= =?UTF-8?q?=20tag);=20new=20rules=20for=20the=20resolution=20of=20the=20di?= =?UTF-8?q?phthong=20in=20=CA=BEaw=20and=20law?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arabluatex_voc.lua | 56 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'arabluatex_voc.lua') diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 61e4c4a..02c3feb 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua @@ -201,6 +201,12 @@ hamza = { {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda {a="(A)(')", b="aآء"}, -- historic madda -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -292,6 +298,12 @@ hamzaeasy = { -- differences marked below with 'easy' {a="(A)(')", b="aاء"}, -- historic madda --easy (end) -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="أ%2"}, 
{a="^(')(i)", b="إ%2"}, {a="(%W)(')([ua])", b="%1أ%3"}, @@ -415,27 +427,32 @@ tanwineasy = { -- 'easy' requires some lines to be taken out: trigraphs = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"}, - {a="([%p%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p {a="^(a)l%-([uai])", b="ا%1ل%2ا"}, - {a="([%p%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"}, {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"}, -- al- + lunar consonant (i.e. 
what remains) {a="^(a)l%-", b="ا%1ل"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2ل"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -480,27 +497,32 @@ trigraphseasy = { -- differences marked below with 'easy' {a="l%-l_ah", b="l-ll_ah"}, -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, - {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām (easy) {a="^(a)l%-(l)", b="ا%1ل%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p -- al- + solar consonant (easy) {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p -- assim. art. + solar consonant (easy) {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"}, - {a="([%p%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p {a="^(a)l%-([uai])", b="ا%1ل%2ا"}, - {a="([%p%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p -- li-/la- + art. 
+ initial unstable hamza is a special orthography {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"}, {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="ا%1ل"}, - {a="([%p%s%-])(a)l%-", b="%1ا%2ل"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2ل"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, @@ -550,12 +572,12 @@ digraphs = { -- ʾiʿrāb (end) -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, - {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p + {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p {a="(aW)(%s)([uai])", b="awuا%2%3"}, -- hyphen + initial alif without hamza: {a="(%-)([uai])([%^%_%.%`]?)(%a)", b="%1ا%3%4"}, - {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="([%p%s])([uai])", b="%1ا"}, -- initial alif without hamza --p + {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p + {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا"}, -- initial alif without hamza --p {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, -- cgit v1.2.3