From d4299f53e1abe77ba50210f2345ccfcb33abf79c Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 7 Dec 2016 17:21:54 +0100 Subject: =?UTF-8?q?rules=20related=20to=20punctuation=20characters=20at=20?= =?UTF-8?q?word=20boundaries=20needed=20much=20more=20work=20(see=20'--p'?= =?UTF-8?q?=20tag);=20new=20rules=20for=20the=20resolution=20of=20the=20di?= =?UTF-8?q?phthong=20in=20=CA=BEaw=20and=20law?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arabluatex_trans.lua | 56 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'arabluatex_trans.lua') diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index e7087c0..ef55a2b 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -95,6 +95,12 @@ hamzatrdmg = { {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda {a="(A)(')", b="%1ʾ"}, -- historic madda -- initial (needs both ^ and %W patterns) + -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines) + {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, + -- then the 'initial' rules for the remaining cases {a="^(')([ua])", b="ʾ%2"}, {a="^(')(i)", b="ʾ%2"}, {a="(%W)(')([ua])", b="%1ʾ%3"}, @@ -127,6 +133,7 @@ hamzatrdmg = { } tanwintrdmg = { + {a="%-?([uai]NU)(%s)([ui])", b="\\arbup{un%3}%2'"}, {a="%-?uNU", b="\\arbup{un}"}, {a="%-?aNU", b="\\arbup{an}"}, {a="%-?iNU", b="\\arbup{in}"}, @@ -165,24 +172,29 @@ tanwintrdmg = { trigraphstrdmg = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"}, - {a="([%p%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p + {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p + -- law: the diphthong is to be resolved into 'awi' (next 4 lines) + 
{a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, + {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"}, + {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, + {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, -- al- + lām {a="^(a)l%-(l)", b="%1l-%2"}, - {a="([%p%s%-])(a)l%-(l)", b="%1%2l-%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1%2l-%3"}, --p -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2-%2"}, - {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, --p -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1%2-"}, - {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, --p + {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, --p -- al- + initial unstable hamza {a="^(a)l%-(\"?[uai])", b="%1l-%2"}, - {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1%2l-%3"}, --p + {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1%2l-%3"}, --p -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, -- al- + lunar consonant (i.e. 
what remains) {a="^(a)l%-", b="%1l-"}, - {a="([%p%s%-])(a)l%-", b="%1%2l-"}, --p + {a="([%(%[%|%<%s%-])(a)l%-", b="%1%2l-"}, --p -- diphthongs to be resolved before ʾalif conjunctionis {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, @@ -231,14 +243,15 @@ digraphstrdmg = { {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza -- this is not necessary, take out for now: -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza - {a="(aW)(%s)(\"?[uai])", b="awu%2%3"}, - {a="([^%_][uai])(%s)(\"?[uai])", b="%1%2'"}, - {a="(%_A)(%s)(\"?[uai])", b="ạ%2'"}, - {a="(Y)(%s)(\"?[uai])", b="ạ%2'"}, - {a="(%_a)(%s)(\"?[uai])", b="a%2'"}, - {a="(A)(%s)(\"?[uai])", b="a%2'"}, - {a="([%_]?[Uu])(%s)(\"?[uai])", b="u%2'"}, - {a="([%_]?[Ii])(%s)(\"?[uai])", b="i%2'"}, + {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awu%2%3%4"}, --p + {a="(UA)(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p + {a="([^%_][uai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3'"}, --p + {a="(%_A)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p + {a="(Y)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p + {a="(%_a)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p + {a="(A)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p + {a="([%_]?[Uu])(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p + {a="([%_]?[Ii])(%s)([%(%[%|%<]?)(\"?[uai])", b="i%2%3'"}, --p -- ʾiʿrāb hyphen (begin) {a="(%-)(\"?[UI]na)(%p?%s)", b="\\arbup{%2}%3"}, {a="(%-)(\"?[UI]na)(%p?)$", b="\\arbup{%2}%3"}, @@ -253,13 +266,14 @@ digraphstrdmg = { -- ʾiʿrāb hyphen (end) -- shorten long vowels preceding ʾalif conjunctionis {a="(U)(A)", b="U"}, - {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awu%2%3"}, - {a="(%_a)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, - {a="(%_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, - {a="(A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, - {a="(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", 
b="ạ%2%3"}, - {a="([%_]?[Uu])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="u%2%3"}, - {a="([%_]?[Ii])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="i%2%3"}, + --p (next 7 lines, just after %s) + {a="(aW)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awu%2%3"}, + {a="(%_a)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, + {a="(%_A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, + {a="(A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, + {a="(Y)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, + {a="([%_]?[Uu])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="u%2%3"}, + {a="([%_]?[Ii])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="i%2%3"}, {a="%-%-", b=""}, -- {a="T([^uai])", b="%1"}, {a="T(%p?%s)", b="h%1"}, -- cgit v1.2.3