From 6202ee62d0f34509d7a652c30a1d5c19efec70ae Mon Sep 17 00:00:00 2001
From: Robert Alessi
Date: Sun, 8 May 2016 16:09:05 +0200
Subject: done implementing 'easy' rules set

---
 arabluatex_voc.lua | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)

(limited to 'arabluatex_voc.lua')

diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua
index 5eafca5..4f5399f 100644
--- a/arabluatex_voc.lua
+++ b/arabluatex_voc.lua
@@ -154,6 +154,89 @@ hamza = {
    {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
 }
 
+hamzaeasy = { -- 'easy' hamza rules, each {a=Lua pattern, b=replacement}; differences marked below with 'easy'
+   -- hard coded hamza
+   {a="|\"'", b="ء"},
+   {a="A\"'", b="آ"},
+   {a="[au]\"'", b="أ"},
+   {a="w\"'", b="ؤ"},
+   {a="i\"'", b="إ"},
+   {a="y\"'", b="ئ"},
+   -- hamza takes tašdīd too
+   {a="''([Uu])", b="ؤؤ%1"},
+   {a="''([Aa])", b="أأ%1"},
+   {a="''([Ii])", b="ئئ%1"},
+   -- initial long u and i (for a, see below)
+   {a="%'%_U", b="أU"},
+   {a="%'%_I", b="إI"},
+   -- taḫfīfu 'l-hamza
+   {a="'u'([^uaiUAI])", b="أU%1"},
+   {a="'i'([^uaiUAI])", b="إI%1"},
+   -- madda (historic writing below)
+   {a="'a'([^uaiUAI])", b="آ%1"},
+   {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"},
+--easy {a="(A)(')(uN?)$", b="aآء%3"},
+--easy {a="(A)(')(uN?)(%W)", b="aآء%3%4"},
+--easy {a="(A)(')(iN?)$", b="aآء%3"},
+--easy {a="(A)(')(iN?)(%W)", b="aآء%3%4"},
+--easy {a="(A)(')(i)", b="aآئ%3"}, -- historic madda
+--easy {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
+--easy {a="(A)(')", b="aآء"}, -- historic madda
+   -- initial (needs both ^ and %W patterns)
+   {a="^(')([ua])", b="أ%2"},
+   {a="^(')(i)", b="إ%2"},
+   {a="(%W)(')([ua])", b="%1أ%3"},
+   {a="(%W)(')(i)", b="%1إ%3"},
+   -- final
+   -- ^say'aN and .zim'aN are special orthographies (the dot of .z is escaped as %. so it matches literally)
+   {a="(%^say)(%')(aN)", b="%1ئ%3"},
+   {a="(%.zi?m)(%')(aN)", b="%1ئ%3"},
+   {a="([^uai])(')([uai]N?)$", b="%1ء%3"},
+   {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"},
+-- u (the $ and %W variants are kept parallel: vowel required, N optional)
+   {a="(u)(')([uai]N?)$", b="%1ؤ%3"},
+   {a="(u)(')([uai]N?)(%W)", b="%1ؤ%3%4"},
+   {a="(u)(')$", b="%1ؤ"},
+   {a="(u)(')(%W)", b="%1ؤ%3"},
+-- a
+   {a="(a)(')(A)$", b="%1آ"},
+   {a="(a)(')(A)(%W)", b="%1آ%4"},
+   {a="(a)(')([u]N?)$", b="%1أ%3"},
+   {a="(a)(')([u]N?)(%W)", b="%1أ%3%4"},
+   {a="(a)(')(a)$", b="%1أ%3"},
+   {a="(a)(')(a)(%W)", b="%1أ%3%4"},
+   {a="(a)(')(aN)$", b="%1أً"},
+   {a="(a)(')(aN)(%W)", b="%1أً%4"},
+   {a="(a)(')([i]N?)$", b="%1إ%3"},
+   {a="(a)(')([i]N?)(%W)", b="%1إ%3%4"},
+   {a="(a)(')$", b="%1أ"},
+   {a="(a)(')(%W)", b="%1أ%3"},
+-- i
+   {a="(i)(')([uai]N?)$", b="%1ئ%3"},
+   {a="(i)(')([uai]N?)(%W)", b="%1ئ%3%4"},
+   {a="(i)(')$", b="%1ئ"},
+   {a="(i)(')(%W)", b="%1ئ%3"},
+--
+   -- middle
+   {a="(U)(')", b="%1ء"},
+   {a="([Iy])(')", b="%1ئ"},
+   {a="([^uai])(')([uU])", b="%1ؤ%3"},
+   {a="([^uai])(')([aA])", b="%1أ%3"},
+   {a="([^uai])(')([iI])", b="%1ئ%3"},
+   {a="(u)(')([uU])", b="%1ؤ%3"},
+   {a="(u)(')([aA])", b="%1ؤ%3"},
+   {a="(u)(')([iI])", b="%1ئ%3"},
+   {a="(a)(')([aA])", b="%1أ%3"},
+   {a="(a)(')([uU])", b="%1ؤ%3"},
+   {a="(a)(')([iI])", b="%1ئ%3"},
+   {a="(i)(')([aA])", b="%1ئ%3"},
+   {a="(i)(')([uU])", b="%1ئ%3"},
+   {a="(i)(')([iI])", b="%1ئ%3"},
+   {a="(a)(')([^uaiUAI])", b="%1أ%3"},
+   {a="(u)(')([^uaiUAI])", b="%1ؤ%3"},
+   {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
+}
+
 tanwin = {
    {a="uNU", b="ٌو"},
    {a="aNU", b="ًوا"},
@@ -187,6 +270,39 @@ tanwin = {
    {a="(iN)", b="ٍ"}
 }
 
+tanwineasy = { -- 'easy' tanwīn rules; the assimilation rules are taken out (left commented below)
+   {a="uNU", b="ٌو"},
+   {a="aNU", b="ًوا"},
+   {a="iNU", b="ٍو"},
+   -- assimilations (begin)
+-- {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
+-- {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
+-- {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
+-- {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
+-- {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
+-- {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
+-- {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"},
+   -- assimilations (end)
+   -- quoted tanwīn (begin)
+   {a="(\"uN)", b=""},
+   {a="(B)(\"aN)", b="%1"},
+   {a="(\"aN)(_A)", b="ى"},
+   {a="(\"aN)(Y)", b="ى"},
+   {a="(T)(\"aN)", b="%1"},
+   {a="(ء)(\"aN)", b="%1"},
+   {a="([^TA])(\"aN)", b="%1ا"},
+   {a="(\"iN)", b=""},
+   -- quoted tanwīn (end)
+   {a="(uN)", b="ٌ"},
+   {a="(B)(aN)", b="%1ً"},
+   {a="(aN)(_A)", b="ًى"},
+   {a="(aN)(Y)", b="ًى"},
+   {a="(T)(aN)", b="%1ً"},
+   {a="(ء)(aN)", b="%1ً"},
+   {a="([^TA])(aN)", b="%1ًا"},
+   {a="(iN)", b="ٍ"}
+}
+
 trigraphs = { -- trigraphs or more
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
@@ -247,6 +363,66 @@ trigraphs = { -- trigraphs or more
    {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"}
 }
 
+trigraphseasy = { -- 'easy' trigraph rules, each {a=Lua pattern, b=replacement}; differences marked below with 'easy'
+   -- 'llatI / 'llad_I
+   {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
+   {a="([%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"},
+   -- al- + lām (easy)
+   {a="^(a)l%-(l)", b="ا%1ل%2"},
+   {a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3"},
+   -- al- + solar consonant (easy)
+   {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"},
+   {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"},
+   -- assim. art. + solar consonant (easy)
+   {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"},
+   {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"},
+   -- al- + initial unstable hamza
+   {a="^(a)l%-(\")([uai])", b="ا%1لٱ%3"},
+   {a="([%s%-])(a)l%-(\")([uai])", b="%1ا%2لٱ%4"},
+   {a="^(a)l%-([uai])", b="ا%1لا%2"},
+   {a="([%s%-])(a)l%-([uai])", b="%1ا%2لا%3"},
+   -- li-/la- + art. + initial unstable hamza is a special orthography
+   {a="l([ai])%-l%-(\")([uai])", b="ل%1لٱ%3"},
+   {a="l([ai])%-l%-([uai])", b="ل%1لا%2"},
+   -- al- + lunar consonant (i.e. what remains)
+   {a="^(a)l%-", b="ا%1ل"},
+   {a="([%s%-])(a)l%-", b="%1ا%2ل"},
+   -- diphthongs to be resolved before ʾalif conjunctionis
+   {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
+   {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
+   {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
+   -- art. with waṣla + lām (easy)
+   {a="'l%-(l)", b="ال%1"},
+   -- art. with waṣla + solar consonant (easy)
+   {a="'l%-([%_%^%.]?[tdrzsn])", b="ال%1"},
+   -- li-/la- + art. + lām (easy)
+   {a="l([ai])%-l%-(l)", b="ل%1%2"},
+   -- assim. art. with waṣla + solar consonant (easy)
+   {a="'([%_%^%.]?[tdrzsn])%-", b="ال"},
+   -- li-/la- + art. + solar consonant is a special orthography (easy)
+   {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2"},
+   -- li-/la + assim. art. + solar consonant is a special orthography (easy)
+   {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3"},
+   -- art. with waṣla + initial unstable hamza
+   {a="'l%-(\")([uai])", b="الٱ%2"},
+   {a="'l%-([uai])", b="الا%1"},
+   -- art. with waṣla + lunar consonant (i.e. what remains)
+   {a="'l%-", b="ال"},
+   -- the silent wāw
+   {a="uU$", b="uو"},
+   {a="uU(%W)", b="uو%1"},
+   {a="aU$", b="aو"},
+   {a="aU(%W)", b="aو%1"},
+   {a="iU$", b="iو"},
+   {a="iU(%W)", b="iو%1"},
+   -- words ending in -āT with silent wāw/yāʾ
+   {a="(_a)UA", b="%1وا"},
+   {a="(_a)U", b="%1و"},
+   {a="(_a)I", b="%1ي"},
+   -- assimilations
+--easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"}
+}
+
 digraphs = {
    -- initial straight double quote gives a connective ʾalif
    {a="^\"[uai]", b="ٱ"},
-- 
cgit v1.2.3