-- Apply an ordered list of substitution rules to a string.
-- `rules` is a sequence of {a = <Lua pattern>, b = <replacement>} entries;
-- each rule is applied with string.gsub, in order, so a later rule sees the
-- output of the earlier ones.
local function applytrrules(text, rules)
   for i = 1, #rules do
      text = string.gsub(text, rules[i].a, rules[i].b)
   end
   return text
end

-- Transliterate every \arb{...} group found in `str` following the
-- "arabica" rule tables.
--
-- The balanced-brace argument of each \arb is extracted (outer braces
-- removed), run through the rule tables below in this exact order, and
-- re-emitted as \txtrans{...}.  Text outside \arb{...} is returned
-- untouched.  The rule tables are global variables looked up at call time.
local function transarabica(str)
   str = string.gsub(str, "\\arb(%b{})", function(inside)
      -- %b{} captures the braces as well: strip them.
      inside = string.sub(inside, 2, -2)
      -- The order of the passes is significant: every pass works on the
      -- result of the previous one.
      inside = applytrrules(inside, hamzatrarabica)
      inside = applytrrules(inside, tanwintrarabica)
      inside = applytrrules(inside, trigraphstrarabica)
      inside = applytrrules(inside, digraphstrarabica)
      inside = applytrrules(inside, singletrarabica)
      inside = applytrrules(inside, longvtrarabica)
      inside = applytrrules(inside, shortvtrarabica)
      inside = applytrrules(inside, finaltrarabica)
      inside = applytrrules(inside, punctuationtr)
      inside = applytrrules(inside, nulltr)
      return string.format("\\txtrans{%s}", inside)
   end)
   return str
end
@@ -579,6 +617,8 @@ function processtrans(str, mode, rules, scheme) str = transdmg(str, rules) elseif mode == "loc" then str = transloc(str) + elseif mode == "arabica" then + str = transarabica(str) end str = unprotectarb(str) return str diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index 275e235..f6c90f7 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -496,7 +496,7 @@ trigraphstrloc = { -- trigraphs or more {a="^(a)l%-([uai])", b="%1l-%2"}, {a="(%s)(a)l%-([uai])", b="%1%2l-%3"}, -- li-/la- + art. + initial unstable hamza is a special orthography - {a="l([ai])%-l%-([uai])", b="l%1l-%2"}, + {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="%1l-"}, {a="(%s)(a)l%-", b="%1%2l-"}, @@ -505,13 +505,13 @@ trigraphstrloc = { -- trigraphs or more -- art. with waṣla + solar consonant {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"}, -- li-/la- + art. + lām - {a="l([ai])%-l%-(l)", b="l%1-%2"}, + {a="l([ai])%-l%-(l)", b="l%1-l-%2"}, -- assim. art. with waṣla + solar consonant {a="'([%_%^%.]?[tdrzsn])%-", b="al-"}, -- li-/la- + art. + solar consonant is a special orthography - {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1l-%2"}, + {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"}, -- li-/la- + assim. art. + solar consonant is a special orthography - {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1l-%3"}, + {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"}, -- art. with waṣla + initial unstable hamza {a="'l%-([uai])", b="al-%1"}, -- art. with waṣla + lunar consonant (i.e. 
finaltrloc = {
   {a="ʾ", b="'"},
}

-- arabica
--
-- Rule tables for the "arabica" transliteration mode.  Every entry is a
-- pair {a = <Lua pattern>, b = <replacement>} that is handed to
-- string.gsub; entries are applied in the order in which they are listed,
-- so their order is significant.  The tables are deliberately global:
-- they are looked up by name from arabluatex.lua.

hamzatrarabica = { -- taken over from the LOC table
   -- hard coded hamza
   {a="|\"'", b="ʾ"},
   {a="A\"'", b="ʾA"},
   {a="[au]\"'", b="ʾ"},
   {a="w\"'", b="ʾ"},
   {a="i\"'", b="ʾ"},
   {a="y\"'", b="ʾ"},
   -- hamza takes tašdīd too
   {a="''([Uu])", b="ʾʾ%1"},
   {a="''([Aa])", b="ʾʾ%1"},
   {a="''([Ii])", b="ʾʾ%1"},
   -- initial long u and i (for a, see below)
   {a="%'%_U", b="U"},
   {a="%'%_I", b="I"},
   -- taḫfīfu 'l-hamza
   {a="^'u'([^uaiUAI])", b="U%1"},
   {a="(%W)'u'([^uaiUAI])", b="%1U%2"},
   -- the captured character that follows must be put back, otherwise
   -- gsub swallows it
   {a="'u'([^uaiUAI])", b="ʾU%1"},
   {a="^'i'([^uaiUAI])", b="I%1"},
   {a="(%W)'i'([^uaiUAI])", b="%1I%2"},
   {a="'i'([^uaiUAI])", b="ʾI%1"}, -- same as above: keep the capture
   -- madda (historic writing below)
   {a="^(')(A)", b="%2"},
   {a="(%W)(')(A)", b="%1%3"},
   {a="^'a'([^uaiUAI])", b="A%1"},
   {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
   {a="'a'([^uaiUAI])", b="A%1"},
   {a="^'a?A", b="A"},
   {a="(%W)'a?A", b="%1A"},
   {a="'a?A", b="ʾA"},
   {a="(A)(')(i)$", b="%1ʾ%3"},
   {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
   {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
   {a="(A)(')", b="%1ʾ"}, -- historic madda
   -- initial (needs both ^ and %W patterns)
   {a="^(')([ua])", b="%2"},
   {a="^(')(i)", b="%2"},
   {a="(%W)(')([ua])", b="%1%3"},
   {a="(%W)(')(i)", b="%1%3"},
   -- final
   {a="([Iy])(')(aN)$", b="%1ʾ%3"},
   {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
   {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
   {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
   {a="([UI])(')([uai])$", b="%1ʾ%3"},
   {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
   -- middle
   {a="(U)(')", b="%1ʾ"},
   {a="([Iy])(')", b="%1ʾ"},
   {a="([^uai])(')([uU])", b="%1ʾ%3"},
   {a="([^uai])(')([aA])", b="%1ʾ%3"},
   {a="([^uai])(')([iI])", b="%1ʾ%3"},
   {a="(u)(')([uU])", b="%1ʾ%3"},
   {a="(u)(')([aA])", b="%1ʾ%3"},
   {a="(u)(')([iI])", b="%1ʾ%3"},
   {a="(a)(')([aA])", b="%1ʾ%3"},
   {a="(a)(')([uU])", b="%1ʾ%3"},
   {a="(a)(')([iI])", b="%1ʾ%3"},
   {a="(i)(')([aA])", b="%1ʾ%3"},
   {a="(i)(')([uU])", b="%1ʾ%3"},
   {a="(i)(')([iI])", b="%1ʾ%3"},
   {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
   {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
   {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
}

tanwintrarabica = { -- taken over from the LOC table
   {a="%-?uNU", b="un"},
   {a="%-?aNU", b="an"},
   {a="%-?iNU", b="in"},
   {a="%-?(\"?At)%-?([ui])N", b="%1%2n"},
   {a="%-?([ui])N", b="%1n"},
   {a="%-?(aN)(_A)", b="an"},
   {a="%-?(aN)(Y)", b="an"},
   {a="(T)%-?(\"?aN)", b="tan"},
   {a="([^TA])%-?(\"?aN)", b="%1an"}
}

-- new
trigraphstrarabica = { -- trigraphs or more
   -- 'llatI / 'llad_I
   {a="^'ll(a)([%_]?[dt])", b="ll%1%2"},
   {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1ll%2%3"}, --p
   -- al- + lām
   {a="^(a)l%-(l)", b="%1l-%2"},
   {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
   -- al- + solar consonant
   {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"},
   {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"},
   -- assim. art. + solar consonant
   {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-"},
   {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-"},
   -- al- + initial unstable hamza
   {a="^(a)l%-([uai])", b="%1l-%2"},
   {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
   -- li-/la- + art. + initial unstable hamza is a special orthography
   {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
   -- al- + lunar consonant (i.e. what remains)
   {a="^(a)l%-", b="%1l-"},
   {a="(%s)(a)l%-", b="%1%2l-"},
   -- art. with waṣla + lām
   {a="'l%-(l)", b="l-%1"},
   -- art. with waṣla + solar consonant
   {a="'l%-([%_%^%.]?[tdrzsn])", b="l-%1"},
   -- li-/la- + art. + lām
   {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
   -- assim. art. with waṣla + solar consonant
   {a="'([%_%^%.]?[tdrzsn])%-", b="l-"},
   -- li-/la- + art. + solar consonant is a special orthography
   {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"},
   -- li-/la- + assim. art. + solar consonant is a special orthography
   {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"},
   -- art. with waṣla + initial unstable hamza
   {a="'l%-([uai])", b="l-%1"},
   -- art. with waṣla + lunar consonant (i.e. what remains)
   {a="'l%-", b="l-"},
   -- the silent wāw
   {a="uU$", b="u"},
   {a="uU(%W)", b="u%1"},
   {a="aU$", b="a"},
   {a="aU(%W)", b="a%1"},
   {a="iU$", b="i"},
   {a="iU(%W)", b="i%1"},
   -- words ending in -āT with silent wāw/yāʾ
   {a="(_a)UA", b="A"},
   {a="(_a)U", b="A"},
   {a="(_a)I", b="A"}
}

digraphstrarabica = {
   -- discard the ʾiʿrāb hyphen (begin)
   {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
   {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
   {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
   {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
   {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
   {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
   {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
   {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
   {a="(%-)([uai])(%p?%s)", b="%2%3"},
   {a="(%-)([uai])(%p?)$", b="%2%3"},
   -- discard the ʾiʿrāb hyphen (end)
   {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
   {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
   {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
   {a="%-%-", b=""},
   {a="uww", b="ūw"},
   {a="iyy$", b="ī"},
   {a="iyy(%W)", b="ī%1"},
   {a="iyy", b="īy"},
   {a="([tkdsg])(h)", b="%1'%2"},
   -- {a="T([^uai])", b="h%1"},
   {a="([a%']l%-)(.-)T([%(%[%|%<%s])(al%-)", b="%1%2h%3%4"}, --p
   {a="T([%(%[%|%<%s])(al%-)", b="t%1%2"}, --p
   {a="T$", b="h"},
   {a="T(%W)", b="h%1"},
   {a="_t", b="th"},
   {a="%^g", b="j"},
   {a="%.h", b="ḥ"},
   {a="_h", b="kh"},
   {a="_d", b="dh"},
   {a="%^s", b="sh"},
   {a="%.s", b="ṣ"},
   {a="%.d", b="ḍ"},
   {a="%.t", b="ṭ"},
   {a="%.z", b="ẓ"},
   {a="%.g", b="gh"},
   {a="(U)(A)", b="ū"},
   {a="WA", b="w"},
   {a="(a)W", b="%1w"},
   {a="_A", b="á"},
   {a="_u", b="ū"},
   {a="_a", b="ā"},
   {a="_i", b="ī"},
   {a="%.b", b="b"},
   {a="%.f", b="f"},
   {a="%.q", b="q"},
   {a="%.k", b="k"},
   {a="%.n", b="n"},
   {a="%^d", b="d"}
}

-- new
singletrarabica = {
   {a="b", b="b"},
   {a="t", b="t"},
   {a="j", b="j"},
   {a="x", b="kh"},
   {a="d", b="d"},
   {a="r", b="r"},
   {a="z", b="z"},
   {a="s", b="s"},
   {a="`", b="ʿ"},
   {a="f", b="f"},
   {a="q", b="q"},
   {a="k", b="k"},
   {a="l", b="l"},
   {a="m", b="m"},
   {a="n", b="n"},
   {a="h", b="h"},
   {a="w", b="w"},
   {a="y", b="y"},
   {a="T", b="t"},
   {a="\"", b=""},
   {a="B", b=""}
}

longvtrarabica = {
   {a="A", b="ā"},
   {a="U", b="ū"},
   {a="I", b="ī"},
   {a="Y", b="á"},
}

shortvtrarabica = {
   {a="u", b="u"},
   {a="a", b="a"},
   {a="i", b="i"}
}

-- new
-- Intentionally empty for now: the only candidate rule is kept commented
-- out, so this pass currently leaves its input unchanged.
finaltrarabica = {
   -- {a="ʾ", b="'"},
}