From c8dccd1fa1da5e45bdd951746b942f8de1ffcb29 Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Tue, 1 Mar 2016 11:27:13 +0100 Subject: implementation of loc transliteration tables --- arabluatex.lua | 34 +++++++++++ arabluatex_trans.lua | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 201 insertions(+), 2 deletions(-) diff --git a/arabluatex.lua b/arabluatex.lua index b716bea..d76a284 100644 --- a/arabluatex.lua +++ b/arabluatex.lua @@ -236,6 +236,38 @@ local function transdmg(str) return str end +local function transloc(str) + str = string.gsub(str, "\\arb(%b{})", function(inside) + inside = string.sub(inside, 2, -2) + for i = 1,#hamzatrloc do + inside = string.gsub(inside, hamzatrloc[i].a, hamzatrloc[i].b) + end + for i = 1,#tanwintrloc do + inside = string.gsub(inside, tanwintrloc[i].a, tanwintrloc[i].b) + end + for i = 1,#trigraphstrloc do + inside = string.gsub(inside, trigraphstrloc[i].a, trigraphstrloc[i].b) + end + for i = 1,#digraphstrloc do + inside = string.gsub(inside, digraphstrloc[i].a, digraphstrloc[i].b) + end + for i = 1,#singletrloc do + inside = string.gsub(inside, singletrloc[i].a, singletrloc[i].b) + end + for i = 1,#longvtrloc do + inside = string.gsub(inside, longvtrloc[i].a, longvtrloc[i].b) + end + for i = 1,#shortvtrloc do + inside = string.gsub(inside, shortvtrloc[i].a, shortvtrloc[i].b) + end + for i = 1,#finaltrloc do + inside = string.gsub(inside, finaltrloc[i].a, finaltrloc[i].b) + end + return string.format("\\txtrans{%s}", inside) + end) +return str +end + function processvoc(str) str = "\\arb{".. str.."}" str = protectarb(str) @@ -273,6 +305,8 @@ function processtrans(str, mode) str = holdcmd(str) if mode == "dmg" then str = transdmg(str) + elseif mode == "loc" then + str = transloc(str) else end str = unprotectarb(str) return str diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index e65c3eb..8a9aee8 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -22,6 +22,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. --]] +-- dmg + hamzatrdmg = { -- initial long u {a="%'%_U", b="ʾU"}, @@ -87,11 +89,11 @@ trigraphstrdmg = { -- trigraphs or more -- art. with waṣla + lām {a="'l%-(l)", b="'l-%1"}, -- art. with waṣla + solar consonant - {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-"}, + {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"}, -- li- + art. + lām {a="l(i)%-l%-(l)", b="l%1-l-%2%2"}, -- assim. art. with waṣla + solar consonant - {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-%1"}, + {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"}, -- li- + art. + solar consonant is a special orthography {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, -- li- + assim. art. + solar consonant is a special orthography @@ -173,6 +175,169 @@ shortvtrdmg = { {a="i", b="i"} } +-- loc + +hamzatrloc = { + -- initial long u + {a="%'%_U", b="U"}, + -- madda (historic writing below) + {a="^(')(A)", b="%2"}, + {a="(%W)(')(A)", b="%1%3"}, + {a="'A", b="ʾA"}, + {a="(A)(')(i)$", b="%1ʾ%3"}, + {a="(A)(')(i)(%W)", b="%1ʾ%3%4"}, + {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda + {a="(A)(')", b="%1ʾ"}, -- historic madda + -- initial (needs both ^ and %W patterns) + {a="^(')([ua])", b="%2"}, + {a="^(')(i)", b="%2"}, + {a="(%W)(')([ua])", b="%1%3"}, + {a="(%W)(')(i)", b="%1%3"}, + -- final + {a="([Iy])(')(aN)$", b="%1ʾ%3"}, + {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"}, + {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"}, + {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"}, + {a="([UI])(')([uai])$", b="%1ʾ%3"}, + {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"}, + -- middle + {a="(U)(')", b="%1ʾ"}, + {a="([Iy])(')", b="%1ʾ"}, + {a="([^uai])(')([uU])", b="%1ʾ%3"}, + {a="([^uai])(')([aA])", b="%1ʾ%3"}, + {a="([^uai])(')([iI])", b="%1ʾ%3"}, + {a="(u)(')([uU])", b="%1ʾ%3"}, + {a="(u)(')([aA])", b="%1ʾ%3"}, + {a="(u)(')([iI])", b="%1ʾ%3"}, + {a="(a)(')([aA])", b="%1ʾ%3"}, + {a="(a)(')([uU])", b="%1ʾ%3"}, + {a="(a)(')([iI])", b="%1ʾ%3"}, + {a="(i)(')([aA])", b="%1ʾ%3"}, + {a="(i)(')([uU])", b="%1ʾ%3"}, + {a="(i)(')([iI])", b="%1ʾ%3"}, + {a="(a)(')([^uaiUAI])", b="%1ʾ%3"}, + {a="(u)(')([^uaiUAI])", b="%1ʾ%3"}, + {a="(i)(')([^uaiUAI])", b="%1ʾ%3"} +} + +trigraphstrloc = { -- trigraphs or more + -- 'llatI / 'llad_I + {a="^'ll(a)([%_]?[dt])", b="all%1%2"}, + {a="(%s)'ll(a)([%_]?[dt])", b="%1all%2%3"}, + -- al- + lām + {a="^(a)l%-(l)", b="%1l-%2"}, + {a="(%s)(a)l%-(l)", b="%1%2l-%3"}, + -- al- + solar consonant + {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"}, + {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"}, + -- assim. art. + solar consonant + {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-%2"}, + {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-%3"}, + -- al- + initial unstable hamza + {a="^(a)l%-([uai])", b="%1l-%2"}, + {a="(%s)(a)l%-([uai])", b="%1%2l-%3"}, + -- li- + art. + initial unstable hamza is a special orthography + {a="l(i)%-l%-([uai])", b="l%1l-%2"}, + -- al- + lunar consonant (i.e. what remains) + {a="^(a)l%-", b="%1l-"}, + {a="(%s)(a)l%-", b="%1%2l-"}, + -- art. with waṣla + lām + {a="'l%-(l)", b="al-%1"}, + -- art. with waṣla + solar consonant + {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"}, + -- li- + art. + lām + {a="l(i)%-l%-(l)", b="l%1l-%2"}, + -- assim. art. with waṣla + solar consonant + {a="'([%_%^%.]?[tdrzsn])%-", b="al-"}, + -- li- + art. + solar consonant is a special orthography + {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1l-%2"}, + -- li- + assim. art. + solar consonant is a special orthography + {a="l(i)%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1l-%3"}, + -- art. with waṣla + initial unstable hamza + {a="'l%-([uai])", b="al-%1"}, + -- art. with waṣla + lunar consonant (i.e. what remains) + {a="'l%-", b="al-"} +} + +tanwintrloc = { + {a="uN", b="un"}, + {a="(aN)(_A)", b="an"}, + {a="(aN)(Y)", b="an"}, + {a="(T)(aN)", b="tan"}, + {a="([^TA])(aN)", b="%1an"}, + {a="iN", b="in"} +} + +digraphstrloc = { + {a="(%-)([uai])", b="%1%2"}, -- hyphen + initial alif without hamza + {a="^([uai])", b="%1"}, -- initial alif without hamza + {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza + {a="%-%-", b=""}, + {a="uww", b="ūw"}, + {a="iyy", b="īy"}, + {a="([tkdsg])(h)", b="%1'%2"}, + {a="T([^uai])", b="h%1"}, + {a="T$", b="h"}, + {a="T(%W)", b="h%1"}, + {a="_t", b="th"}, + {a="%^g", b="j"}, + {a="%.h", b="ḥ"}, + {a="_h", b="kh"}, + {a="_d", b="dh"}, + {a="%^s", b="sh"}, + {a="%.s", b="ṣ"}, + {a="%.d", b="ḍ"}, + {a="%.t", b="ṭ"}, + {a="%.z", b="ẓ"}, + {a="%.g", b="gh"}, + {a="(U)(A)", b="ū"}, + {a="WA", b="ū"}, + {a="(a)W", b="%1w"}, + {a="_A", b="á"}, + {a="_u", b="ū"}, + {a="_a", b="ā"}, + {a="_i", b="ī"} +} + +singletrloc = { + {a="b", b="b"}, + {a="t", b="t"}, + {a="j", b="j"}, + {a="x", b="kh"}, + {a="d", b="d"}, + {a="r", b="r"}, + {a="z", b="z"}, + {a="s", b="s"}, + {a="`", b="`"}, + {a="f", b="f"}, + {a="q", b="q"}, + {a="k", b="k"}, + {a="l", b="l"}, + {a="m", b="m"}, + {a="n", b="n"}, + {a="h", b="h"}, + {a="w", b="w"}, + {a="y", b="y"}, + {a="T", b="t"}, + {a="Y", b="á"}, + {a='"', b=''}, +} + +longvtrloc = { + {a="A", b="ā"}, + {a="U", b="ū"}, + {a="I", b="ī"} +} + +shortvtrloc = { + {a="u", b="u"}, + {a="a", b="a"}, + {a="i", b="i"} +} + +finaltrloc = { + {a="ʾ", b="'"}, +} -- return { -- raw = raw, -- cgit v1.2.3