aboutsummaryrefslogtreecommitdiff
path: root/arabluatex.lua
diff options
context:
space:
mode:
Diffstat (limited to 'arabluatex.lua')
-rw-r--r--arabluatex.lua108
1 files changed, 51 insertions, 57 deletions
diff --git a/arabluatex.lua b/arabluatex.lua
index 38fe87b..07968c0 100644
--- a/arabluatex.lua
+++ b/arabluatex.lua
@@ -146,18 +146,29 @@ local function takeoutcapetc(str)
146 return str 146 return str
147end 147end
148 148
149local function voc(str) 149local function voc(str, rules)
150 str = string.gsub(str, "\\arb(%b{})", function(inside) 150 str = string.gsub(str, "\\arb(%b{})", function(inside)
151 inside = string.sub(inside, 2, -2) 151 inside = string.sub(inside, 2, -2)
152 for i = 1,#hamza do 152 for i = 1,#hamza do
153 inside = string.gsub(inside, hamza[i].a, hamza[i].b) 153 inside = string.gsub(inside, hamza[i].a, hamza[i].b)
154 end 154 end
155 for i = 1,#tanwin do 155 if rules == "idgham" then
156 inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) 156 for i = 1,#tanwin do
157 inside = string.gsub(inside, tanwin[i].a, tanwin[i].b)
158 end
159 else
160 for i = 1,#tanwineasy do
161 inside = string.gsub(inside, tanwineasy[i].a, tanwineasy[i].b)
162 end
157 end 163 end
158 for i = 1,#trigraphs do 164 for i = 1,#trigraphs do
159 inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b) 165 inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b)
160 end 166 end
167 if rules == "idgham" then
168 for i = 1,#idgham do
169 inside = string.gsub(inside, idgham[i].a, idgham[i].b)
170 end
171 end
161 for i = 1,#digraphs do 172 for i = 1,#digraphs do
162 inside = string.gsub(inside, digraphs[i].a, digraphs[i].b) 173 inside = string.gsub(inside, digraphs[i].a, digraphs[i].b)
163 end 174 end
@@ -218,20 +229,37 @@ local function voceasy(str)
218return str 229return str
219end 230end
220 231
221local function fullvoc(str) 232local function fullvoc(str, rules)
222 str = string.gsub(str, "\\arb(%b{})", function(inside) 233 str = string.gsub(str, "\\arb(%b{})", function(inside)
223 inside = string.sub(inside, 2, -2) 234 inside = string.sub(inside, 2, -2)
224 for i = 1,#hamzafv do 235 for i = 1,#hamzafv do
225 inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b) 236 inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b)
226 end 237 end
227 for i = 1,#tanwinfv do 238 if rules == "idgham" then
228 inside = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b) 239 for i = 1,#tanwinfv do
240 inside = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b)
241 end
242 else
243 for i = 1,#tanwinfveasy do
244 inside = string.gsub(inside, tanwinfveasy[i].a, tanwinfveasy[i].b)
245 end
229 end 246 end
230 for i = 1,#trigraphsfv do 247 for i = 1,#trigraphsfv do
231 inside = string.gsub(inside, trigraphsfv[i].a, trigraphsfv[i].b) 248 inside = string.gsub(inside, trigraphsfv[i].a, trigraphsfv[i].b)
232 end 249 end
233 for i = 1,#digraphsfv do 250 if rules == "idgham" then
234 inside = string.gsub(inside, digraphsfv[i].a, digraphsfv[i].b) 251 for i = 1,#idgham do
252 inside = string.gsub(inside, idgham[i].a, idgham[i].b)
253 end
254 end
255 if rules == "idgham" then
256 for i = 1,#digraphsfvidgham do
257 inside = string.gsub(inside, digraphsfvidgham[i].a, digraphsfvidgham[i].b)
258 end
259 else
260 for i = 1,#digraphsfv do
261 inside = string.gsub(inside, digraphsfv[i].a, digraphsfv[i].b)
262 end
235 end 263 end
236 for i = 1,#singlefv do 264 for i = 1,#singlefv do
237 inside = string.gsub(inside, singlefv[i].a, singlefv[i].b) 265 inside = string.gsub(inside, singlefv[i].a, singlefv[i].b)
@@ -254,7 +282,7 @@ local function fullvoc(str)
254return str 282return str
255end 283end
256 284
257local function fullvoceasy(str, opt) 285local function fullvoceasy(str, rules)
258 str = string.gsub(str, "\\arb(%b{})", function(inside) 286 str = string.gsub(str, "\\arb(%b{})", function(inside)
259 inside = string.sub(inside, 2, -2) 287 inside = string.sub(inside, 2, -2)
260 for i = 1,#hamzafveasy do 288 for i = 1,#hamzafveasy do
@@ -266,7 +294,7 @@ local function fullvoceasy(str, opt)
266 for i = 1,#trigraphsfveasy do 294 for i = 1,#trigraphsfveasy do
267 inside = string.gsub(inside, trigraphsfveasy[i].a, trigraphsfveasy[i].b) 295 inside = string.gsub(inside, trigraphsfveasy[i].a, trigraphsfveasy[i].b)
268 end 296 end
269 if opt == "nosukun" then 297 if rules == "nosukun" then
270 for i = 1,#digraphsfveasy do 298 for i = 1,#digraphsfveasy do
271 inside = string.gsub(inside, digraphsfveasy[i].a, digraphsfveasy[i].b) 299 inside = string.gsub(inside, digraphsfveasy[i].a, digraphsfveasy[i].b)
272 end 300 end
@@ -368,7 +396,7 @@ local function novoceasy(str)
368return str 396return str
369end 397end
370 398
371local function transdmg(str) 399local function transdmg(str, rules)
372 str = string.gsub(str, "\\arb(%b{})", function(inside) 400 str = string.gsub(str, "\\arb(%b{})", function(inside)
373 inside = string.sub(inside, 2, -2) 401 inside = string.sub(inside, 2, -2)
374 for i = 1,#hamzatrdmg do 402 for i = 1,#hamzatrdmg do
@@ -380,40 +408,10 @@ local function transdmg(str)
380 for i = 1,#trigraphstrdmg do 408 for i = 1,#trigraphstrdmg do
381 inside = string.gsub(inside, trigraphstrdmg[i].a, trigraphstrdmg[i].b) 409 inside = string.gsub(inside, trigraphstrdmg[i].a, trigraphstrdmg[i].b)
382 end 410 end
383 for i = 1,#digraphstrdmg do 411 if rules == "idgham" then
384 inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b) 412 for i = 1,#idghamtrdmg do
385 end 413 inside = string.gsub(inside, idghamtrdmg[i].a, idghamtrdmg[i].b)
386 for i = 1,#singletrdmg do 414 end
387 inside = string.gsub(inside, singletrdmg[i].a, singletrdmg[i].b)
388 end
389 for i = 1,#longvtrdmg do
390 inside = string.gsub(inside, longvtrdmg[i].a, longvtrdmg[i].b)
391 end
392 for i = 1,#shortvtrdmg do
393 inside = string.gsub(inside, shortvtrdmg[i].a, shortvtrdmg[i].b)
394 end
395 for i = 1,#punctuationtr do
396 inside = string.gsub(inside, punctuationtr[i].a, punctuationtr[i].b)
397 end
398 for i = 1,#nulltr do
399 inside = string.gsub(inside, nulltr[i].a, nulltr[i].b)
400 end
401 return string.format("\\txtrans{%s}", inside)
402 end)
403return str
404end
405
406local function transdmgeasy(str)
407 str = string.gsub(str, "\\arb(%b{})", function(inside)
408 inside = string.sub(inside, 2, -2)
409 for i = 1,#hamzatrdmg do
410 inside = string.gsub(inside, hamzatrdmg[i].a, hamzatrdmg[i].b)
411 end
412 for i = 1,#tanwintrdmg do
413 inside = string.gsub(inside, tanwintrdmg[i].a, tanwintrdmg[i].b)
414 end
415 for i = 1,#trigraphstrdmgeasy do
416 inside = string.gsub(inside, trigraphstrdmgeasy[i].a, trigraphstrdmgeasy[i].b)
417 end 415 end
418 for i = 1,#digraphstrdmg do 416 for i = 1,#digraphstrdmg do
419 inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b) 417 inside = string.gsub(inside, digraphstrdmg[i].a, digraphstrdmg[i].b)
@@ -498,8 +496,8 @@ function processvoc(str, rules, scheme)
498 else end 496 else end
499 if rules == "easy" or rules == "easynosukun" then 497 if rules == "easy" or rules == "easynosukun" then
500 str = voceasy(str) 498 str = voceasy(str)
501 elseif rules == "dflt" then 499 elseif rules == "dflt" or rules == "idgham" then
502 str = voc(str) 500 str = voc(str, rules)
503 else end 501 else end
504 str = unprotectarb(str) 502 str = unprotectarb(str)
505return str 503return str
@@ -518,8 +516,8 @@ function processfullvoc(str, rules, scheme)
518 str = fullvoceasy(str, "sukun") 516 str = fullvoceasy(str, "sukun")
519 elseif rules == "easynosukun" then 517 elseif rules == "easynosukun" then
520 str = fullvoceasy(str, "nosukun") 518 str = fullvoceasy(str, "nosukun")
521 elseif rules == "dflt" then 519 elseif rules == "dflt" or rules == "idgham" then
522 str = fullvoc(str) 520 str = fullvoc(str, rules)
523 else end 521 else end
524 str = unprotectarb(str) 522 str = unprotectarb(str)
525return str 523return str
@@ -536,7 +534,7 @@ function processnovoc(str, rules, scheme)
536 else end 534 else end
537 if rules == "easy" or rules == "easynosukun" then 535 if rules == "easy" or rules == "easynosukun" then
538 str = novoceasy(str) 536 str = novoceasy(str)
539 elseif rules == "dflt" then 537 elseif rules == "dflt" or rules == "idgham" then
540 str = novoc(str) 538 str = novoc(str)
541 else end 539 else end
542 str = unprotectarb(str) 540 str = unprotectarb(str)
@@ -551,16 +549,12 @@ function processtrans(str, mode, rules, scheme)
551 str = holdcmd(str) 549 str = holdcmd(str)
552 if scheme == "buckwalter" then 550 if scheme == "buckwalter" then
553 str = processbuckw(str) 551 str = processbuckw(str)
554 else end 552 end
555 if mode == "dmg" then 553 if mode == "dmg" then
556 if rules == "easy" or rules == "easynosukun" then 554 str = transdmg(str, rules)
557 str = transdmgeasy(str)
558 elseif rules == "dflt" then
559 str = transdmg(str)
560 else end
561 elseif mode == "loc" then 555 elseif mode == "loc" then
562 str = transloc(str) 556 str = transloc(str)
563 else end 557 end
564 str = unprotectarb(str) 558 str = unprotectarb(str)
565return str 559return str
566end 560end
="color:#000000">{a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, -- al- + initial unstable hamza {a="^(a)l%-(\")([uai])", b="ا%1لٱ%3"}, {a="([%s%-])(a)l%-(\")([uai])", b="%1ا%2لٱ%4"}, {a="^(a)l%-([uai])", b="ا%1لا%2"}, {a="([%s%-])(a)l%-([uai])", b="%1ا%2لا%3"}, -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-(\")([uai])", b="ل%1لٱ%3"}, {a="l([ai])%-l%-([uai])", b="ل%1لا%2"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="ا%1ل"}, {a="([%s%-])(a)l%-", b="%1ا%2ل"}, -- diphthongs to be resolved before ʾalif conjunctionis {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, -- art. with waṣla + lām {a="'l%-(l)", b="ال%1%1"}, -- art. with waṣla + solar consonant {a="'l%-([%_%^%.]?[tdrzsn])", b="ال%1%1"}, -- li-/la- + art. + lām {a="l([ai])%-l%-(l)", b="ل%1%2%2"}, -- assim. art. with waṣla + solar consonant {a="'([%_%^%.]?[tdrzsn])%-", b="ال%1"}, -- li-/la- + art. + solar consonant is a special orthography {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2%2"}, -- li-/la + assim. art. + solar consonant is a special orthography {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3%3"}, -- art. with waṣla + initial unstable hamza {a="'l%-(\")([uai])", b="الٱ%2"}, {a="'l%-([uai])", b="الا%1"}, -- art. with waṣla + lunar consonant (i.e. what remains) {a="'l%-", b="ال"}, -- the silent wāw {a="uU$", b="uو"}, {a="uU(%W)", b="uو%1"}, {a="aU$", b="aو"}, {a="aU(%W)", b="aو%1"}, {a="iU$", b="iو"}, {a="iU(%W)", b="iو%1"}, -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="%1وا"}, {a="(_a)U", b="%1و"}, {a="(_a)I", b="%1ي"}, -- assimilations {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} } digraphs = { -- initial straight double quote gives a connective ʾalif {a="^\"[uai]", b="ٱ"}, {a="([%s%-])\"[uai]", b="%1ٱ"}, {a="(aW)(%s)([uai])", b="awuا%2%3"}, {a="(%-)([uai])", b="%1ا%2"}, -- hyphen + initial alif without hamza {a="^([uai])", b="ا%1"}, -- initial alif without hamza {a="(%s)([uai])", b="%1ا%2"}, -- initial alif without hamza {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, {a="ئئ", b="ئّ"}, {a="bb", b="بّ"}, {a="BB", b="ـّ"}, {a="(%_)([thd])([thd])", b="%1%2|%3"}, {a="tt", b="تّ"}, {a="%_t%_t", b="ثّ"}, {a="jj", b="جّ"}, {a="%^g%^g", b="جّ"}, {a="xx", b="خّ"}, {a="%_h%_h", b="خّ"}, {a="dd", b="دّ"}, {a="%_d%_d", b="ذّ"}, {a="rr", b="رّ"}, {a="zz", b="زّ"}, {a="ss", b="سّ"}, {a="%^s%^s", b="شّ"}, {a="%.s%.s", b="صّ"}, {a="%.d%.d", b="ضّ"}, {a="%.t%.t", b="طّ"}, {a="%.z%.z", b="ظّ"}, {a="%`%`", b="عّ"}, {a="%.g%.g", b="غّ"}, {a="ff", b="فّ"}, {a="qq", b="قّ"}, {a="kk", b="كّ"}, {a="ll", b="لّ"}, {a="mm", b="مّ"}, {a="nn", b="نّ"}, {a="hh", b="هّ"}, {a="ww", b="وّ"}, {a="yy", b="يّ"}, {a="_t", b="ث"}, {a="%^g", b="ج"}, {a="%.h", b="ح"}, {a="_h", b="خ"}, {a="_d", b="ذ"}, {a="%^s", b="ش"}, {a="%.s", b="ص"}, {a="%.d", b="ض"}, {a="%.t", b="ط"}, {a="%.z", b="ظ"}, {a="%.g", b="غ"}, {a="(U)(A)", b="%1ا"}, {a="WA", b="وا"}, {a="(a)W\"", b="%1وْا"}, {a="(a)W", b="%1وا"}, {a="_A", b="aى"}, {a="_u", b="ٗ"}, {a="_a", b="ٰ"}, {a="_i", b="ٖ"}, {a="%.b", b="ٮ"}, {a="%.f", b="ڡ"}, {a="%.q", b="ٯ"}, {a="%.k", b="ک"}, {a="%.n", b="ں"}, {a="%^d", b="ڊ"} } single = { {a="b", b="ب"}, {a="t", b="ت"}, {a="j", b="ج"}, {a="x", b="خ"}, {a="d", b="د"}, {a="r", b="ر"}, {a="z", b="ز"}, {a="s", b="س"}, {a="f", b="ف"}, {a="`", b="ع"}, {a="f", b="ف"}, {a="q", b="ق"}, {a="k", b="ك"}, {a="l", b="ل"}, {a="m", b="م"}, {a="n", b="ن"}, {a="h", b="ه"}, {a="w", b="و"}, {a="y", b="ي"}, {a="T", b="ة"}, {a="\"$", b="ْ"}, {a="\"(%W)", b="ْ%1"}, {a="\"([^uaiUAI])", b="ْ%1"}, {a="([^0-9])%-([^0-9])", b="%1%2"}, {a="B", b="ـ"} } longv = { {a="\"A", b="ا"}, {a="\"U", b="و"}, {a="\"I", b="ي"}, {a="\"Y", b="ى"}, {a="A", b="َا"}, {a="U", b="ُو"}, {a="I", b="ِي"}, {a="Y", b="aى"} } shortv = { {a="\"u", b=""}, {a="\"a", b=""}, {a="\"i", b=""}, {a="u", b="ُ"}, {a="a", b="َ"}, {a="i", b="ِ"} } punctuation = { {a="%(%(", b="﴿"}, {a="%)%)", b="﴾"}, {a="%(", b="+@("},