aboutsummaryrefslogtreecommitdiff
path: root/arabluatex_voc.lua
diff options
context:
space:
mode:
authorRobert Alessi <alessi@robertalessi.net>2016-12-07 17:21:54 +0100
committerRobert Alessi <alessi@robertalessi.net>2016-12-07 17:22:28 +0100
commitd4299f53e1abe77ba50210f2345ccfcb33abf79c (patch)
tree7fb4c72913f4529c6dc91f981a75fe2f447f7095 /arabluatex_voc.lua
parentc3602097f2b0c81625317422187e381848d09881 (diff)
downloadarabluatex-d4299f53e1abe77ba50210f2345ccfcb33abf79c.tar.gz
rules related to punctuation characters at word boundaries needed much more work (see '--p' tag); new rules for the resolution of the diphthong in ʾaw and law
Diffstat (limited to 'arabluatex_voc.lua')
-rw-r--r--arabluatex_voc.lua56
1 files changed, 39 insertions, 17 deletions
diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua
index 61e4c4a..02c3feb 100644
--- a/arabluatex_voc.lua
+++ b/arabluatex_voc.lua
@@ -201,6 +201,12 @@ hamza = {
201 {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda 201 {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
202 {a="(A)(')", b="aآء"}, -- historic madda 202 {a="(A)(')", b="aآء"}, -- historic madda
203 -- initial (needs both ^ and %W patterns) 203 -- initial (needs both ^ and %W patterns)
204 -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines)
205 {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
206 {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
207 {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
208 {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
209 -- then the 'initial' rules for the remaining cases
204 {a="^(')([ua])", b="أ%2"}, 210 {a="^(')([ua])", b="أ%2"},
205 {a="^(')(i)", b="إ%2"}, 211 {a="^(')(i)", b="إ%2"},
206 {a="(%W)(')([ua])", b="%1أ%3"}, 212 {a="(%W)(')([ua])", b="%1أ%3"},
@@ -292,6 +298,12 @@ hamzaeasy = { -- differences marked below with 'easy'
292 {a="(A)(')", b="aاء"}, -- historic madda 298 {a="(A)(')", b="aاء"}, -- historic madda
293 --easy (end) 299 --easy (end)
294 -- initial (needs both ^ and %W patterns) 300 -- initial (needs both ^ and %W patterns)
301 -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines)
302 {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
303 {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
304 {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
305 {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
306 -- then the 'initial' rules for the remaining cases
295 {a="^(')([ua])", b="أ%2"}, 307 {a="^(')([ua])", b="أ%2"},
296 {a="^(')(i)", b="إ%2"}, 308 {a="^(')(i)", b="إ%2"},
297 {a="(%W)(')([ua])", b="%1أ%3"}, 309 {a="(%W)(')([ua])", b="%1أ%3"},
@@ -415,27 +427,32 @@ tanwineasy = { -- 'easy' requires some lines to be taken out:
415trigraphs = { -- trigraphs or more 427trigraphs = { -- trigraphs or more
416 -- 'llatI / 'llad_I 428 -- 'llatI / 'llad_I
417 {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, 429 {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
418 {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p 430 {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p
431 -- law: the diphthong is to be resolved into 'lawi' (next 4 lines)
432 {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
433 {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
434 {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
435 {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
419 -- al- + lām 436 -- al- + lām
420 {a="^(a)l%-(l)", b="ا%1ل%2%2"}, 437 {a="^(a)l%-(l)", b="ا%1ل%2%2"},
421 {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p 438 {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p
422 -- al- + solar consonant 439 -- al- + solar consonant
423 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, 440 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"},
424 {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p 441 {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p
425 -- assim. art. + solar consonant 442 -- assim. art. + solar consonant
426 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, 443 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"},
427 {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p 444 {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p
428 -- al- + initial unstable hamza 445 -- al- + initial unstable hamza
429 {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"}, 446 {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"},
430 {a="([%p%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p 447 {a="([%(%[%|%<%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p
431 {a="^(a)l%-([uai])", b="ا%1ل%2ا"}, 448 {a="^(a)l%-([uai])", b="ا%1ل%2ا"},
432 {a="([%p%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p 449 {a="([%(%[%|%<%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p
433 -- li-/la- + art. + initial unstable hamza is a special orthography 450 -- li-/la- + art. + initial unstable hamza is a special orthography
434 {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"}, 451 {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"},
435 {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"}, 452 {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"},
436 -- al- + lunar consonant (i.e. what remains) 453 -- al- + lunar consonant (i.e. what remains)
437 {a="^(a)l%-", b="ا%1ل"}, 454 {a="^(a)l%-", b="ا%1ل"},
438 {a="([%p%s%-])(a)l%-", b="%1ا%2ل"}, --p 455 {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2ل"}, --p
439 -- diphthongs to be resolved before ʾalif conjunctionis 456 -- diphthongs to be resolved before ʾalif conjunctionis
440 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, 457 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
441 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, 458 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
@@ -480,27 +497,32 @@ trigraphseasy = { -- differences marked below with 'easy'
480 {a="l%-l_ah", b="l-ll_ah"}, 497 {a="l%-l_ah", b="l-ll_ah"},
481 -- 'llatI / 'llad_I 498 -- 'llatI / 'llad_I
482 {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, 499 {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
483 {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p 500 {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p
501 -- law: the diphthong is to be resolved into 'lawi' (next 4 lines)
502 {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
503 {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
504 {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
505 {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
484 -- al- + lām (easy) 506 -- al- + lām (easy)
485 {a="^(a)l%-(l)", b="ا%1ل%2"}, 507 {a="^(a)l%-(l)", b="ا%1ل%2"},
486 {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p 508 {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, --p
487 -- al- + solar consonant (easy) 509 -- al- + solar consonant (easy)
488 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"}, 510 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"},
489 {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p 511 {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, --p
490 -- assim. art. + solar consonant (easy) 512 -- assim. art. + solar consonant (easy)
491 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"}, 513 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"},
492 {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p 514 {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, --p
493 -- al- + initial unstable hamza 515 -- al- + initial unstable hamza
494 {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"}, 516 {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"},
495 {a="([%p%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p 517 {a="([%(%[%|%<%s%-])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p
496 {a="^(a)l%-([uai])", b="ا%1ل%2ا"}, 518 {a="^(a)l%-([uai])", b="ا%1ل%2ا"},
497 {a="([%p%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p 519 {a="([%(%[%|%<%s%-])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p
498 -- li-/la- + art. + initial unstable hamza is a special orthography 520 -- li-/la- + art. + initial unstable hamza is a special orthography
499 {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"}, 521 {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"},
500 {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"}, 522 {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"},
501 -- al- + lunar consonant (i.e. what remains) 523 -- al- + lunar consonant (i.e. what remains)
502 {a="^(a)l%-", b="ا%1ل"}, 524 {a="^(a)l%-", b="ا%1ل"},
503 {a="([%p%s%-])(a)l%-", b="%1ا%2ل"}, --p 525 {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2ل"}, --p
504 -- diphthongs to be resolved before ʾalif conjunctionis 526 -- diphthongs to be resolved before ʾalif conjunctionis
505 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, 527 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
506 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, 528 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
@@ -550,12 +572,12 @@ digraphs = {
550 -- ʾiʿrāb (end) 572 -- ʾiʿrāb (end)
551 -- initial straight double quote gives a connective ʾalif 573 -- initial straight double quote gives a connective ʾalif
552 {a="^\"[uai]", b="ٱ"}, 574 {a="^\"[uai]", b="ٱ"},
553 {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p 575 {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
554 {a="(aW)(%s)([uai])", b="awuا%2%3"}, 576 {a="(aW)(%s)([uai])", b="awuا%2%3"},
555 -- hyphen + initial alif without hamza: 577 -- hyphen + initial alif without hamza:
556 {a="(%-)([uai])([%^%_%.%`]?)(%a)", b="%1ا%3%4"}, 578 {a="(%-)([uai])([%^%_%.%`]?)(%a)", b="%1ا%3%4"},
557 {a="^([uai])", b="ا%1"}, -- initial alif without hamza 579 {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p
558 {a="([%p%s])([uai])", b="%1ا"}, -- initial alif without hamza --p 580 {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا"}, -- initial alif without hamza --p
559 {a="%-%-", b="ـ"}, 581 {a="%-%-", b="ـ"},
560 {a="ؤؤ", b="ؤّ"}, 582 {a="ؤؤ", b="ؤّ"},
561 {a="أأ", b="أّ"}, 583 {a="أأ", b="أّ"},