aboutsummaryrefslogtreecommitdiff
path: root/arabluatex_fullvoc.lua
diff options
context:
space:
mode:
authorRobert Alessi <alessi@robertalessi.net>2016-12-07 17:21:54 +0100
committerRobert Alessi <alessi@robertalessi.net>2016-12-07 17:22:28 +0100
commitd4299f53e1abe77ba50210f2345ccfcb33abf79c (patch)
tree7fb4c72913f4529c6dc91f981a75fe2f447f7095 /arabluatex_fullvoc.lua
parentc3602097f2b0c81625317422187e381848d09881 (diff)
downloadarabluatex-d4299f53e1abe77ba50210f2345ccfcb33abf79c.tar.gz
rules related to punctuation characters at word boundaries needed much more work (see '--p' tag); new rules for the resolution of the diphthong in ʾaw and law
Diffstat (limited to 'arabluatex_fullvoc.lua')
-rw-r--r--arabluatex_fullvoc.lua78
1 files changed, 53 insertions, 25 deletions
diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua
index bb928e4..efe26c0 100644
--- a/arabluatex_fullvoc.lua
+++ b/arabluatex_fullvoc.lua
@@ -66,6 +66,12 @@ hamzafv = {
66 {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda 66 {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
67 {a="(A)(')", b="aآء"}, -- historic madda 67 {a="(A)(')", b="aآء"}, -- historic madda
68 -- initial (needs both ^ and %W patterns) 68 -- initial (needs both ^ and %W patterns)
69 -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines)
70 {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
71 {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
72 {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
73 {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
74 -- then the 'initial' rules for the remaining cases
69 {a="^(')([ua])", b="أ%2"}, 75 {a="^(')([ua])", b="أ%2"},
70 {a="^(')(i)", b="إ%2"}, 76 {a="^(')(i)", b="إ%2"},
71 {a="(%W)(')([ua])", b="%1أ%3"}, 77 {a="(%W)(')([ua])", b="%1أ%3"},
@@ -166,6 +172,12 @@ hamzafveasy = { -- differences marked below with 'easy'
166 {a="(A)(')", b="aاء"}, -- historic madda 172 {a="(A)(')", b="aاء"}, -- historic madda
167 --easy (end) 173 --easy (end)
168 -- initial (needs both ^ and %W patterns) 174 -- initial (needs both ^ and %W patterns)
175 -- 'aw: the diphthong is to be resolved into 'awi' (next 4 lines)
176 {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
177 {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
178 {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
179 {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
180 -- then the 'initial' rules for the remaining cases
169 {a="^(')([ua])", b="أ%2"}, 181 {a="^(')([ua])", b="أ%2"},
170 {a="^(')(i)", b="إ%2"}, 182 {a="^(')(i)", b="إ%2"},
171 {a="(%W)(')([ua])", b="%1أ%3"}, 183 {a="(%W)(')([ua])", b="%1أ%3"},
@@ -220,7 +232,10 @@ hamzafveasy = { -- differences marked below with 'easy'
220 {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} 232 {a="(i)(')([^uaiUAI])", b="%1ئْ%3"}
221} 233}
222 234
223tanwinfv = { 235tanwinfv = { -- with assimilations (\SetArbDflt*)
236 {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"},
237 {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"},
238 {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"},
224 {a="%-?uNU", b="ٌو"}, 239 {a="%-?uNU", b="ٌو"},
225 {a="%-?aNU", b="ًوا"}, 240 {a="%-?aNU", b="ًوا"},
226 {a="%-?iNU", b="ٍو"}, 241 {a="%-?iNU", b="ٍو"},
@@ -256,6 +271,9 @@ tanwinfv = {
256} 271}
257 272
258tanwinfveasy = { -- no assimilations (see below) 273tanwinfveasy = { -- no assimilations (see below)
274 {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"},
275 {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"},
276 {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"},
259 {a="%-?uNU", b="ٌو"}, 277 {a="%-?uNU", b="ٌو"},
260 {a="%-?aNU", b="ًوا"}, 278 {a="%-?aNU", b="ًوا"},
261 {a="%-?iNU", b="ٍو"}, 279 {a="%-?iNU", b="ٍو"},
@@ -293,24 +311,29 @@ tanwinfveasy = { -- no assimilations (see below)
293trigraphsfv = { -- trigraphs or more 311trigraphsfv = { -- trigraphs or more
294 -- 'llatI / 'llad_I 312 -- 'llatI / 'llad_I
295 {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, 313 {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"},
296 {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p 314 {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p
315 -- law: the diphthong is to be resolved into 'awi' (next 4 lines)
316 {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
317 {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
318 {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
319 {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
297 -- al- + lām 320 -- al- + lām
298 {a="^(a)l%-(l)", b="ا%1ل%2%2"}, 321 {a="^(a)l%-(l)", b="ا%1ل%2%2"},
299 {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p 322 {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p
300 -- al- + solar consonant 323 -- al- + solar consonant
301 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, 324 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"},
302 {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p 325 {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p
303 -- assim. art. + solar consonant 326 -- assim. art. + solar consonant
304 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, 327 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"},
305 {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p 328 {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p
306 -- al- + initial unstable hamza 329 -- al- + initial unstable hamza
307 {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"}, 330 {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"},
308 {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p 331 {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p
309 -- li-/la + art. + initial unstable hamza is a special orthography 332 -- li-/la + art. + initial unstable hamza is a special orthography
310 {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"}, 333 {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"},
311 -- al- + lunar consonant (i.e. what remains) 334 -- al- + lunar consonant (i.e. what remains)
312 {a="^(a)l%-", b="ا%1لْ"}, 335 {a="^(a)l%-", b="ا%1لْ"},
313 {a="([%p%s%-])(a)l%-", b="%1ا%2لْ"}, --p 336 {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2لْ"}, --p
314 -- diphthongs to be resolved before ʾalif conjunctionis 337 -- diphthongs to be resolved before ʾalif conjunctionis
315 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, 338 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
316 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, 339 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
@@ -350,24 +373,29 @@ trigraphsfv = { -- trigraphs or more
350trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs) 373trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs)
351 -- 'llatI / 'llad_I 374 -- 'llatI / 'llad_I
352 {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, 375 {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"},
353 {a="([%p%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p 376 {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p
377 -- law: the diphthong is to be resolved into 'awi' (next 4 lines)
378 {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
379 {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
380 {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
381 {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
354 -- al- + lām 382 -- al- + lām
355 {a="^(a)l%-(l)", b="ا%1ل%2%2"}, 383 {a="^(a)l%-(l)", b="ا%1ل%2%2"},
356 {a="([%p%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p 384 {a="([%(%[%|%<%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p
357 -- al- + solar consonant 385 -- al- + solar consonant
358 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, 386 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"},
359 {a="([%p%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p 387 {a="([%(%[%|%<%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, --p
360 -- assim. art. + solar consonant 388 -- assim. art. + solar consonant
361 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, 389 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"},
362 {a="([%p%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p 390 {a="([%(%[%|%<%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, --p
363 -- al- + initial unstable hamza 391 -- al- + initial unstable hamza
364 {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"}, 392 {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"},
365 {a="([%p%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p 393 {a="([%(%[%|%<%s%-])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p
366 -- li-/la + art. + initial unstable hamza is a special orthography 394 -- li-/la + art. + initial unstable hamza is a special orthography
367 {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"}, 395 {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"},
368 -- al- + lunar consonant (i.e. what remains) 396 -- al- + lunar consonant (i.e. what remains)
369 {a="^(a)l%-", b="ا%1لْ"}, 397 {a="^(a)l%-", b="ا%1لْ"},
370 {a="([%p%s%-])(a)l%-", b="%1ا%2لْ"}, --p 398 {a="([%(%[%|%<%s%-])(a)l%-", b="%1ا%2لْ"}, --p
371 -- diphthongs to be resolved before ʾalif conjunctionis 399 -- diphthongs to be resolved before ʾalif conjunctionis
372 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, 400 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
373 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, 401 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
@@ -416,16 +444,16 @@ digraphsfvidgham = {
416 -- ʾiʿrāb (end) 444 -- ʾiʿrāb (end)
417 -- initial straight double quote gives a connective ʾalif 445 -- initial straight double quote gives a connective ʾalif
418 {a="^\"[uai]", b="ٱ"}, 446 {a="^\"[uai]", b="ٱ"},
419 {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p 447 {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
420 -- diphthongs to be resolved before ʾalif conjunctionis 448 -- diphthongs to be resolved before ʾalif conjunctionis
421 {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, 449 {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"},
422 {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, 450 {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"},
423 {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, 451 {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"},
424 {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza 452 {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza
425 -- initial alif without hamza 453 -- initial alif without hamza
426 {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, 454 {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p
427 {a="^([uai])", b="ا%1"}, -- initial alif without hamza 455 {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p
428 {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p 456 {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p
429 {a="%-%-", b="ـ"}, 457 {a="%-%-", b="ـ"},
430 {a="ؤؤ", b="ؤّ"}, 458 {a="ؤؤ", b="ؤّ"},
431 {a="أأ", b="أّ"}, 459 {a="أأ", b="أّ"},
@@ -518,16 +546,16 @@ digraphsfv = {
518 -- ʾiʿrāb (end) 546 -- ʾiʿrāb (end)
519 -- initial straight double quote gives a connective ʾalif 547 -- initial straight double quote gives a connective ʾalif
520 {a="^\"[uai]", b="ٱ"}, 548 {a="^\"[uai]", b="ٱ"},
521 {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p 549 {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
522 -- diphthongs to be resolved before ʾalif conjunctionis 550 -- diphthongs to be resolved before ʾalif conjunctionis
523 {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, 551 {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"},
524 {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, 552 {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"},
525 {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, 553 {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"},
526 {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza 554 {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza
527 -- initial alif without hamza 555 -- initial alif without hamza
528 {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, 556 {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p
529 {a="^([uai])", b="ا%1"}, -- initial alif without hamza 557 {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p
530 {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p 558 {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p
531 {a="%-%-", b="ـ"}, 559 {a="%-%-", b="ـ"},
532 {a="ؤؤ", b="ؤّ"}, 560 {a="ؤؤ", b="ؤّ"},
533 {a="أأ", b="أّ"}, 561 {a="أأ", b="أّ"},
@@ -620,16 +648,16 @@ digraphsfveasy = { -- see the differences under 'easy' marker below
620 -- ʾiʿrāb (end) 648 -- ʾiʿrāb (end)
621 -- initial straight double quote gives a connective ʾalif 649 -- initial straight double quote gives a connective ʾalif
622 {a="^\"[uai]", b="ٱ"}, 650 {a="^\"[uai]", b="ٱ"},
623 {a="([%p%s%-])\"[uai]", b="%1ٱ"}, --p 651 {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
624 -- diphthongs to be resolved before ʾalif conjunctionis 652 -- diphthongs to be resolved before ʾalif conjunctionis
625 {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, 653 {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"},
626 {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, 654 {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"},
627 {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, 655 {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"},
628 {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza 656 {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza
629 -- initial alif without hamza 657 -- initial alif without hamza
630 {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, 658 {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p
631 {a="^([uai])", b="ا%1"}, -- initial alif without hamza 659 {a="^([%(%[%|%<]?)([uai])", b="%1ا%2"}, -- initial alif without hamza --p
632 {a="([%p%s])([uai])", b="%1ا%2"}, -- initial alif without hamza --p 660 {a="(%s)([%(%[%|%<]?)([uai])", b="%1%2ا%3"}, -- initial alif without hamza --p
633 {a="%-%-", b="ـ"}, 661 {a="%-%-", b="ـ"},
634 {a="ؤؤ", b="ؤّ"}, 662 {a="ؤؤ", b="ؤّ"},
635 {a="أأ", b="أّ"}, 663 {a="أأ", b="أّ"},