aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Alessi <alessi@robertalessi.net>2017-03-29 21:34:39 +0200
committerRobert Alessi <alessi@robertalessi.net>2017-03-29 21:34:39 +0200
commit885a3e5523dc7d33ad45a3254b2d0be2af8f40ad (patch)
tree44aac92568be40a02cfa272164bcf301d0ebbbb6
parentd6da69f0a4cbefd1ccb2576fa56892852bb6aec5 (diff)
downloadarabluatex-885a3e5523dc7d33ad45a3254b2d0be2af8f40ad.tar.gz
arabica: started implementing this standard: added new functions; now working on tables
-rw-r--r--arabluatex.lua40
-rw-r--r--arabluatex_trans.lua236
2 files changed, 272 insertions, 4 deletions
diff --git a/arabluatex.lua b/arabluatex.lua
index 91932b7..722084c 100644
--- a/arabluatex.lua
+++ b/arabluatex.lua
@@ -494,6 +494,44 @@ local function transloc(str)
494return str 494return str
495end 495end
496 496
--- Transliterate every \arb{...} group found in `str` with the "arabica" rules.
-- Each rule table is a sequence of entries {a = <Lua pattern>, b = <replacement>}.
-- The tables are applied in the fixed order listed in `passes`, and the entries
-- of a table are applied from first to last, so every substitution sees the
-- result of all the previous ones.
-- @param str string that may contain \arb{...} groups
-- @return `str` with each \arb{...} group replaced by \txtrans{<transliteration>}
local function transarabica(str)
   -- The rule tables are globals defined outside this function; they are looked
   -- up on every call, as the original per-table loops did.
   local passes = {
      hamzatrarabica,
      tanwintrarabica,
      trigraphstrarabica,
      digraphstrarabica,
      singletrarabica,
      longvtrarabica,
      shortvtrarabica,
      finaltrarabica,
      punctuationtr,
      nulltr,
   }
   -- Explicit count: a missing (nil) table must still raise an error when its
   -- length is taken instead of silently shortening the list.
   local npasses = 10
   str = string.gsub(str, "\\arb(%b{})", function(inside)
      -- %b{} captures the balanced braces too: drop them
      inside = string.sub(inside, 2, -2)
      for p = 1, npasses do
         local rules = passes[p]
         for i = 1, #rules do
            inside = string.gsub(inside, rules[i].a, rules[i].b)
         end
      end
      return string.format("\\txtrans{%s}", inside)
   end)
   return str
end
534
497local function processbuckw(str) 535local function processbuckw(str)
498 str = string.gsub(str, "\\arb(%b{})", function(inside) 536 str = string.gsub(str, "\\arb(%b{})", function(inside)
499 inside = string.sub(inside, 2, -2) 537 inside = string.sub(inside, 2, -2)
@@ -579,6 +617,8 @@ function processtrans(str, mode, rules, scheme)
579 str = transdmg(str, rules) 617 str = transdmg(str, rules)
580 elseif mode == "loc" then 618 elseif mode == "loc" then
581 str = transloc(str) 619 str = transloc(str)
620 elseif mode == "arabica" then
621 str = transarabica(str)
582 end 622 end
583 str = unprotectarb(str) 623 str = unprotectarb(str)
584return str 624return str
diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua
index 275e235..f6c90f7 100644
--- a/arabluatex_trans.lua
+++ b/arabluatex_trans.lua
@@ -496,7 +496,7 @@ trigraphstrloc = { -- trigraphs or more
496 {a="^(a)l%-([uai])", b="%1l-%2"}, 496 {a="^(a)l%-([uai])", b="%1l-%2"},
497 {a="(%s)(a)l%-([uai])", b="%1%2l-%3"}, 497 {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
498 -- li-/la- + art. + initial unstable hamza is a special orthography 498 -- li-/la- + art. + initial unstable hamza is a special orthography
499 {a="l([ai])%-l%-([uai])", b="l%1l-%2"}, 499 {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
500 -- al- + lunar consonant (i.e. what remains) 500 -- al- + lunar consonant (i.e. what remains)
501 {a="^(a)l%-", b="%1l-"}, 501 {a="^(a)l%-", b="%1l-"},
502 {a="(%s)(a)l%-", b="%1%2l-"}, 502 {a="(%s)(a)l%-", b="%1%2l-"},
@@ -505,13 +505,13 @@ trigraphstrloc = { -- trigraphs or more
505 -- art. with waṣla + solar consonant 505 -- art. with waṣla + solar consonant
506 {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"}, 506 {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"},
507 -- li-/la- + art. + lām 507 -- li-/la- + art. + lām
508 {a="l([ai])%-l%-(l)", b="l%1-%2"}, 508 {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
509 -- assim. art. with waṣla + solar consonant 509 -- assim. art. with waṣla + solar consonant
510 {a="'([%_%^%.]?[tdrzsn])%-", b="al-"}, 510 {a="'([%_%^%.]?[tdrzsn])%-", b="al-"},
511 -- li-/la- + art. + solar consonant is a special orthography 511 -- li-/la- + art. + solar consonant is a special orthography
512 {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1l-%2"}, 512 {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"},
513 -- li-/la- + assim. art. + solar consonant is a special orthography 513 -- li-/la- + assim. art. + solar consonant is a special orthography
514 {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1l-%3"}, 514 {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"},
515 -- art. with waṣla + initial unstable hamza 515 -- art. with waṣla + initial unstable hamza
516 {a="'l%-([uai])", b="al-%1"}, 516 {a="'l%-([uai])", b="al-%1"},
517 -- art. with waṣla + lunar consonant (i.e. what remains) 517 -- art. with waṣla + lunar consonant (i.e. what remains)
@@ -622,3 +622,231 @@ shortvtrloc = {
622finaltrloc = { 622finaltrloc = {
623 {a="ʾ", b="'"}, 623 {a="ʾ", b="'"},
624} 624}
625
626-- arabica
627
-- Hamza rules of the "arabica" transliteration scheme.
-- Sequence of entries {a = <Lua pattern>, b = <replacement>}; transarabica()
-- in arabluatex.lua runs string.gsub(text, a, b) for each entry, from first to
-- last, before any other arabica table.  Order matters: anchored (^, $) and
-- %W-delimited variants come before the unanchored fallback of the same rule.
-- Marked UNTOUCHED/LOC by the author (presumably still identical to the `loc`
-- hamza table -- to be confirmed).
hamzatrarabica = {
   -- hard coded hamza
   {a="|\"'", b="ʾ"},
   {a="A\"'", b="ʾA"},
   {a="[au]\"'", b="ʾ"},
   {a="w\"'", b="ʾ"},
   {a="i\"'", b="ʾ"},
   {a="y\"'", b="ʾ"},
   -- hamza takes tašdīd too
   {a="''([Uu])", b="ʾʾ%1"},
   {a="''([Aa])", b="ʾʾ%1"},
   {a="''([Ii])", b="ʾʾ%1"},
   -- initial long u and i (for a, see below)
   {a="%'%_U", b="U"},
   {a="%'%_I", b="I"},
   -- taḫfīfu 'l-hamza
   -- The character captured after the second hamza must be written back in
   -- every variant; the unanchored fallbacks used to drop it.
   {a="^'u'([^uaiUAI])", b="U%1"},
   {a="(%W)'u'([^uaiUAI])", b="%1U%2"},
   {a="'u'([^uaiUAI])", b="ʾU%1"},
   {a="^'i'([^uaiUAI])", b="I%1"},
   {a="(%W)'i'([^uaiUAI])", b="%1I%2"},
   {a="'i'([^uaiUAI])", b="ʾI%1"},
   -- madda (historic writing below)
   {a="^(')(A)", b="%2"},
   {a="(%W)(')(A)", b="%1%3"},
   {a="^'a'([^uaiUAI])", b="A%1"},
   {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
   {a="'a'([^uaiUAI])", b="A%1"},
   {a="^'a?A", b="A"},
   {a="(%W)'a?A", b="%1A"},
   {a="'a?A", b="ʾA"},
   {a="(A)(')(i)$", b="%1ʾ%3"},
   {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
   {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
   {a="(A)(')", b="%1ʾ"}, -- historic madda
   -- initial (needs both ^ and %W patterns)
   {a="^(')([ua])", b="%2"},
   {a="^(')(i)", b="%2"},
   {a="(%W)(')([ua])", b="%1%3"},
   {a="(%W)(')(i)", b="%1%3"},
   -- final
   {a="([Iy])(')(aN)$", b="%1ʾ%3"},
   {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
   {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
   {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
   {a="([UI])(')([uai])$", b="%1ʾ%3"},
   {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
   -- middle
   {a="(U)(')", b="%1ʾ"},
   {a="([Iy])(')", b="%1ʾ"},
   {a="([^uai])(')([uU])", b="%1ʾ%3"},
   {a="([^uai])(')([aA])", b="%1ʾ%3"},
   {a="([^uai])(')([iI])", b="%1ʾ%3"},
   {a="(u)(')([uU])", b="%1ʾ%3"},
   {a="(u)(')([aA])", b="%1ʾ%3"},
   {a="(u)(')([iI])", b="%1ʾ%3"},
   {a="(a)(')([aA])", b="%1ʾ%3"},
   {a="(a)(')([uU])", b="%1ʾ%3"},
   {a="(a)(')([iI])", b="%1ʾ%3"},
   {a="(i)(')([aA])", b="%1ʾ%3"},
   {a="(i)(')([uU])", b="%1ʾ%3"},
   {a="(i)(')([iI])", b="%1ʾ%3"},
   {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
   {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
   {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
}
694
-- Rules for the endings written with `N` in the input (tanwīn, going by the
-- table name).  Each entry is {a = <Lua pattern>, b = <replacement>};
-- transarabica() in arabluatex.lua applies the entries with string.gsub, first
-- to last, as its second pass (right after the hamza table).
-- Every replacement ends in a plain "n"; an optional leading hyphen (%-?) in
-- the pattern is consumed and not written back.
695tanwintrarabica = { --UNTOUCHED/LOC
696 {a="%-?uNU", b="un"},
697 {a="%-?aNU", b="an"},
698 {a="%-?iNU", b="in"},
699 {a="%-?(\"?At)%-?([ui])N", b="%1%2n"},
700 {a="%-?([ui])N", b="%1n"},
701 {a="%-?(aN)(_A)", b="an"},
702 {a="%-?(aN)(Y)", b="an"},
703 {a="(T)%-?(\"?aN)", b="tan"},
704 {a="([^TA])%-?(\"?aN)", b="%1an"}
705}
706
707-- new
-- Multi-character rules (article, waṣla, silent wāw...) of the "arabica"
-- scheme.  Each entry is {a = <Lua pattern>, b = <replacement>};
-- transarabica() in arabluatex.lua applies the entries with string.gsub, first
-- to last, as its third pass.  Several rules exist in two forms, one anchored
-- with ^ and one preceded by a captured delimiter, because a single Lua
-- pattern cannot express "start of string or delimiter".
-- The waṣla rules write the article as "l-" (the leading "'" is dropped), and
-- the assimilated-article rules write "l-" in place of the assimilated
-- consonant, which is matched but not written back.
708trigraphstrarabica = { -- trigraphs or more
709 -- 'llatI / 'llad_I
710 {a="^'ll(a)([%_]?[dt])", b="ll%1%2"},
711 {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1ll%2%3"}, --p
712 -- al- + lām
713 {a="^(a)l%-(l)", b="%1l-%2"},
714 {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
715 -- al- + solar consonant
716 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"},
717 {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"},
718 -- assim. art. + solar consonant
719 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-"},
720 {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-"},
721 -- al- + initial unstable hamza
722 {a="^(a)l%-([uai])", b="%1l-%2"},
723 {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
724 -- li-/la- + art. + initial unstable hamza is a special orthography
725 {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
726 -- al- + lunar consonant (i.e. what remains)
727 {a="^(a)l%-", b="%1l-"},
728 {a="(%s)(a)l%-", b="%1%2l-"},
729 -- art. with waṣla + lām
730 {a="'l%-(l)", b="l-%1"},
731 -- art. with waṣla + solar consonant
732 {a="'l%-([%_%^%.]?[tdrzsn])", b="l-%1"},
733 -- li-/la- + art. + lām
734 {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
735 -- assim. art. with waṣla + solar consonant
736 {a="'([%_%^%.]?[tdrzsn])%-", b="l-"},
737 -- li-/la- + art. + solar consonant is a special orthography
738 {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"},
739 -- li-/la- + assim. art. + solar consonant is a special orthography
740 {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"},
741 -- art. with waṣla + initial unstable hamza
742 {a="'l%-([uai])", b="l-%1"},
743 -- art. with waṣla + lunar consonant (i.e. what remains)
744 {a="'l%-", b="l-"},
745 -- the silent wāw
746 {a="uU$", b="u"},
747 {a="uU(%W)", b="u%1"},
748 {a="aU$", b="a"},
749 {a="aU(%W)", b="a%1"},
750 {a="iU$", b="i"},
751 {a="iU(%W)", b="i%1"},
752 -- words ending in -āT with silent wāw/yāʾ
753 {a="(_a)UA", b="A"},
754 {a="(_a)U", b="A"},
755 {a="(_a)I", b="A"}
756}
757
-- Two-character rules of the "arabica" scheme.  Each entry is
-- {a = <Lua pattern>, b = <replacement>}; transarabica() in arabluatex.lua
-- applies the entries with string.gsub, first to last, as its fourth pass.
-- The table first removes hyphens before certain endings, then rewrites the
-- prefixed consonant codes (_t, ^g, .h, ...) and the _-prefixed vowels into
-- their final spelling.  Entries whose replacement just re-emits the captures
-- leave the text unchanged.
758digraphstrarabica = {
759 -- discard the ʾiʿrāb hyphen (begin)
760 {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
761 {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
762 {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
763 {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
764 {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
765 {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
766 {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
767 {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
768 {a="(%-)([uai])(%p?%s)", b="%2%3"},
769 {a="(%-)([uai])(%p?)$", b="%2%3"},
770 -- discard the ʾiʿrāb hyphen (end)
771 {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
772 {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
773 {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
774 {a="%-%-", b=""},
775 {a="uww", b="ūw"},
776 {a="iyy$", b="ī"},
777 {a="iyy(%W)", b="ī%1"},
778 {a="iyy", b="īy"},
-- a plain t/k/d/s/g directly followed by h gets an apostrophe in between;
-- the rules further down emit "th", "kh", "dh", "sh" and "gh" for single
-- letters, so this keeps the two cases distinguishable in the output
779 {a="([tkdsg])(h)", b="%1'%2"},
780 -- {a="T([^uai])", b="h%1"},
-- T is written "h" or "t" depending on what follows it (see the patterns)
781 {a="([a%']l%-)(.-)T([%(%[%|%<%s])(al%-)", b="%1%2h%3%4"}, --p
782 {a="T([%(%[%|%<%s])(al%-)", b="t%1%2"}, --p
783 {a="T$", b="h"},
784 {a="T(%W)", b="h%1"},
785 {a="_t", b="th"},
786 {a="%^g", b="j"},
787 {a="%.h", b="ḥ"},
788 {a="_h", b="kh"},
789 {a="_d", b="dh"},
790 {a="%^s", b="sh"},
791 {a="%.s", b="ṣ"},
792 {a="%.d", b="ḍ"},
793 {a="%.t", b="ṭ"},
794 {a="%.z", b="ẓ"},
795 {a="%.g", b="gh"},
796 {a="(U)(A)", b="ū"},
797 {a="WA", b="w"},
798 {a="(a)W", b="%1w"},
799 {a="_A", b="á"},
800 {a="_u", b="ū"},
801 {a="_a", b="ā"},
802 {a="_i", b="ī"},
-- the remaining dotted/careted codes are reduced to the bare letter
803 {a="%.b", b="b"},
804 {a="%.f", b="f"},
805 {a="%.q", b="q"},
806 {a="%.k", b="k"},
807 {a="%.n", b="n"},
808 {a="%^d", b="d"}
809}
810
811-- new
-- Single-character rules of the "arabica" scheme.  Each entry is
-- {a = <Lua pattern>, b = <replacement>}; transarabica() in arabluatex.lua
-- applies the entries with string.gsub, first to last, as its fifth pass.
-- Most entries map a letter to itself; the ones that change the text are
-- x -> "kh", ` -> "ʿ", T -> "t", and the double quote and B, which are deleted.
812singletrarabica = {
813 {a="b", b="b"},
814 {a="t", b="t"},
815 {a="j", b="j"},
816 {a="x", b="kh"},
817 {a="d", b="d"},
818 {a="r", b="r"},
819 {a="z", b="z"},
820 {a="s", b="s"},
821 {a="`", b="ʿ"},
822 {a="f", b="f"},
823 {a="q", b="q"},
824 {a="k", b="k"},
825 {a="l", b="l"},
826 {a="m", b="m"},
827 {a="n", b="n"},
828 {a="h", b="h"},
829 {a="w", b="w"},
830 {a="y", b="y"},
831 {a="T", b="t"},
832 {a="\"", b=""},
833 {a="B", b=""}
834}
835
-- Long-vowel rules of the "arabica" scheme: the capitals A, U, I become the
-- macron vowels ā, ū, ī and Y becomes á.  Entries are
-- {a = <Lua pattern>, b = <replacement>}; transarabica() in arabluatex.lua
-- applies them with string.gsub as its sixth pass, i.e. after every rule that
-- still needs to see the capital letters.
836longvtrarabica = {
837 {a="A", b="ā"},
838 {a="U", b="ū"},
839 {a="I", b="ī"},
840 {a="Y", b="á"},
841}
842
-- Short-vowel rules of the "arabica" scheme.  Entries are
-- {a = <Lua pattern>, b = <replacement>}; transarabica() in arabluatex.lua
-- applies them with string.gsub as its seventh pass.  Every entry maps the
-- vowel to itself, so this pass currently leaves the text unchanged.
843shortvtrarabica = {
844 {a="u", b="u"},
845 {a="a", b="a"},
846 {a="i", b="i"}
847}
848
849-- new
-- Final clean-up rules of the "arabica" scheme, applied by transarabica() in
-- arabluatex.lua as its eighth pass.  The table is currently empty (its only
-- entry is commented out), so that pass performs no substitution.
850finaltrarabica = {
851-- {a="ʾ", b="'"},
852}