diff options
author | Robert Alessi <alessi@robertalessi.net> | 2017-03-29 21:34:39 +0200 |
---|---|---|
committer | Robert Alessi <alessi@robertalessi.net> | 2017-03-29 21:34:39 +0200 |
commit | 885a3e5523dc7d33ad45a3254b2d0be2af8f40ad (patch) | |
tree | 44aac92568be40a02cfa272164bcf301d0ebbbb6 | |
parent | d6da69f0a4cbefd1ccb2576fa56892852bb6aec5 (diff) | |
download | arabluatex-885a3e5523dc7d33ad45a3254b2d0be2af8f40ad.tar.gz |
arabica: started implementing this standard: added new functions; now working on tables
-rw-r--r-- | arabluatex.lua | 40 | ||||
-rw-r--r-- | arabluatex_trans.lua | 236 |
2 files changed, 272 insertions, 4 deletions
diff --git a/arabluatex.lua b/arabluatex.lua index 91932b7..722084c 100644 --- a/arabluatex.lua +++ b/arabluatex.lua | |||
@@ -494,6 +494,44 @@ local function transloc(str) | |||
494 | return str | 494 | return str |
495 | end | 495 | end |
496 | 496 | ||
-- Transliterate every \arb{...} group found in `str` with the "arabica"
-- rule tables and wrap each converted group in \txtrans{...}.
-- Text outside \arb{...} is returned untouched.
local function transarabica(str)
   -- Run one rule table over `text`: each entry's Lua pattern `a` is
   -- replaced by `b`, entries being applied in index order.
   local function applyrules(text, rules)
      for idx = 1, #rules do
         local rule = rules[idx]
         text = string.gsub(text, rule.a, rule.b)
      end
      return text
   end

   -- Convert the contents of a single \arb{...} group.
   local function convert(group)
      -- %b{} captures the surrounding braces as well: strip them
      local text = string.sub(group, 2, -2)
      -- the passes are order-sensitive: later tables work on the output
      -- of the earlier ones
      text = applyrules(text, hamzatrarabica)
      text = applyrules(text, tanwintrarabica)
      text = applyrules(text, trigraphstrarabica)
      text = applyrules(text, digraphstrarabica)
      text = applyrules(text, singletrarabica)
      text = applyrules(text, longvtrarabica)
      text = applyrules(text, shortvtrarabica)
      text = applyrules(text, finaltrarabica)
      text = applyrules(text, punctuationtr)
      text = applyrules(text, nulltr)
      return string.format("\\txtrans{%s}", text)
   end

   -- keep only the first result of gsub (the string, not the match count)
   local converted = string.gsub(str, "\\arb(%b{})", convert)
   return converted
end
534 | |||
497 | local function processbuckw(str) | 535 | local function processbuckw(str) |
498 | str = string.gsub(str, "\\arb(%b{})", function(inside) | 536 | str = string.gsub(str, "\\arb(%b{})", function(inside) |
499 | inside = string.sub(inside, 2, -2) | 537 | inside = string.sub(inside, 2, -2) |
@@ -579,6 +617,8 @@ function processtrans(str, mode, rules, scheme) | |||
579 | str = transdmg(str, rules) | 617 | str = transdmg(str, rules) |
580 | elseif mode == "loc" then | 618 | elseif mode == "loc" then |
581 | str = transloc(str) | 619 | str = transloc(str) |
620 | elseif mode == "arabica" then | ||
621 | str = transarabica(str) | ||
582 | end | 622 | end |
583 | str = unprotectarb(str) | 623 | str = unprotectarb(str) |
584 | return str | 624 | return str |
diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index 275e235..f6c90f7 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua | |||
@@ -496,7 +496,7 @@ trigraphstrloc = { -- trigraphs or more | |||
496 | {a="^(a)l%-([uai])", b="%1l-%2"}, | 496 | {a="^(a)l%-([uai])", b="%1l-%2"}, |
497 | {a="(%s)(a)l%-([uai])", b="%1%2l-%3"}, | 497 | {a="(%s)(a)l%-([uai])", b="%1%2l-%3"}, |
498 | -- li-/la- + art. + initial unstable hamza is a special orthography | 498 | -- li-/la- + art. + initial unstable hamza is a special orthography |
499 | {a="l([ai])%-l%-([uai])", b="l%1l-%2"}, | 499 | {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, |
500 | -- al- + lunar consonant (i.e. what remains) | 500 | -- al- + lunar consonant (i.e. what remains) |
501 | {a="^(a)l%-", b="%1l-"}, | 501 | {a="^(a)l%-", b="%1l-"}, |
502 | {a="(%s)(a)l%-", b="%1%2l-"}, | 502 | {a="(%s)(a)l%-", b="%1%2l-"}, |
@@ -505,13 +505,13 @@ trigraphstrloc = { -- trigraphs or more | |||
505 | -- art. with waṣla + solar consonant | 505 | -- art. with waṣla + solar consonant |
506 | {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"}, | 506 | {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"}, |
507 | -- li-/la- + art. + lām | 507 | -- li-/la- + art. + lām |
508 | {a="l([ai])%-l%-(l)", b="l%1-%2"}, | 508 | {a="l([ai])%-l%-(l)", b="l%1-l-%2"}, |
509 | -- assim. art. with waṣla + solar consonant | 509 | -- assim. art. with waṣla + solar consonant |
510 | {a="'([%_%^%.]?[tdrzsn])%-", b="al-"}, | 510 | {a="'([%_%^%.]?[tdrzsn])%-", b="al-"}, |
511 | -- li-/la- + art. + solar consonant is a special orthography | 511 | -- li-/la- + art. + solar consonant is a special orthography |
512 | {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1l-%2"}, | 512 | {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"}, |
513 | -- li-/la- + assim. art. + solar consonant is a special orthography | 513 | -- li-/la- + assim. art. + solar consonant is a special orthography |
514 | {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1l-%3"}, | 514 | {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"}, |
515 | -- art. with waṣla + initial unstable hamza | 515 | -- art. with waṣla + initial unstable hamza |
516 | {a="'l%-([uai])", b="al-%1"}, | 516 | {a="'l%-([uai])", b="al-%1"}, |
517 | -- art. with waṣla + lunar consonant (i.e. what remains) | 517 | -- art. with waṣla + lunar consonant (i.e. what remains) |
@@ -622,3 +622,231 @@ shortvtrloc = { | |||
-- Single-rule list: replaces the modifier letter ʾ (U+02BE) with an ASCII
-- apostrophe. Each entry pairs a Lua pattern `a` with its replacement `b`.
-- NOTE(review): presumably the last pass of the "loc" scheme -- confirm
-- against transloc, whose body is not visible here.
finaltrloc = {
   {a="ʾ", b="'"},
}
625 | |||
626 | -- arabica | ||
627 | |||
-- Hamza rules of the "arabica" scheme. transarabica applies them first,
-- in index order, through string.gsub(text, rule.a, rule.b): `a` is a Lua
-- pattern, `b` its replacement. Every rule that keeps the hamza writes it
-- as ʾ (U+02BE); the order of the entries matters because later rules
-- only see what earlier ones left behind.
-- Marked UNTOUCHED/LOC by the author (presumably carried over from the
-- "loc" tables), except for the two taḫfīf catch-all rules noted below.
hamzatrarabica = {
   -- hard coded hamza
   {a="|\"'", b="ʾ"},
   {a="A\"'", b="ʾA"},
   {a="[au]\"'", b="ʾ"},
   {a="w\"'", b="ʾ"},
   {a="i\"'", b="ʾ"},
   {a="y\"'", b="ʾ"},
   -- hamza takes tašdīd too
   {a="''([Uu])", b="ʾʾ%1"},
   {a="''([Aa])", b="ʾʾ%1"},
   {a="''([Ii])", b="ʾʾ%1"},
   -- initial long u and i (for a, see below)
   {a="%'%_U", b="U"},
   {a="%'%_I", b="I"},
   -- taḫfīfu 'l-hamza
   {a="^'u'([^uaiUAI])", b="U%1"},
   {a="(%W)'u'([^uaiUAI])", b="%1U%2"},
   -- the captured character must be written back, otherwise the letter
   -- that follows the second hamza is silently deleted
   {a="'u'([^uaiUAI])", b="ʾU%1"},
   {a="^'i'([^uaiUAI])", b="I%1"},
   {a="(%W)'i'([^uaiUAI])", b="%1I%2"},
   -- same as above: keep the captured character
   {a="'i'([^uaiUAI])", b="ʾI%1"},
   -- madda (historic writing below)
   {a="^(')(A)", b="%2"},
   {a="(%W)(')(A)", b="%1%3"},
   {a="^'a'([^uaiUAI])", b="A%1"},
   {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
   {a="'a'([^uaiUAI])", b="A%1"},
   {a="^'a?A", b="A"},
   {a="(%W)'a?A", b="%1A"},
   {a="'a?A", b="ʾA"},
   {a="(A)(')(i)$", b="%1ʾ%3"},
   {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
   {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
   {a="(A)(')", b="%1ʾ"}, -- historic madda
   -- initial (needs both ^ and %W patterns)
   {a="^(')([ua])", b="%2"},
   {a="^(')(i)", b="%2"},
   {a="(%W)(')([ua])", b="%1%3"},
   {a="(%W)(')(i)", b="%1%3"},
   -- final
   {a="([Iy])(')(aN)$", b="%1ʾ%3"},
   {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
   {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
   {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
   {a="([UI])(')([uai])$", b="%1ʾ%3"},
   {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
   -- middle
   {a="(U)(')", b="%1ʾ"},
   {a="([Iy])(')", b="%1ʾ"},
   {a="([^uai])(')([uU])", b="%1ʾ%3"},
   {a="([^uai])(')([aA])", b="%1ʾ%3"},
   {a="([^uai])(')([iI])", b="%1ʾ%3"},
   {a="(u)(')([uU])", b="%1ʾ%3"},
   {a="(u)(')([aA])", b="%1ʾ%3"},
   {a="(u)(')([iI])", b="%1ʾ%3"},
   {a="(a)(')([aA])", b="%1ʾ%3"},
   {a="(a)(')([uU])", b="%1ʾ%3"},
   {a="(a)(')([iI])", b="%1ʾ%3"},
   {a="(i)(')([aA])", b="%1ʾ%3"},
   {a="(i)(')([uU])", b="%1ʾ%3"},
   {a="(i)(')([iI])", b="%1ʾ%3"},
   {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
   {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
   {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
}
694 | |||
-- Tanwīn rules of the "arabica" scheme. transarabica applies them right
-- after the hamza rules, in index order, via string.gsub(text, a, b).
-- Each rule rewrites a nunation code (uN, aN, iN, optionally preceded by a
-- hyphen and/or followed by U, _A or Y) as the vowel plus "n"; the leading
-- hyphen and the trailing U/_A/Y are matched but not written back.
-- Marked UNTOUCHED/LOC by the author.
tanwintrarabica = {
   -- nunation followed by U: the U is dropped
   {a="%-?uNU", b="un"},
   {a="%-?aNU", b="an"},
   {a="%-?iNU", b="in"},
   -- -At + uN/iN: keep the (optionally "-prefixed) At, drop the hyphens
   {a="%-?(\"?At)%-?([ui])N", b="%1%2n"},
   {a="%-?([ui])N", b="%1n"},
   -- aN followed by _A or Y: both are reduced to "an"
   {a="%-?(aN)(_A)", b="an"},
   {a="%-?(aN)(Y)", b="an"},
   -- T + aN becomes "tan"
   {a="(T)%-?(\"?aN)", b="tan"},
   -- aN after anything but T or A: the preceding character is kept
   {a="([^TA])%-?(\"?aN)", b="%1an"}
}
706 | |||
707 | -- new | ||
-- Rules for sequences of three or more input characters in the "arabica"
-- scheme (article, assimilation, silent letters). transarabica applies
-- them third, in index order, via string.gsub(text, a, b).
-- Unlike the "^"/"%s" anchored rules, the rules whose pattern starts with
-- "'" (article with waṣla) write the article as "l-" with no vowel.
trigraphstrarabica = { -- trigraphs or more
   -- 'llatI / 'llad_I
   {a="^'ll(a)([%_]?[dt])", b="ll%1%2"},
   {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1ll%2%3"}, --p
   -- al- + lām
   {a="^(a)l%-(l)", b="%1l-%2"},
   {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
   -- al- + solar consonant
   {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"},
   {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"},
   -- assim. art. + solar consonant
   -- (the assimilated consonant is replaced by "l")
   {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-"},
   {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-"},
   -- al- + initial unstable hamza
   {a="^(a)l%-([uai])", b="%1l-%2"},
   {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
   -- li-/la- + art. + initial unstable hamza is a special orthography
   {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
   -- al- + lunar consonant (i.e. what remains)
   {a="^(a)l%-", b="%1l-"},
   {a="(%s)(a)l%-", b="%1%2l-"},
   -- art. with waṣla + lām
   {a="'l%-(l)", b="l-%1"},
   -- art. with waṣla + solar consonant
   {a="'l%-([%_%^%.]?[tdrzsn])", b="l-%1"},
   -- li-/la- + art. + lām
   {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
   -- assim. art. with waṣla + solar consonant
   {a="'([%_%^%.]?[tdrzsn])%-", b="l-"},
   -- li-/la- + art. + solar consonant is a special orthography
   {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"},
   -- li-/la- + assim. art. + solar consonant is a special orthography
   -- (capture 2, the assimilated consonant, is dropped on purpose)
   {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"},
   -- art. with waṣla + initial unstable hamza
   {a="'l%-([uai])", b="l-%1"},
   -- art. with waṣla + lunar consonant (i.e. what remains)
   {a="'l%-", b="l-"},
   -- the silent wāw
   -- (short vowel + U at end of input or before a non-alphanumeric)
   {a="uU$", b="u"},
   {a="uU(%W)", b="u%1"},
   {a="aU$", b="a"},
   {a="aU(%W)", b="a%1"},
   {a="iU$", b="i"},
   {a="iU(%W)", b="i%1"},
   -- words ending in -āT with silent wāw/yāʾ
   -- (the captured _a is not written back: the whole match becomes A)
   {a="(_a)UA", b="A"},
   {a="(_a)U", b="A"},
   {a="(_a)I", b="A"}
}
757 | |||
-- Two-character rules of the "arabica" scheme. transarabica applies them
-- fourth, in index order, via string.gsub(text, a, b). They drop the
-- ʾiʿrāb hyphen, resolve T (tāʾ marbūṭa) and turn the _x / ^x / .x
-- digraphs into their romanized forms.
digraphstrarabica = {
   -- discard the ʾiʿrāb hyphen (begin)
   -- (ending followed by optional punctuation and a space, or by the end
   -- of the input)
   {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
   {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
   {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
   {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
   {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
   {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
   {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
   {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
   {a="(%-)([uai])(%p?%s)", b="%2%3"},
   {a="(%-)([uai])(%p?)$", b="%2%3"},
   -- discard the ʾiʿrāb hyphen (end)
   -- NOTE(review): the next three rules replace each match with itself,
   -- so they currently change nothing -- presumably placeholders kept
   -- from another scheme; confirm.
   {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
   {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
   {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
   -- a double hyphen is removed altogether
   {a="%-%-", b=""},
   {a="uww", b="ūw"},
   {a="iyy$", b="ī"},
   {a="iyy(%W)", b="ī%1"},
   {a="iyy", b="īy"},
   -- insert an apostrophe between t/k/d/s/g and a following h; the
   -- digraphs th/kh/dh/sh/gh are only produced by the rules further down
   {a="([tkdsg])(h)", b="%1'%2"},
   -- {a="T([^uai])", b="h%1"},
   -- T becomes "h", or "t" when directly followed by a separator + "al-"
   {a="([a%']l%-)(.-)T([%(%[%|%<%s])(al%-)", b="%1%2h%3%4"}, --p
   {a="T([%(%[%|%<%s])(al%-)", b="t%1%2"}, --p
   {a="T$", b="h"},
   {a="T(%W)", b="h%1"},
   -- consonant digraphs
   {a="_t", b="th"},
   {a="%^g", b="j"},
   {a="%.h", b="ḥ"},
   {a="_h", b="kh"},
   {a="_d", b="dh"},
   {a="%^s", b="sh"},
   {a="%.s", b="ṣ"},
   {a="%.d", b="ḍ"},
   {a="%.t", b="ṭ"},
   {a="%.z", b="ẓ"},
   {a="%.g", b="gh"},
   -- long vowels and wāw combinations
   {a="(U)(A)", b="ū"},
   {a="WA", b="w"},
   {a="(a)W", b="%1w"},
   {a="_A", b="á"},
   {a="_u", b="ū"},
   {a="_a", b="ā"},
   {a="_i", b="ī"},
   -- dotted/careted variants reduced to the plain letter
   {a="%.b", b="b"},
   {a="%.f", b="f"},
   {a="%.q", b="q"},
   {a="%.k", b="k"},
   {a="%.n", b="n"},
   {a="%^d", b="d"}
}
810 | |||
811 | -- new | ||
-- Single-character rules of the "arabica" scheme. transarabica applies
-- them fifth, in index order, via string.gsub(text, a, b).
-- Most entries map a letter to itself; the effective ones are
-- x -> kh, ` -> ʿ, T -> t, and the removal of " and B.
singletrarabica = {
   {a="b", b="b"},
   {a="t", b="t"},
   {a="j", b="j"},
   {a="x", b="kh"},
   {a="d", b="d"},
   {a="r", b="r"},
   {a="z", b="z"},
   {a="s", b="s"},
   {a="`", b="ʿ"},
   {a="f", b="f"},
   {a="q", b="q"},
   {a="k", b="k"},
   {a="l", b="l"},
   {a="m", b="m"},
   {a="n", b="n"},
   {a="h", b="h"},
   {a="w", b="w"},
   {a="y", b="y"},
   -- any T left over by the digraph rules becomes t
   {a="T", b="t"},
   -- " and B are deleted from the output
   {a="\"", b=""},
   {a="B", b=""}
}
835 | |||
-- Long-vowel rules of the "arabica" scheme. transarabica applies them
-- sixth, via string.gsub(text, a, b): the uppercase codes A, U, I become
-- ā, ū, ī and Y becomes á.
longvtrarabica = {
   {a="A", b="ā"},
   {a="U", b="ū"},
   {a="I", b="ī"},
   {a="Y", b="á"},
}
842 | |||
-- Short-vowel rules of the "arabica" scheme. transarabica applies them
-- seventh, via string.gsub(text, a, b). Every entry maps the vowel to
-- itself, so this pass currently leaves the text unchanged.
shortvtrarabica = {
   {a="u", b="u"},
   {a="a", b="a"},
   {a="i", b="i"}
}
848 | |||
849 | -- new | ||
-- Final rules of the "arabica" scheme. transarabica loops over this table
-- after the short-vowel pass. It is currently empty: its only rule
-- (ʾ -> ASCII apostrophe) is commented out, so ʾ is left as is.
finaltrarabica = {
   -- {a="ʾ", b="'"},
}