diff options
author | Robert Alessi <alessi@robertalessi.net> | 2018-06-03 22:13:51 +0200 |
---|---|---|
committer | Robert Alessi <alessi@robertalessi.net> | 2018-06-03 22:13:51 +0200 |
commit | 9bf202e8f1ff9a1047db825c54ca5226164eecfa (patch) | |
tree | 1cd2607f185af403bff92bba874d317b419f0e7a | |
parent | bc16279db68fd63aa1830167e0a635f6526cc1ed (diff) | |
download | arabluatex-9bf202e8f1ff9a1047db825c54ca5226164eecfa.tar.gz |
LPeg patterns: removed most of the rules from (un)protectarb() and moved the \par handling into takeoutcapetc().
[todo] more work is now needed for footnotes to work in some extreme cases
-rw-r--r-- | arabluatex.lua | 66 |
1 file changed, 40 insertions, 26 deletions
diff --git a/arabluatex.lua b/arabluatex.lua index 053fb50..6a08a33 100644 --- a/arabluatex.lua +++ b/arabluatex.lua | |||
@@ -35,21 +35,12 @@ local function gsub(s, patt, repl) | |||
35 | end | 35 | end |
36 | 36 | ||
37 | local function protectarb(str) | 37 | local function protectarb(str) |
38 | -- str = string.gsub(str, "(\\arb%s?)(%[.-%])(%b{})", "\\@arb%2%3") | 38 | str = string.gsub(str, "(\\arb%s?)(%[.-%])(%b{})", "\\@arb%2%3") |
39 | -- str = string.gsub(str, "(\\begin%s?)(%b{})(%b[])", "\\@@begin{%2%3}") | ||
40 | -- str = string.gsub(str, "(\\begin%s?)(%b{})", "\\@begin%2") | ||
41 | -- str = string.gsub(str, "(\\end%s?)(%b{})", "\\@end%2") | ||
42 | -- str = string.gsub(str, "\\par", "\\p@r{}") | ||
43 | -- str = string.gsub(str, "\\@@par", "\\p@r{}") | ||
44 | return str | 39 | return str |
45 | end | 40 | end |
46 | 41 | ||
47 | local function unprotectarb(str) | 42 | local function unprotectarb(str) |
48 | -- str = string.gsub(str, "(\\@arb)(%[.-%])(%b{})", "\\arb%2%3") | 43 | str = string.gsub(str, "(\\@arb)(%[.-%])(%b{})", "\\arb%2%3") |
49 | -- str = string.gsub(str, "(\\@@begin){(%b{})(%b[])}", "\\begin%2%3") | ||
50 | -- str = string.gsub(str, "(\\@begin)(%b{})", "\\begin%2") | ||
51 | -- str = string.gsub(str, "(\\@end)(%b{})", "\\end%2") | ||
52 | -- str = string.gsub(str, "\\p@r{}", "\\par") | ||
53 | return str | 44 | return str |
54 | end | 45 | end |
55 | 46 | ||
@@ -57,11 +48,8 @@ end | |||
57 | -- arabluatex. 'albrkcmds' is what is set by default. 'brkcmds' is | 48 | -- arabluatex. 'albrkcmds' is what is set by default. 'brkcmds' is |
58 | -- what may be set in the preamble as argument of \MkArbBreak{} | 49 | -- what may be set in the preamble as argument of \MkArbBreak{} |
59 | local albrkcmds = { | 50 | local albrkcmds = { |
60 | "arb", | ||
61 | "begin", | 51 | "begin", |
62 | "end", | 52 | "end", |
63 | "par", | ||
64 | "@@par", | ||
65 | "RL", | 53 | "RL", |
66 | "LR", | 54 | "LR", |
67 | "RLfootnote", | 55 | "RLfootnote", |
@@ -89,12 +77,16 @@ function mkarbbreak(str) | |||
89 | end | 77 | end |
90 | 78 | ||
91 | -- some basic patterns: | 79 | -- some basic patterns: |
80 | local ascii = lpeg.R("az", "AZ", "@@") | ||
92 | local dblbkslash = lpeg.Cs("\\") | 81 | local dblbkslash = lpeg.Cs("\\") |
93 | local bsqbrackets = lpeg.Cs{ "[" * ((1 - lpeg.S"[]") + lpeg.V(1))^0 * "]" } | 82 | local bsqbrackets = lpeg.Cs{ "[" * ((1 - lpeg.S"[]") + lpeg.V(1))^0 * "]" } |
94 | local bcbraces = lpeg.Cs{ "{" * ((1 - lpeg.S"{}") + lpeg.V(1))^0 * "}" } | 83 | local bcbraces = lpeg.Cs{ "{" * ((1 - lpeg.S"{}") + lpeg.V(1))^0 * "}" } |
95 | local spce = lpeg.Cs(" ") | 84 | local spce = lpeg.Cs(" ") |
85 | local spcenc = lpeg.P(" ") | ||
86 | local cmdstar = lpeg.Cs(spce * lpeg.P("*")) | ||
96 | local bsqbracketsii = lpeg.Cs(bsqbrackets^-2) | 87 | local bsqbracketsii = lpeg.Cs(bsqbrackets^-2) |
97 | local bcbracesii = lpeg.Cs(bcbraces^-2) | 88 | local bcbracesii = lpeg.Cs(bcbraces^-2) |
89 | local cmd = lpeg.Cs(dblbkslash * ascii^1 * cmdstar^-1) | ||
98 | local cmdargs = lpeg.Cs(spce * bsqbracketsii * bcbracesii * bsqbrackets^-1) | 90 | local cmdargs = lpeg.Cs(spce * bsqbracketsii * bcbracesii * bsqbrackets^-1) |
99 | 91 | ||
100 | local function breakcmd(str) | 92 | local function breakcmd(str) |
@@ -133,22 +125,43 @@ local function breakcmd(str) | |||
133 | return str | 125 | return str |
134 | end | 126 | end |
135 | 127 | ||
128 | ---[[ | ||
129 | local function holdcmd(str) | ||
130 | str = gsub(str, lpeg.Cs("\\arb") * bcbraces, function(tag, body) | ||
131 | body = string.sub(body, 2, -2) | ||
132 | body = gsub(body, cmd * bsqbracketsii * spcenc^-1 * bcbraces, function(btag, bopt, bbody) | ||
133 | bbody = string.sub(bbody, 2, -2) | ||
134 | if string.find(btag, "@") then | ||
135 | return holdcmd(string.format("}%s%s{%s}\\arb{", btag, bopt, bbody)) | ||
136 | else | ||
137 | return holdcmd(string.format("}%s%s{\\arb{%s}}\\arb{", btag, bopt, bbody)) | ||
138 | end | ||
139 | end) | ||
140 | return string.format("%s{%s}", tag, body) | ||
141 | end) | ||
142 | str = string.gsub(str, "\\arb{}", "") | ||
143 | return str | ||
144 | end | ||
145 | --]] | ||
146 | |||
147 | --[[ | ||
136 | local function holdcmd(str) | 148 | local function holdcmd(str) |
137 | str = string.gsub(str, "\\(arb)(%b{})", function(tag, body) | 149 | str = string.gsub(str, "\\(arb)(%b{})", function(tag, body) |
138 | body = string.sub(body, 2, -2) | 150 | body = string.sub(body, 2, -2) |
139 | body = string.gsub(body, "\\(.-)(%b{})", function(btag, bbody) | 151 | body = string.gsub(body, "\\(.-)(%b{})", function(btag, bbody) |
140 | bbody = string.sub(bbody, 2, -2) | 152 | bbody = string.sub(bbody, 2, -2) |
141 | -- if string.find(btag, "@") then | 153 | if string.find(btag, "@") then |
142 | -- return holdcmd(string.format("}\\%s{%s}\\arb{", btag, bbody)) | 154 | return holdcmd(string.format("}\\%s{%s}\\arb{", btag, bbody)) |
143 | -- else | 155 | else |
144 | return holdcmd(string.format("}\\%s{\\arb{%s}}\\arb{", btag, bbody)) | 156 | return holdcmd(string.format("}\\%s{\\arb{%s}}\\arb{", btag, bbody)) |
145 | -- end | 157 | end |
146 | end) | 158 | end) |
147 | return string.format("\\%s{%s}", tag, body) | 159 | return string.format("\\%s{%s}", tag, body) |
148 | end) | 160 | end) |
149 | str = string.gsub(str, "\\arb{}", "") | 161 | str = string.gsub(str, "\\arb{}", "") |
150 | return str | 162 | return str |
151 | end | 163 | end |
164 | --]] | ||
152 | 165 | ||
153 | local function arbnum(str) | 166 | local function arbnum(str) |
154 | str = string.gsub(str, "([0-9%,%-%/]+)", function(num) | 167 | str = string.gsub(str, "([0-9%,%-%/]+)", function(num) |
@@ -207,6 +220,7 @@ local function takeoutcapetc(str) | |||
207 | end) | 220 | end) |
208 | str = string.gsub(str, "\\linebreak", "") | 221 | str = string.gsub(str, "\\linebreak", "") |
209 | str = string.gsub(str, "\\%-", "") | 222 | str = string.gsub(str, "\\%-", "") |
223 | str = string.gsub(str, "\\(@-)par", "\\%1par{}") | ||
210 | return str | 224 | return str |
211 | end | 225 | end |
212 | 226 | ||
@@ -594,7 +608,7 @@ function processvoc(str, rules, scheme) | |||
594 | str = "\\arb{".. str.."}" | 608 | str = "\\arb{".. str.."}" |
595 | str = processarbnull(str, scheme) | 609 | str = processarbnull(str, scheme) |
596 | str = takeoutcapetc(str) | 610 | str = takeoutcapetc(str) |
597 | -- str = protectarb(str) | 611 | str = protectarb(str) |
598 | str = breakcmd(str) | 612 | str = breakcmd(str) |
599 | str = holdcmd(str) | 613 | str = holdcmd(str) |
600 | if scheme == "buckwalter" then | 614 | if scheme == "buckwalter" then |
@@ -605,7 +619,7 @@ function processvoc(str, rules, scheme) | |||
605 | elseif rules == "dflt" or rules == "idgham" then | 619 | elseif rules == "dflt" or rules == "idgham" then |
606 | str = voc(str, rules) | 620 | str = voc(str, rules) |
607 | else end | 621 | else end |
608 | -- str = unprotectarb(str) | 622 | str = unprotectarb(str) |
609 | return str | 623 | return str |
610 | end | 624 | end |
611 | 625 | ||
@@ -613,7 +627,7 @@ function processfullvoc(str, rules, scheme) | |||
613 | str = "\\arb{".. str.."}" | 627 | str = "\\arb{".. str.."}" |
614 | str = processarbnull(str, scheme) | 628 | str = processarbnull(str, scheme) |
615 | str = takeoutcapetc(str) | 629 | str = takeoutcapetc(str) |
616 | -- str = protectarb(str) | 630 | str = protectarb(str) |
617 | str = breakcmd(str) | 631 | str = breakcmd(str) |
618 | str = holdcmd(str) | 632 | str = holdcmd(str) |
619 | if scheme == "buckwalter" then | 633 | if scheme == "buckwalter" then |
@@ -626,7 +640,7 @@ function processfullvoc(str, rules, scheme) | |||
626 | elseif rules == "dflt" or rules == "idgham" then | 640 | elseif rules == "dflt" or rules == "idgham" then |
627 | str = fullvoc(str, rules) | 641 | str = fullvoc(str, rules) |
628 | else end | 642 | else end |
629 | -- str = unprotectarb(str) | 643 | str = unprotectarb(str) |
630 | return str | 644 | return str |
631 | end | 645 | end |
632 | 646 | ||
@@ -634,7 +648,7 @@ function processnovoc(str, rules, scheme) | |||
634 | str = "\\arb{".. str.."}" | 648 | str = "\\arb{".. str.."}" |
635 | str = processarbnull(str, scheme) | 649 | str = processarbnull(str, scheme) |
636 | str = takeoutcapetc(str) | 650 | str = takeoutcapetc(str) |
637 | -- str = protectarb(str) | 651 | str = protectarb(str) |
638 | str = breakcmd(str) | 652 | str = breakcmd(str) |
639 | str = holdcmd(str) | 653 | str = holdcmd(str) |
640 | if scheme == "buckwalter" then | 654 | if scheme == "buckwalter" then |
@@ -645,7 +659,7 @@ function processnovoc(str, rules, scheme) | |||
645 | elseif rules == "dflt" or rules == "idgham" then | 659 | elseif rules == "dflt" or rules == "idgham" then |
646 | str = novoc(str) | 660 | str = novoc(str) |
647 | else end | 661 | else end |
648 | -- str = unprotectarb(str) | 662 | str = unprotectarb(str) |
649 | return str | 663 | return str |
650 | end | 664 | end |
651 | 665 | ||
@@ -654,7 +668,7 @@ function processtrans(str, mode, rules, scheme) | |||
654 | str = processdiscretionary(str) | 668 | str = processdiscretionary(str) |
655 | str = processarbnull(str, scheme) | 669 | str = processarbnull(str, scheme) |
656 | str = takeoutabjad(str) | 670 | str = takeoutabjad(str) |
657 | -- str = protectarb(str) | 671 | str = protectarb(str) |
658 | str = breakcmd(str) | 672 | str = breakcmd(str) |
659 | str = holdcmd(str) | 673 | str = holdcmd(str) |
660 | if scheme == "buckwalter" then | 674 | if scheme == "buckwalter" then |
@@ -667,7 +681,7 @@ function processtrans(str, mode, rules, scheme) | |||
667 | elseif mode == "arabica" then | 681 | elseif mode == "arabica" then |
668 | str = transarabica(str) | 682 | str = transarabica(str) |
669 | end | 683 | end |
670 | -- str = unprotectarb(str) | 684 | str = unprotectarb(str) |
671 | return str | 685 | return str |
672 | end | 686 | end |
673 | 687 | ||