about summary refs log tree commit diff
path: root/arabluatex_fullvoc.lua
diff options
context:
space:
mode:
author Robert Alessi <alessi@robertalessi.net> 2016-05-08 16:09:05 +0200
committer Robert Alessi <alessi@robertalessi.net> 2016-05-08 16:09:05 +0200
commit 6202ee62d0f34509d7a652c30a1d5c19efec70ae (patch)
tree a56d3c38f40221daabc0891d6b0974d6128fc7b6 /arabluatex_fullvoc.lua
parent e80c0d99f7d2511e6e3f3e70cd922f119a1fc217 (diff)
download arabluatex-6202ee62d0f34509d7a652c30a1d5c19efec70ae.tar.gz
done implementing 'easy' rules set
Diffstat (limited to 'arabluatex_fullvoc.lua')
-rw-r--r-- arabluatex_fullvoc.lua 305
1 files changed, 302 insertions, 3 deletions
diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua
index 8263734..a2676e0 100644
--- a/arabluatex_fullvoc.lua
+++ b/arabluatex_fullvoc.lua
@@ -22,7 +22,6 @@ along with this program. If not, see
22<http://www.gnu.org/licenses/>. 22<http://www.gnu.org/licenses/>.
23--]] 23--]]
24 24
25-- this is new
26hamzafv = { 25hamzafv = {
27 -- hard coded hamza 26 -- hard coded hamza
28 {a="|\"'", b="ء"}, 27 {a="|\"'", b="ء"},
@@ -115,6 +114,98 @@ hamzafv = {
115 {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} 114 {a="(i)(')([^uaiUAI])", b="%1ئْ%3"}
116} 115}
117 116
117hamzafveasy = { -- differences marked below with 'easy'
118 -- hard coded hamza
119 {a="|\"'", b="ء"},
120 {a="A\"'", b="آ"},
121 {a="[au]\"'", b="أ"},
122 {a="w\"'", b="ؤ"},
123 {a="i\"'", b="إ"},
124 {a="y\"'", b="ئ"},
125 {a="ؤ([^uaiUAI])", b="ؤْ%1"},
126 {a="ؤ$", b="ؤْ"},
127 {a="ؤ(%s)", b="ؤْ%1"},
128 {a="أ([^uaiUAI])", b="أْ%1"},
129 {a="أ$", b="أْ"},
130 {a="أ(%s)", b="أْ%1"},
131 {a="ئ([^uaiUAI])", b="ئْ%1"},
132 {a="ئ$", b="ئْ"},
133 {a="ئ(%s)", b="ئْ%1"},
134 -- hamza takes tašdīd too
135 {a="''([Uu])", b="ؤؤ%1"},
136 {a="''([Aa])", b="أأ%1"},
137 {a="''([Ii])", b="ئئ%1"},
138 -- initial long u and i (for a, see below)
139 {a="%'%_U", b="أU"},
140 {a="%'%_I", b="إI"},
141 -- taḫfīfu 'l-hamza
142 {a="'u'([^uaiUAI])", b="أU%1"},
143 {a="'i'([^uaiUAI])", b="إI%1"},
144 -- madda (historic writing below)
145 {a="'a'([^uaiUAI])", b="آ%1"},
146 {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"},
147--easy {a="(A)(')(uN?)$", b="aآء%3"},
148--easy {a="(A)(')(uN?)(%W)", b="aآء%3%4"},
149--easy {a="(A)(')(iN?)$", b="aآء%3"},
150--easy {a="(A)(')(iN?)(%W)", b="aآء%3%4"},
151--easy {a="(A)(')(i)", b="aآئ%3"}, -- historic madda
152--easy {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
153--easy {a="(A)(')", b="aآء"}, -- historic madda
154 -- initial (needs both ^ and %W patterns)
155 {a="^(')([ua])", b="أ%2"},
156 {a="^(')(i)", b="إ%2"},
157 {a="(%W)(')([ua])", b="%1أ%3"},
158 {a="(%W)(')(i)", b="%1إ%3"},
159 -- final
160 -- ^say'aN and .zim'aN are special orthographies
161 {a="(%^say)(%')(aN)", b="%1ئ%3"},
162 {a="(.zi?m)(%')(aN)", b="%1ئ%3"},
163 {a="([^uai])(')([uai]N?)$", b="%1ء%3"},
164 {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"},
165-- u
166 {a="(u)(')([uai]?N)$", b="%1ؤ%3"},
167 {a="(u)(')([uai]N?)(%W)", b="%1ؤ%3%4"},
168 {a="(u)(')$", b="%1ؤْ"},
169 {a="(u)(')(%W)", b="%1ؤْ%3"},
170-- a
171 {a="(a)(')(A)$", b="%1آ"},
172 {a="(a)(')(A)(%W)", b="%1آ%4"},
173 {a="(a)(')([u]N?)$", b="%1أ%3"},
174 {a="(a)(')([u]N?)(%W)", b="%1أ%3%4"},
175 {a="(a)(')(a)$", b="%1أ%3"},
176 {a="(a)(')(a)(%W)", b="%1أ%3%4"},
177 {a="(a)(')(aN)$", b="%1أً"},
178 {a="(a)(')(aN)(%W)", b="%1أً%4"},
179 {a="(a)(')([i]N?)$", b="%1إ%3"},
180 {a="(a)(')([i]N?)(%W)", b="%1إ%3%4"},
181 {a="(a)(')$", b="%1أْ"},
182 {a="(a)(')(%W)", b="%1أْ%3"},
183-- i
184 {a="(i)(')([uai]N?)$", b="%1ئ%3"},
185 {a="(i)(')([uai]N?)(%W)", b="%1ئ%3%4"},
186 {a="(i)(')$", b="%1ئْ"},
187 {a="(i)(')(%W)", b="%1ئْ%3"},
188--
189 -- middle
190 {a="(U)(')", b="%1ء"},
191 {a="([Iy])(')", b="%1ئ"},
192 {a="([^uai])(')([uU])", b="%1ؤ%3"},
193 {a="([^uai])(')([aA])", b="%1أ%3"},
194 {a="([^uai])(')([iI])", b="%1ئ%3"},
195 {a="(u)(')([uU])", b="%1ؤ%3"},
196 {a="(u)(')([aA])", b="%1ؤ%3"},
197 {a="(u)(')([iI])", b="%1ئ%3"},
198 {a="(a)(')([aA])", b="%1أ%3"},
199 {a="(a)(')([uU])", b="%1ؤ%3"},
200 {a="(a)(')([iI])", b="%1ئ%3"},
201 {a="(i)(')([aA])", b="%1ئ%3"},
202 {a="(i)(')([uU])", b="%1ئ%3"},
203 {a="(i)(')([iI])", b="%1ئ%3"},
204 {a="(a)(')([^uaiUAI])", b="%1أْ%3"},
205 {a="(u)(')([^uaiUAI])", b="%1ؤْ%3"},
206 {a="(i)(')([^uaiUAI])", b="%1ئْ%3"}
207}
208
118tanwinfv = { 209tanwinfv = {
119 {a="uNU", b="ٌو"}, 210 {a="uNU", b="ٌو"},
120 {a="aNU", b="ًوا"}, 211 {a="aNU", b="ًوا"},
@@ -150,7 +241,41 @@ tanwinfv = {
150 {a="(iN)", b="ٍ"} 241 {a="(iN)", b="ٍ"}
151} 242}
152 243
153-- this is new 244tanwinfveasy = { -- no assimilations (see below)
245 {a="uNU", b="ٌو"},
246 {a="aNU", b="ًوا"},
247 {a="iNU", b="ٍو"},
248 {a="([uai]N)(%s)([uai])", b="%1%2ٱ"},
249 {a="(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"},
250 -- assimilations (begin)
251--easy {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
252--easy {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
253--easy {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
254--easy {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
255--easy {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
256--easy {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
257--easy {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"},
258 -- assimilations (end)
259 -- quoted tanwīn (begin)
260 {a="(\"uN)", b=""},
261 {a="(B)(\"aN)", b="%1"},
262 {a="(\"aN)(_A)", b="ى"},
263 {a="(\"aN)(Y)", b="ى"},
264 {a="(T)(\"aN)", b="%1"},
265 {a="(ء)(\"aN)", b="%1"},
266 {a="([^TA])(\"aN)", b="%1ا"},
267 {a="(\"iN)", b=""},
268 -- quoted tanwīn (end)
269 {a="(uN)", b="ٌ"},
270 {a="(B)(aN)", b="%1ً"},
271 {a="(aN)(_A)", b="ًى"},
272 {a="(aN)(Y)", b="ًى"},
273 {a="(T)(aN)", b="%1ً"},
274 {a="(ء)(aN)", b="%1ً"},
275 {a="([^TA])(aN)", b="%1ًا"},
276 {a="(iN)", b="ٍ"}
277}
278
154trigraphsfv = { -- trigraphs or more 279trigraphsfv = { -- trigraphs or more
155 -- 'llatI / 'llad_I 280 -- 'llatI / 'llad_I
156 {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, 281 {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"},
@@ -207,7 +332,62 @@ trigraphsfv = { -- trigraphs or more
207 {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} 332 {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"}
208} 333}
209 334
210-- this is new 335trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs)
336 -- 'llatI / 'llad_I
337 {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"},
338 {a="([%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"},
339 -- al- + lām
340 {a="^(a)l%-(l)", b="ا%1ل%2%2"},
341 {a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"},
342 -- al- + solar consonant
343 {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"},
344 {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"},
345 -- assim. art. + solar consonant
346 {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"},
347 {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"},
348 -- al- + initial unstable hamza
349 {a="^(a)l%-(\"?[uai])", b="ا%1لٱ%2"},
350 {a="([%s%-])(a)l%-(\"?[uai])", b="%1ا%2لٱ%3"},
351 -- li-/la- + art. + initial unstable hamza is a special orthography
352 {a="l([ai])%-l%-(\"?[uai])", b="ل%1لٱ%2"},
353 -- al- + lunar consonant (i.e. what remains)
354 {a="^(a)l%-", b="ا%1لْ"},
355 {a="([%s%-])(a)l%-", b="%1ا%2لْ"},
356 -- diphthongs to be resolved before ʾalif conjunctionis
357 {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
358 {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
359 {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
360 -- art. with waṣla + lām
361 {a="'l%-(l)", b="ٱل%1%1"},
362 -- art. with waṣla + solar consonant
363 {a="'l%-([%_%^%.]?[tdrzsn])", b="ٱل%1%1"},
364 -- li-/la- + art. + lām
365 {a="l([ai])%-l%-(l)", b="ل%1%2%2"},
366 -- assim. art. with waṣla + solar consonant
367 {a="'([%_%^%.]?[tdrzsn])%-", b="ٱل%1"},
368 -- li-/la- + art. + solar consonant is a special orthography
369 {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2%2"},
370 -- li-/la- + assim. art. + solar consonant is a special orthography
371 {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3%3"},
372 -- art. with waṣla + initial unstable hamza
373 {a="'l%-(\"?[uai])", b="ٱلٱ%1"},
374 -- art. with waṣla + lunar consonant (i.e. what remains)
375 {a="'l%-", b="ٱلْ"},
376 -- the silent wāw
377 {a="uU$", b="uو"},
378 {a="uU(%W)", b="uو%1"},
379 {a="aU$", b="aو"},
380 {a="aU(%W)", b="aو%1"},
381 {a="iU$", b="iو"},
382 {a="iU(%W)", b="iو%1"},
383 -- words ending in -āT with silent wāw/yāʾ
384 {a="(_a)UA", b="%1وا"},
385 {a="(_a)U", b="%1و"},
386 {a="(_a)I", b="%1ي"},
387 -- assimilations
388--easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"}
389}
390
211digraphsfv = { 391digraphsfv = {
212 -- initial straight double quote gives a connective ʾalif 392 -- initial straight double quote gives a connective ʾalif
213 {a="^\"[uai]", b="ٱ"}, 393 {a="^\"[uai]", b="ٱ"},
@@ -293,6 +473,94 @@ digraphsfv = {
293 {a="%^d", b="ڊ"} 473 {a="%^d", b="ڊ"}
294} 474}
295 475
476digraphsfveasy = { -- see the differences under 'easy' marker below
477 -- initial straight double quote gives a connective ʾalif
478 {a="^\"[uai]", b="ٱ"},
479 {a="([%s%-])\"[uai]", b="%1ٱ"},
480 -- diphthongs to be resolved before ʾalif conjunctionis
481 {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"},
482 {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"},
483 {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"},
484 {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza
485 -- initial alif without hamza
486 {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"},
487 {a="^([uai])", b="ا%1"}, -- initial alif without hamza
488 {a="(%s)([uai])", b="%1ا%2"}, -- initial alif without hamza
489 {a="%-%-", b="ـ"},
490 {a="ؤؤ", b="ؤّ"},
491 {a="أأ", b="أّ"},
492 {a="ئئ", b="ئّ"},
493 {a="bb", b="بّ"},
494 {a="BB", b="ـّ"},
495 {a="(%_)([thd])([thd])", b="%1%2|%3"},
496 {a="tt", b="تّ"},
497 {a="%_t%_t", b="ثّ"},
498 {a="jj", b="جّ"},
499 {a="%^g%^g", b="جّ"},
500 {a="xx", b="خّ"},
501 {a="%_h%_h", b="خّ"},
502 {a="dd", b="دّ"},
503 {a="%_d%_d", b="ذّ"},
504 {a="rr", b="رّ"},
505 {a="zz", b="زّ"},
506 {a="ss", b="سّ"},
507 {a="%^s%^s", b="شّ"},
508 {a="%.s%.s", b="صّ"},
509 {a="%.d%.d", b="ضّ"},
510 {a="%.t%.t", b="طّ"},
511 {a="%.z%.z", b="ظّ"},
512 {a="%`%`", b="عّ"},
513 {a="%.g%.g", b="غّ"},
514 {a="ff", b="فّ"},
515 {a="qq", b="قّ"},
516 {a="kk", b="كّ"},
517 {a="ll", b="لّ"},
518 {a="mm", b="مّ"},
519 {a="nn", b="نّ"},
520 {a="hh", b="هّ"},
521 {a="ww", b="وّ"},
522 {a="yy", b="يّ"},
523 -- sukūn begin ('easy' needs these rules to be taken out); but
524 -- first take out every sukūn previously generated by the hamza rules,
525 -- so that there is no need to edit them:
526 {a="ْ", b=""},
527-- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])$", b="%1ْ"},
528-- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"},
529-- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIًٌٍ])", b="%1ْ%2"},
530 -- take out sukūn in cases of assimilation
531-- {a="(n)(ْ)(%s)([روي])", b="%1%3%4"},
532-- {a="(n)(ْ)(%s)([ل])", b="%1%3%4"},
533-- {a="(n)(ْ)(%s)([م])", b="%1%3%4"},
534-- {a="(n)(ْ)(%s)([ن])", b="%1%3%4"},
535-- {a="ْ\"", b="\""},
536 -- sukūn end
537 {a="_t", b="ث"},
538 {a="%^g", b="ج"},
539 {a="%.h", b="ح"},
540 {a="_h", b="خ"},
541 {a="_d", b="ذ"},
542 {a="%^s", b="ش"},
543 {a="%.s", b="ص"},
544 {a="%.d", b="ض"},
545 {a="%.t", b="ط"},
546 {a="%.z", b="ظ"},
547 {a="%.g", b="غ"},
548 {a="(U)(A)", b="%1ا"},
549 {a="WA", b="وْا"},
550 {a="(a)W\"", b="%1وا"},
551 {a="(a)W", b="%1وْا"},
552 {a="_A", b="aى"},
553 {a="_u", b="ٗ"},
554 {a="_a", b="ٰ"},
555 {a="_i", b="ٖ"},
556 {a="%.b", b="ٮ"},
557 {a="%.f", b="ڡ"},
558 {a="%.q", b="ٯ"},
559 {a="%.k", b="ک"},
560 {a="%.n", b="ں"},
561 {a="%^d", b="ڊ"}
562}
563
296singlefv = { 564singlefv = {
297 {a="b", b="ب"}, 565 {a="b", b="ب"},
298 {a="t", b="ت"}, 566 {a="t", b="ت"},
@@ -320,3 +588,34 @@ singlefv = {
320 {a="([^0-9])%-([^0-9])", b="%1%2"}, 588 {a="([^0-9])%-([^0-9])", b="%1%2"},
321 {a="B", b="ـ"}, 589 {a="B", b="ـ"},
322} 590}
591
592singlefveasy = { -- see the differences under 'easy' tag below
593 {a="b", b="ب"},
594 {a="t", b="ت"},
595 {a="j", b="ج"},
596 {a="x", b="خ"},
597 {a="d", b="د"},
598 {a="r", b="ر"},
599 {a="z", b="ز"},
600 {a="s", b="س"},
601 {a="f", b="ف"},
602 {a="`", b="ع"},
603 {a="f", b="ف"},
604 {a="q", b="ق"},
605 {a="k", b="ك"},
606 {a="l", b="ل"},
607 {a="m", b="م"},
608 {a="n", b="ن"},
609 {a="h", b="ه"},
610 {a="w", b="و"},
611 {a="y", b="ي"},
612 {a="T", b="ة"},
613 -- easy (begin): in 'easy' mode, \" must put the sukūn back
614 {a="\"$", b="ْ"},
615 {a="\"(%W)", b="ْ%1"},
616 {a="\"([^uaiUAI])", b="ْ%1"},
617 -- easy (end)
618 {a="([^0-9])%-([^0-9])", b="%1%2"},
619 {a="B", b="ـ"},
620}
621