diff options
Diffstat (limited to 'arabluatex_fullvoc.lua')
-rw-r--r-- | arabluatex_fullvoc.lua | 305 |
1 files changed, 302 insertions, 3 deletions
diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index 8263734..a2676e0 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua | |||
@@ -22,7 +22,6 @@ along with this program. If not, see | |||
22 | <http://www.gnu.org/licenses/>. | 22 | <http://www.gnu.org/licenses/>. |
23 | --]] | 23 | --]] |
24 | 24 | ||
25 | -- this is new | ||
26 | hamzafv = { | 25 | hamzafv = { |
27 | -- hard coded hamza | 26 | -- hard coded hamza |
28 | {a="|\"'", b="ء"}, | 27 | {a="|\"'", b="ء"}, |
@@ -115,6 +114,98 @@ hamzafv = { | |||
115 | {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} | 114 | {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} |
116 | } | 115 | } |
117 | 116 | ||
117 | hamzafveasy = { -- differences marked below with 'easy' | ||
118 | -- hard coded hamza | ||
119 | {a="|\"'", b="ء"}, | ||
120 | {a="A\"'", b="آ"}, | ||
121 | {a="[au]\"'", b="أ"}, | ||
122 | {a="w\"'", b="ؤ"}, | ||
123 | {a="i\"'", b="إ"}, | ||
124 | {a="y\"'", b="ئ"}, | ||
125 | {a="ؤ([^uaiUAI])", b="ؤْ%1"}, | ||
126 | {a="ؤ$", b="ؤْ"}, | ||
127 | {a="ؤ(%s)", b="ؤْ%1"}, | ||
128 | {a="أ([^uaiUAI])", b="أْ%1"}, | ||
129 | {a="أ$", b="أْ"}, | ||
130 | {a="أ(%s)", b="أْ%1"}, | ||
131 | {a="ئ([^uaiUAI])", b="ئْ%1"}, | ||
132 | {a="ئ$", b="ئْ"}, | ||
133 | {a="ئ(%s)", b="ئْ%1"}, | ||
134 | -- hamza takes tašdīd too | ||
135 | {a="''([Uu])", b="ؤؤ%1"}, | ||
136 | {a="''([Aa])", b="أأ%1"}, | ||
137 | {a="''([Ii])", b="ئئ%1"}, | ||
138 | -- initial long u and i (for a, see below) | ||
139 | {a="%'%_U", b="أU"}, | ||
140 | {a="%'%_I", b="إI"}, | ||
141 | -- taḫfīfu 'l-hamza | ||
142 | {a="'u'([^uaiUAI])", b="أU%1"}, | ||
143 | {a="'i'([^uaiUAI])", b="إI%1"}, | ||
144 | -- madda (historic writing below) | ||
145 | {a="'a'([^uaiUAI])", b="آ%1"}, | ||
146 | {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, | ||
147 | --easy {a="(A)(')(uN?)$", b="aآء%3"}, | ||
148 | --easy {a="(A)(')(uN?)(%W)", b="aآء%3%4"}, | ||
149 | --easy {a="(A)(')(iN?)$", b="aآء%3"}, | ||
150 | --easy {a="(A)(')(iN?)(%W)", b="aآء%3%4"}, | ||
151 | --easy {a="(A)(')(i)", b="aآئ%3"}, -- historic madda | ||
152 | --easy {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda | ||
153 | --easy {a="(A)(')", b="aآء"}, -- historic madda | ||
154 | -- initial (needs both ^ and %W patterns) | ||
155 | {a="^(')([ua])", b="أ%2"}, | ||
156 | {a="^(')(i)", b="إ%2"}, | ||
157 | {a="(%W)(')([ua])", b="%1أ%3"}, | ||
158 | {a="(%W)(')(i)", b="%1إ%3"}, | ||
159 | -- final | ||
160 | -- ^say'aN and .zim'aN are special orthographies | ||
161 | {a="(%^say)(%')(aN)", b="%1ئ%3"}, | ||
162 | {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, | ||
163 | {a="([^uai])(')([uai]N?)$", b="%1ء%3"}, | ||
164 | {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"}, | ||
165 | -- u | ||
166 | {a="(u)(')([uai]?N)$", b="%1ؤ%3"}, | ||
167 | {a="(u)(')([uai]N?)(%W)", b="%1ؤ%3%4"}, | ||
168 | {a="(u)(')$", b="%1ؤْ"}, | ||
169 | {a="(u)(')(%W)", b="%1ؤْ%3"}, | ||
170 | -- a | ||
171 | {a="(a)(')(A)$", b="%1آ"}, | ||
172 | {a="(a)(')(A)(%W)", b="%1آ%4"}, | ||
173 | {a="(a)(')([u]N?)$", b="%1أ%3"}, | ||
174 | {a="(a)(')([u]N?)(%W)", b="%1أ%3%4"}, | ||
175 | {a="(a)(')(a)$", b="%1أ%3"}, | ||
176 | {a="(a)(')(a)(%W)", b="%1أ%3%4"}, | ||
177 | {a="(a)(')(aN)$", b="%1أً"}, | ||
178 | {a="(a)(')(aN)(%W)", b="%1أً%4"}, | ||
179 | {a="(a)(')([i]N?)$", b="%1إ%3"}, | ||
180 | {a="(a)(')([i]N?)(%W)", b="%1إ%3%4"}, | ||
181 | {a="(a)(')$", b="%1أْ"}, | ||
182 | {a="(a)(')(%W)", b="%1أْ%3"}, | ||
183 | -- i | ||
184 | {a="(i)(')([uai]N?)$", b="%1ئ%3"}, | ||
185 | {a="(i)(')([uai]N?)(%W)", b="%1ئ%3%4"}, | ||
186 | {a="(i)(')$", b="%1ئْ"}, | ||
187 | {a="(i)(')(%W)", b="%1ئْ%3"}, | ||
188 | -- | ||
189 | -- middle | ||
190 | {a="(U)(')", b="%1ء"}, | ||
191 | {a="([Iy])(')", b="%1ئ"}, | ||
192 | {a="([^uai])(')([uU])", b="%1ؤ%3"}, | ||
193 | {a="([^uai])(')([aA])", b="%1أ%3"}, | ||
194 | {a="([^uai])(')([iI])", b="%1ئ%3"}, | ||
195 | {a="(u)(')([uU])", b="%1ؤ%3"}, | ||
196 | {a="(u)(')([aA])", b="%1ؤ%3"}, | ||
197 | {a="(u)(')([iI])", b="%1ئ%3"}, | ||
198 | {a="(a)(')([aA])", b="%1أ%3"}, | ||
199 | {a="(a)(')([uU])", b="%1ؤ%3"}, | ||
200 | {a="(a)(')([iI])", b="%1ئ%3"}, | ||
201 | {a="(i)(')([aA])", b="%1ئ%3"}, | ||
202 | {a="(i)(')([uU])", b="%1ئ%3"}, | ||
203 | {a="(i)(')([iI])", b="%1ئ%3"}, | ||
204 | {a="(a)(')([^uaiUAI])", b="%1أْ%3"}, | ||
205 | {a="(u)(')([^uaiUAI])", b="%1ؤْ%3"}, | ||
206 | {a="(i)(')([^uaiUAI])", b="%1ئْ%3"} | ||
207 | } | ||
208 | |||
118 | tanwinfv = { | 209 | tanwinfv = { |
119 | {a="uNU", b="ٌو"}, | 210 | {a="uNU", b="ٌو"}, |
120 | {a="aNU", b="ًوا"}, | 211 | {a="aNU", b="ًوا"}, |
@@ -150,7 +241,41 @@ tanwinfv = { | |||
150 | {a="(iN)", b="ٍ"} | 241 | {a="(iN)", b="ٍ"} |
151 | } | 242 | } |
152 | 243 | ||
153 | -- this is new | 244 | tanwinfveasy = { -- no assimilations (see below) |
245 | {a="uNU", b="ٌو"}, | ||
246 | {a="aNU", b="ًوا"}, | ||
247 | {a="iNU", b="ٍو"}, | ||
248 | {a="([uai]N)(%s)([uai])", b="%1%2ٱ"}, | ||
249 | {a="(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"}, | ||
250 | -- assimilations (begin) | ||
251 | --easy {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, | ||
252 | --easy {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, | ||
253 | --easy {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, | ||
254 | --easy {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, | ||
255 | --easy {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, | ||
256 | --easy {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, | ||
257 | --easy {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, | ||
258 | -- assimilations (end) | ||
259 | -- quoted tanwīn (begin) | ||
260 | {a="(\"uN)", b=""}, | ||
261 | {a="(B)(\"aN)", b="%1"}, | ||
262 | {a="(\"aN)(_A)", b="ى"}, | ||
263 | {a="(\"aN)(Y)", b="ى"}, | ||
264 | {a="(T)(\"aN)", b="%1"}, | ||
265 | {a="(ء)(\"aN)", b="%1"}, | ||
266 | {a="([^TA])(\"aN)", b="%1ا"}, | ||
267 | {a="(\"iN)", b=""}, | ||
268 | -- quoted tanwīn (end) | ||
269 | {a="(uN)", b="ٌ"}, | ||
270 | {a="(B)(aN)", b="%1ً"}, | ||
271 | {a="(aN)(_A)", b="ًى"}, | ||
272 | {a="(aN)(Y)", b="ًى"}, | ||
273 | {a="(T)(aN)", b="%1ً"}, | ||
274 | {a="(ء)(aN)", b="%1ً"}, | ||
275 | {a="([^TA])(aN)", b="%1ًا"}, | ||
276 | {a="(iN)", b="ٍ"} | ||
277 | } | ||
278 | |||
154 | trigraphsfv = { -- trigraphs or more | 279 | trigraphsfv = { -- trigraphs or more |
155 | -- 'llatI / 'llad_I | 280 | -- 'llatI / 'llad_I |
156 | {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, | 281 | {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, |
@@ -207,7 +332,62 @@ trigraphsfv = { -- trigraphs or more | |||
207 | {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} | 332 | {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} |
208 | } | 333 | } |
209 | 334 | ||
210 | -- this is new | 335 | trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs) |
336 | -- 'llatI / 'llad_I | ||
337 | {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"}, | ||
338 | {a="([%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, | ||
339 | -- al- + lām | ||
340 | {a="^(a)l%-(l)", b="ا%1ل%2%2"}, | ||
341 | {a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3%3"}, | ||
342 | -- al- + solar consonant | ||
343 | {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, | ||
344 | {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, | ||
345 | -- assim. art. + solar consonant | ||
346 | {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, | ||
347 | {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, | ||
348 | -- al- + initial unstable hamza | ||
349 | {a="^(a)l%-(\"?[uai])", b="ا%1لٱ%2"}, | ||
350 | {a="([%s%-])(a)l%-(\"?[uai])", b="%1ا%2لٱ%3"}, | ||
351 | -- li-/la + art. + initial unstable hamza is a special orthography | ||
352 | {a="l([ai])%-l%-(\"?[uai])", b="ل%1لٱ%2"}, | ||
353 | -- al- + lunar consonant (i.e. what remains) | ||
354 | {a="^(a)l%-", b="ا%1لْ"}, | ||
355 | {a="([%s%-])(a)l%-", b="%1ا%2لْ"}, | ||
356 | -- diphthongs to be resolved before ʾalif conjunctionis | ||
357 | {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, | ||
358 | {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, | ||
359 | {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, | ||
360 | -- art. with waṣla + lām | ||
361 | {a="'l%-(l)", b="ٱل%1%1"}, | ||
362 | -- art. with waṣla + solar consonant | ||
363 | {a="'l%-([%_%^%.]?[tdrzsn])", b="ٱل%1%1"}, | ||
364 | -- li-/la- + art. + lām | ||
365 | {a="l([ai])%-l%-(l)", b="ل%1%2%2"}, | ||
366 | -- assim. art. with waṣla + solar consonant | ||
367 | {a="'([%_%^%.]?[tdrzsn])%-", b="ٱل%1"}, | ||
368 | -- li-/la- + art. + solar consonant is a special orthography | ||
369 | {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2%2"}, | ||
370 | -- li-/la- + assim. art. + solar consonant is a special orthography | ||
371 | {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3%3"}, | ||
372 | -- art. with waṣla + initial unstable hamza | ||
373 | {a="'l%-(\"?[uai])", b="ٱلٱ%1"}, | ||
374 | -- art. with waṣla + lunar consonant (i.e. what remains) | ||
375 | {a="'l%-", b="ٱلْ"}, | ||
376 | -- the silent wāw | ||
377 | {a="uU$", b="uو"}, | ||
378 | {a="uU(%W)", b="uو%1"}, | ||
379 | {a="aU$", b="aو"}, | ||
380 | {a="aU(%W)", b="aو%1"}, | ||
381 | {a="iU$", b="iو"}, | ||
382 | {a="iU(%W)", b="iو%1"}, | ||
383 | -- words ending in -āT with silent wāw/yāʾ | ||
384 | {a="(_a)UA", b="%1وا"}, | ||
385 | {a="(_a)U", b="%1و"}, | ||
386 | {a="(_a)I", b="%1ي"}, | ||
387 | -- assimilations | ||
388 | --easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} | ||
389 | } | ||
390 | |||
211 | digraphsfv = { | 391 | digraphsfv = { |
212 | -- initial straight double quote gives a connective ʾalif | 392 | -- initial straight double quote gives a connective ʾalif |
213 | {a="^\"[uai]", b="ٱ"}, | 393 | {a="^\"[uai]", b="ٱ"}, |
@@ -293,6 +473,94 @@ digraphsfv = { | |||
293 | {a="%^d", b="ڊ"} | 473 | {a="%^d", b="ڊ"} |
294 | } | 474 | } |
295 | 475 | ||
476 | digraphsfveasy = { -- see the differences under the 'easy' markers below | ||
477 | -- initial straight double quote gives a connective ʾalif | ||
478 | {a="^\"[uai]", b="ٱ"}, | ||
479 | {a="([%s%-])\"[uai]", b="%1ٱ"}, | ||
480 | -- diphthongs to be resolved before ʾalif conjunctionis | ||
481 | {a="(aW)(%s)(\"?[uai])", b="awuا%2ٱ"}, | ||
482 | {a="(aw)(%s)(\"?[uai])", b="%1u%2ٱ"}, | ||
483 | {a="(ay)(%s)(\"?[uai])", b="%1i%2ٱ"}, | ||
484 | {a="([uai]%-)(\"?[uai])", b="%1ٱ"}, -- hyphen + initial alif without hamza | ||
485 | -- initial alif without hamza | ||
486 | {a="([%_]?[uaiUAIY])(%s)(\"?[uai])", b="%1%2ٱ"}, | ||
487 | {a="^([uai])", b="ا%1"}, -- initial alif without hamza | ||
488 | {a="(%s)([uai])", b="%1ا%2"}, -- initial alif without hamza | ||
489 | {a="%-%-", b="ـ"}, | ||
490 | {a="ؤؤ", b="ؤّ"}, | ||
491 | {a="أأ", b="أّ"}, | ||
492 | {a="ئئ", b="ئّ"}, | ||
493 | {a="bb", b="بّ"}, | ||
494 | {a="BB", b="ـّ"}, | ||
495 | {a="(%_)([thd])([thd])", b="%1%2|%3"}, | ||
496 | {a="tt", b="تّ"}, | ||
497 | {a="%_t%_t", b="ثّ"}, | ||
498 | {a="jj", b="جّ"}, | ||
499 | {a="%^g%^g", b="جّ"}, | ||
500 | {a="xx", b="خّ"}, | ||
501 | {a="%_h%_h", b="خّ"}, | ||
502 | {a="dd", b="دّ"}, | ||
503 | {a="%_d%_d", b="ذّ"}, | ||
504 | {a="rr", b="رّ"}, | ||
505 | {a="zz", b="زّ"}, | ||
506 | {a="ss", b="سّ"}, | ||
507 | {a="%^s%^s", b="شّ"}, | ||
508 | {a="%.s%.s", b="صّ"}, | ||
509 | {a="%.d%.d", b="ضّ"}, | ||
510 | {a="%.t%.t", b="طّ"}, | ||
511 | {a="%.z%.z", b="ظّ"}, | ||
512 | {a="%`%`", b="عّ"}, | ||
513 | {a="%.g%.g", b="غّ"}, | ||
514 | {a="ff", b="فّ"}, | ||
515 | {a="qq", b="قّ"}, | ||
516 | {a="kk", b="كّ"}, | ||
517 | {a="ll", b="لّ"}, | ||
518 | {a="mm", b="مّ"}, | ||
519 | {a="nn", b="نّ"}, | ||
520 | {a="hh", b="هّ"}, | ||
521 | {a="ww", b="وّ"}, | ||
522 | {a="yy", b="يّ"}, | ||
523 | -- sukūn begin ('easy' needs these rules to be taken out); but | ||
524 | -- first take out every sukūn previously generated by the hamza rules, | ||
525 | -- so that there is no need to edit them: | ||
526 | {a="ْ", b=""}, | ||
527 | -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])$", b="%1ْ"}, | ||
528 | -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, | ||
529 | -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIًٌٍ])", b="%1ْ%2"}, | ||
530 | -- take out sukūn in cases of assimilation | ||
531 | -- {a="(n)(ْ)(%s)([روي])", b="%1%3%4"}, | ||
532 | -- {a="(n)(ْ)(%s)([ل])", b="%1%3%4"}, | ||
533 | -- {a="(n)(ْ)(%s)([م])", b="%1%3%4"}, | ||
534 | -- {a="(n)(ْ)(%s)([ن])", b="%1%3%4"}, | ||
535 | -- {a="ْ\"", b="\""}, | ||
536 | -- sukūn end | ||
537 | {a="_t", b="ث"}, | ||
538 | {a="%^g", b="ج"}, | ||
539 | {a="%.h", b="ح"}, | ||
540 | {a="_h", b="خ"}, | ||
541 | {a="_d", b="ذ"}, | ||
542 | {a="%^s", b="ش"}, | ||
543 | {a="%.s", b="ص"}, | ||
544 | {a="%.d", b="ض"}, | ||
545 | {a="%.t", b="ط"}, | ||
546 | {a="%.z", b="ظ"}, | ||
547 | {a="%.g", b="غ"}, | ||
548 | {a="(U)(A)", b="%1ا"}, | ||
549 | {a="WA", b="وْا"}, | ||
550 | {a="(a)W\"", b="%1وا"}, | ||
551 | {a="(a)W", b="%1وْا"}, | ||
552 | {a="_A", b="aى"}, | ||
553 | {a="_u", b="ٗ"}, | ||
554 | {a="_a", b="ٰ"}, | ||
555 | {a="_i", b="ٖ"}, | ||
556 | {a="%.b", b="ٮ"}, | ||
557 | {a="%.f", b="ڡ"}, | ||
558 | {a="%.q", b="ٯ"}, | ||
559 | {a="%.k", b="ک"}, | ||
560 | {a="%.n", b="ں"}, | ||
561 | {a="%^d", b="ڊ"} | ||
562 | } | ||
563 | |||
296 | singlefv = { | 564 | singlefv = { |
297 | {a="b", b="ب"}, | 565 | {a="b", b="ب"}, |
298 | {a="t", b="ت"}, | 566 | {a="t", b="ت"}, |
@@ -320,3 +588,34 @@ singlefv = { | |||
320 | {a="([^0-9])%-([^0-9])", b="%1%2"}, | 588 | {a="([^0-9])%-([^0-9])", b="%1%2"}, |
321 | {a="B", b="ـ"}, | 589 | {a="B", b="ـ"}, |
322 | } | 590 | } |
591 | |||
592 | singlefveasy = { -- see the differences under 'easy' tag below | ||
593 | {a="b", b="ب"}, | ||
594 | {a="t", b="ت"}, | ||
595 | {a="j", b="ج"}, | ||
596 | {a="x", b="خ"}, | ||
597 | {a="d", b="د"}, | ||
598 | {a="r", b="ر"}, | ||
599 | {a="z", b="ز"}, | ||
600 | {a="s", b="س"}, | ||
601 | {a="f", b="ف"}, | ||
602 | {a="`", b="ع"}, | ||
603 | {a="f", b="ف"}, | ||
604 | {a="q", b="ق"}, | ||
605 | {a="k", b="ك"}, | ||
606 | {a="l", b="ل"}, | ||
607 | {a="m", b="م"}, | ||
608 | {a="n", b="ن"}, | ||
609 | {a="h", b="ه"}, | ||
610 | {a="w", b="و"}, | ||
611 | {a="y", b="ي"}, | ||
612 | {a="T", b="ة"}, | ||
613 | -- easy (begin): a straight double quote (\") puts the sukūn back | ||
614 | {a="\"$", b="ْ"}, | ||
615 | {a="\"(%W)", b="ْ%1"}, | ||
616 | {a="\"([^uaiUAI])", b="ْ%1"}, | ||
617 | -- easy (end) | ||
618 | {a="([^0-9])%-([^0-9])", b="%1%2"}, | ||
619 | {a="B", b="ـ"}, | ||
620 | } | ||
621 | |||