aboutsummaryrefslogtreecommitdiff
path: root/arabluatex_voc.lua
diff options
context:
space:
mode:
authorRobert Alessi <alessi@robertalessi.net>2016-05-08 16:09:05 +0200
committerRobert Alessi <alessi@robertalessi.net>2016-05-08 16:09:05 +0200
commit6202ee62d0f34509d7a652c30a1d5c19efec70ae (patch)
treea56d3c38f40221daabc0891d6b0974d6128fc7b6 /arabluatex_voc.lua
parente80c0d99f7d2511e6e3f3e70cd922f119a1fc217 (diff)
downloadarabluatex-6202ee62d0f34509d7a652c30a1d5c19efec70ae.tar.gz
done implementing 'easy' rules set
Diffstat (limited to 'arabluatex_voc.lua')
-rw-r--r--arabluatex_voc.lua176
1 files changed, 176 insertions, 0 deletions
diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua
index 5eafca5..4f5399f 100644
--- a/arabluatex_voc.lua
+++ b/arabluatex_voc.lua
@@ -154,6 +154,89 @@ hamza = {
154 {a="(i)(')([^uaiUAI])", b="%1ئ%3"} 154 {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
155} 155}
156 156
-- hamzaeasy: rewrite rules for the seat of the hamza in 'easy' mode.
-- Each entry is a pair: `a` is a Lua pattern written in the ASCII
-- transliteration, `b` is the replacement string (with %n capture
-- references) that inserts the Arabic hamza carrier.
-- NOTE(review): the rules are presumably applied one after the other with
-- string.gsub by the caller, so their order matters (specific cases come
-- before generic ones) -- confirm against the caller before reordering.
-- Lines starting with '--easy' are rules that are deliberately left
-- disabled in this rule set.
hamzaeasy = { -- differences marked below with 'easy'
   -- hard coded hamza
   {a="|\"'", b="ء"},
   {a="A\"'", b="آ"},
   {a="[au]\"'", b="أ"},
   {a="w\"'", b="ؤ"},
   {a="i\"'", b="إ"},
   {a="y\"'", b="ئ"},
   -- hamza takes tašdīd too
   {a="''([Uu])", b="ؤؤ%1"},
   {a="''([Aa])", b="أأ%1"},
   {a="''([Ii])", b="ئئ%1"},
   -- initial long u and i (for a, see below)
   {a="%'%_U", b="أU"},
   {a="%'%_I", b="إI"},
   -- taḫfīfu 'l-hamza
   {a="'u'([^uaiUAI])", b="أU%1"},
   {a="'i'([^uaiUAI])", b="إI%1"},
   -- madda (historic writing below)
   {a="'a'([^uaiUAI])", b="آ%1"},
   {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"},
--easy   {a="(A)(')(uN?)$", b="aآء%3"},
--easy   {a="(A)(')(uN?)(%W)", b="aآء%3%4"},
--easy   {a="(A)(')(iN?)$", b="aآء%3"},
--easy   {a="(A)(')(iN?)(%W)", b="aآء%3%4"},
--easy   {a="(A)(')(i)", b="aآئ%3"}, -- historic madda
--easy   {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
--easy   {a="(A)(')", b="aآء"}, -- historic madda
   -- initial (needs both ^ and %W patterns)
   {a="^(')([ua])", b="أ%2"},
   {a="^(')(i)", b="إ%2"},
   {a="(%W)(')([ua])", b="%1أ%3"},
   {a="(%W)(')(i)", b="%1إ%3"},
   -- final
   -- ^say'aN and .zim'aN are special orthographies
   {a="(%^say)(%')(aN)", b="%1ئ%3"},
   {a="(.zi?m)(%')(aN)", b="%1ئ%3"},
   {a="([^uai])(')([uai]N?)$", b="%1ء%3"},
   {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"},
   -- u
   -- The vowel is mandatory and the tanwīn 'N' optional, exactly as in the
   -- %W variant on the next line; otherwise a string-final u'i / u'a / u'u
   -- without tanwīn would skip this rule and be handled as a medial hamza.
   {a="(u)(')([uai]N?)$", b="%1ؤ%3"},
   {a="(u)(')([uai]N?)(%W)", b="%1ؤ%3%4"},
   {a="(u)(')$", b="%1ؤ"},
   {a="(u)(')(%W)", b="%1ؤ%3"},
   -- a
   {a="(a)(')(A)$", b="%1آ"},
   {a="(a)(')(A)(%W)", b="%1آ%4"},
   {a="(a)(')([u]N?)$", b="%1أ%3"},
   {a="(a)(')([u]N?)(%W)", b="%1أ%3%4"},
   {a="(a)(')(a)$", b="%1أ%3"},
   {a="(a)(')(a)(%W)", b="%1أ%3%4"},
   {a="(a)(')(aN)$", b="%1أً"},
   {a="(a)(')(aN)(%W)", b="%1أً%4"},
   {a="(a)(')([i]N?)$", b="%1إ%3"},
   {a="(a)(')([i]N?)(%W)", b="%1إ%3%4"},
   {a="(a)(')$", b="%1أ"},
   {a="(a)(')(%W)", b="%1أ%3"},
   -- i
   {a="(i)(')([uai]N?)$", b="%1ئ%3"},
   {a="(i)(')([uai]N?)(%W)", b="%1ئ%3%4"},
   {a="(i)(')$", b="%1ئ"},
   {a="(i)(')(%W)", b="%1ئ%3"},
   --
   -- middle
   {a="(U)(')", b="%1ء"},
   {a="([Iy])(')", b="%1ئ"},
   {a="([^uai])(')([uU])", b="%1ؤ%3"},
   {a="([^uai])(')([aA])", b="%1أ%3"},
   {a="([^uai])(')([iI])", b="%1ئ%3"},
   {a="(u)(')([uU])", b="%1ؤ%3"},
   {a="(u)(')([aA])", b="%1ؤ%3"},
   {a="(u)(')([iI])", b="%1ئ%3"},
   {a="(a)(')([aA])", b="%1أ%3"},
   {a="(a)(')([uU])", b="%1ؤ%3"},
   {a="(a)(')([iI])", b="%1ئ%3"},
   {a="(i)(')([aA])", b="%1ئ%3"},
   {a="(i)(')([uU])", b="%1ئ%3"},
   {a="(i)(')([iI])", b="%1ئ%3"},
   -- hamza followed by a consonant: the seat follows the preceding vowel
   {a="(a)(')([^uaiUAI])", b="%1أ%3"},
   {a="(u)(')([^uaiUAI])", b="%1ؤ%3"},
   {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
}
239
157tanwin = { 240tanwin = {
158 {a="uNU", b="ٌو"}, 241 {a="uNU", b="ٌو"},
159 {a="aNU", b="ًوا"}, 242 {a="aNU", b="ًوا"},
@@ -187,6 +270,39 @@ tanwin = {
187 {a="(iN)", b="ٍ"} 270 {a="(iN)", b="ٍ"}
188} 271}
189 272
-- tanwineasy: rewrite rules for tanwīn (uN / aN / iN) in 'easy' mode.
-- Each entry is a pair: `a` is a Lua pattern written in the ASCII
-- transliteration, `b` is the replacement string (with %n capture
-- references).
-- The assimilation rules -- tanwīn followed by whitespace and one of
-- r, l, m, n, w, y -- are kept below only as comments: they are
-- disabled in this rule set.
-- NOTE(review): the rules are presumably applied one after the other with
-- string.gsub by the caller, so the quoted forms must stay ahead of the
-- plain ones -- confirm against the caller before reordering.
tanwineasy = { -- 'easy' requires some lines to be taken out:
   {a="uNU", b="ٌو"},
   {a="aNU", b="ًوا"},
   {a="iNU", b="ٍو"},
   -- assimilations (begin) -- disabled in 'easy' mode
--   {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
--   {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
--   {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
--   {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
--   {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
--   {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
--   {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"},
   -- assimilations (end)
   -- quoted tanwīn (begin): the replacement carries no tanwīn mark,
   -- only the supporting letter where there is one
   {a="(\"uN)", b=""},
   {a="(B)(\"aN)", b="%1"},
   {a="(\"aN)(_A)", b="ى"},
   {a="(\"aN)(Y)", b="ى"},
   {a="(T)(\"aN)", b="%1"},
   {a="(ء)(\"aN)", b="%1"},
   {a="([^TA])(\"aN)", b="%1ا"},
   {a="(\"iN)", b=""},
   -- quoted tanwīn (end)
   -- plain tanwīn: same cases as above, with the tanwīn mark written
   {a="(uN)", b="ٌ"},
   {a="(B)(aN)", b="%1ً"},
   {a="(aN)(_A)", b="ًى"},
   {a="(aN)(Y)", b="ًى"},
   {a="(T)(aN)", b="%1ً"},
   {a="(ء)(aN)", b="%1ً"},
   {a="([^TA])(aN)", b="%1ًا"},
   {a="(iN)", b="ٍ"}
}
305
190trigraphs = { -- trigraphs or more 306trigraphs = { -- trigraphs or more
191 -- 'llatI / 'llad_I 307 -- 'llatI / 'llad_I
192 {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, 308 {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
@@ -247,6 +363,66 @@ trigraphs = { -- trigraphs or more
247 {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} 363 {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"}
248} 364}
249 365
-- trigraphseasy: rewrite rules for sequences of three or more characters
-- (definite article, waṣla, silent wāw, ...) in 'easy' mode.
-- Each entry is a pair: `a` is a Lua pattern written in the ASCII
-- transliteration, `b` is the replacement string (with %n capture
-- references).
-- Sections tagged '(easy)' are the ones this table marks as differing
-- from the regular rule set; the line starting with '--easy' is a rule
-- that is deliberately left disabled here.
-- NOTE(review): the rules are presumably applied one after the other with
-- string.gsub by the caller, so the specific article rules must stay
-- ahead of the catch-all "what remains" ones -- confirm against the caller
-- before reordering.
trigraphseasy = { -- differences marked below with 'easy'
   -- 'llatI / 'llad_I
   {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
   {a="([%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"},
   -- al- + lām (easy)
   {a="^(a)l%-(l)", b="ا%1ل%2"},
   {a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3"},
   -- al- + solar consonant (easy)
   {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"},
   {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"},
   -- assim. art. + solar consonant (easy)
   {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"},
   {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"},
   -- al- + initial unstable hamza
   {a="^(a)l%-(\")([uai])", b="ا%1لٱ%3"},
   {a="([%s%-])(a)l%-(\")([uai])", b="%1ا%2لٱ%4"},
   {a="^(a)l%-([uai])", b="ا%1لا%2"},
   {a="([%s%-])(a)l%-([uai])", b="%1ا%2لا%3"},
   -- li-/la- + art. + initial unstable hamza is a special orthography
   {a="l([ai])%-l%-(\")([uai])", b="ل%1لٱ%3"},
   {a="l([ai])%-l%-([uai])", b="ل%1لا%2"},
   -- al- + lunar consonant (i.e. what remains)
   {a="^(a)l%-", b="ا%1ل"},
   {a="([%s%-])(a)l%-", b="%1ا%2ل"},
   -- diphthongs to be resolved before ʾalif conjunctionis
   {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
   {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
   {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
   -- art. with waṣla + lām (easy)
   {a="'l%-(l)", b="ال%1"},
   -- art. with waṣla + solar consonant (easy)
   {a="'l%-([%_%^%.]?[tdrzsn])", b="ال%1"},
   -- li-/la- + art. + lām (easy)
   {a="l([ai])%-l%-(l)", b="ل%1%2"},
   -- assim. art. with waṣla + solar consonant (easy)
   {a="'([%_%^%.]?[tdrzsn])%-", b="ال"},
   -- li-/la- + art. + solar consonant is a special orthography (easy)
   {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2"},
   -- li-/la + assim. art. + solar consonant is a special orthography (easy)
   {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3"},
   -- art. with waṣla + initial unstable hamza
   {a="'l%-(\")([uai])", b="الٱ%2"},
   {a="'l%-([uai])", b="الا%1"},
   -- art. with waṣla + lunar consonant (i.e. what remains)
   {a="'l%-", b="ال"},
   -- the silent wāw
   {a="uU$", b="uو"},
   {a="uU(%W)", b="uو%1"},
   {a="aU$", b="aو"},
   {a="aU(%W)", b="aو%1"},
   {a="iU$", b="iو"},
   {a="iU(%W)", b="iو%1"},
   -- words ending in -āT with silent wāw/yāʾ
   {a="(_a)UA", b="%1وا"},
   {a="(_a)U", b="%1و"},
   {a="(_a)I", b="%1ي"},
   -- assimilations
--easy   {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"}
}
425
250digraphs = { 426digraphs = {
251 -- initial straight double quote gives a connective ʾalif 427 -- initial straight double quote gives a connective ʾalif
252 {a="^\"[uai]", b="ٱ"}, 428 {a="^\"[uai]", b="ٱ"},