diff options
author | Robert Alessi <alessi@robertalessi.net> | 2016-05-08 16:09:05 +0200 |
---|---|---|
committer | Robert Alessi <alessi@robertalessi.net> | 2016-05-08 16:09:05 +0200 |
commit | 6202ee62d0f34509d7a652c30a1d5c19efec70ae (patch) | |
tree | a56d3c38f40221daabc0891d6b0974d6128fc7b6 /arabluatex_voc.lua | |
parent | e80c0d99f7d2511e6e3f3e70cd922f119a1fc217 (diff) | |
download | arabluatex-6202ee62d0f34509d7a652c30a1d5c19efec70ae.tar.gz |
done implementing 'easy' rules set
Diffstat (limited to 'arabluatex_voc.lua')
-rw-r--r-- | arabluatex_voc.lua | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 5eafca5..4f5399f 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua | |||
@@ -154,6 +154,89 @@ hamza = { | |||
154 | {a="(i)(')([^uaiUAI])", b="%1ئ%3"} | 154 | {a="(i)(')([^uaiUAI])", b="%1ئ%3"} |
155 | } | 155 | } |
156 | 156 | ||
157 | hamzaeasy = { -- 'easy' rule set for hamza: each entry pairs a pattern (a) with its replacement (b), presumably applied in order via string.gsub (confirm against the caller); differences (presumably from the hamza table) are marked below with 'easy' | ||
158 | -- hard-coded hamza: a carrier code followed by "' maps directly to one fixed hamza glyph | ||
159 | {a="|\"'", b="ء"}, | ||
160 | {a="A\"'", b="آ"}, | ||
161 | {a="[au]\"'", b="أ"}, | ||
162 | {a="w\"'", b="ؤ"}, | ||
163 | {a="i\"'", b="إ"}, | ||
164 | {a="y\"'", b="ئ"}, | ||
165 | -- hamza takes tašdīd (gemination) too: a doubled ' becomes two hamza letters on the seat matching the following vowel | ||
166 | {a="''([Uu])", b="ؤؤ%1"}, | ||
167 | {a="''([Aa])", b="أأ%1"}, | ||
168 | {a="''([Ii])", b="ئئ%1"}, | ||
169 | -- initial long u and i (for a, see below) | ||
170 | {a="%'%_U", b="أU"}, | ||
171 | {a="%'%_I", b="إI"}, | ||
172 | -- taḫfīfu 'l-hamza (lightening of the hamza): 'u' / 'i' before a non-vowel becomes hamza + long vowel | ||
173 | {a="'u'([^uaiUAI])", b="أU%1"}, | ||
174 | {a="'i'([^uaiUAI])", b="إI%1"}, | ||
175 | -- madda (the historic-writing rules below are commented out in this 'easy' set) | ||
176 | {a="'a'([^uaiUAI])", b="آ%1"}, | ||
177 | {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, | ||
178 | --easy {a="(A)(')(uN?)$", b="aآء%3"}, | ||
179 | --easy {a="(A)(')(uN?)(%W)", b="aآء%3%4"}, | ||
180 | --easy {a="(A)(')(iN?)$", b="aآء%3"}, | ||
181 | --easy {a="(A)(')(iN?)(%W)", b="aآء%3%4"}, | ||
182 | --easy {a="(A)(')(i)", b="aآئ%3"}, -- historic madda | ||
183 | --easy {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda | ||
184 | --easy {a="(A)(')", b="aآء"}, -- historic madda | ||
185 | -- initial (needs both ^ and %W patterns): one rule for start of string, one for after a non-word character | ||
186 | {a="^(')([ua])", b="أ%2"}, | ||
187 | {a="^(')(i)", b="إ%2"}, | ||
188 | {a="(%W)(')([ua])", b="%1أ%3"}, | ||
189 | {a="(%W)(')(i)", b="%1إ%3"}, | ||
190 | -- final: rules come in pairs, one anchored with $ and one followed by a non-word character (%W) | ||
191 | -- ^say'aN and .zim'aN are special orthographies | ||
192 | {a="(%^say)(%')(aN)", b="%1ئ%3"}, | ||
193 | {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, | ||
194 | {a="([^uai])(')([uai]N?)$", b="%1ء%3"}, | ||
195 | {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"}, | ||
196 | -- final hamza after u. NOTE(review): the $ rule below uses [uai]?N while its %W twin and the a/i rules use [uai]N?; possibly a typo, confirm before changing | ||
197 | {a="(u)(')([uai]?N)$", b="%1ؤ%3"}, | ||
198 | {a="(u)(')([uai]N?)(%W)", b="%1ؤ%3%4"}, | ||
199 | {a="(u)(')$", b="%1ؤ"}, | ||
200 | {a="(u)(')(%W)", b="%1ؤ%3"}, | ||
201 | -- final hamza after a | ||
202 | {a="(a)(')(A)$", b="%1آ"}, | ||
203 | {a="(a)(')(A)(%W)", b="%1آ%4"}, | ||
204 | {a="(a)(')([u]N?)$", b="%1أ%3"}, | ||
205 | {a="(a)(')([u]N?)(%W)", b="%1أ%3%4"}, | ||
206 | {a="(a)(')(a)$", b="%1أ%3"}, | ||
207 | {a="(a)(')(a)(%W)", b="%1أ%3%4"}, | ||
208 | {a="(a)(')(aN)$", b="%1أً"}, | ||
209 | {a="(a)(')(aN)(%W)", b="%1أً%4"}, | ||
210 | {a="(a)(')([i]N?)$", b="%1إ%3"}, | ||
211 | {a="(a)(')([i]N?)(%W)", b="%1إ%3%4"}, | ||
212 | {a="(a)(')$", b="%1أ"}, | ||
213 | {a="(a)(')(%W)", b="%1أ%3"}, | ||
214 | -- final hamza after i | ||
215 | {a="(i)(')([uai]N?)$", b="%1ئ%3"}, | ||
216 | {a="(i)(')([uai]N?)(%W)", b="%1ئ%3%4"}, | ||
217 | {a="(i)(')$", b="%1ئ"}, | ||
218 | {a="(i)(')(%W)", b="%1ئ%3"}, | ||
219 | -- | ||
220 | -- middle: keyed on the character before the hamza and the vowel after it; the last three rules cover hamza followed by a non-vowel | ||
221 | {a="(U)(')", b="%1ء"}, | ||
222 | {a="([Iy])(')", b="%1ئ"}, | ||
223 | {a="([^uai])(')([uU])", b="%1ؤ%3"}, | ||
224 | {a="([^uai])(')([aA])", b="%1أ%3"}, | ||
225 | {a="([^uai])(')([iI])", b="%1ئ%3"}, | ||
226 | {a="(u)(')([uU])", b="%1ؤ%3"}, | ||
227 | {a="(u)(')([aA])", b="%1ؤ%3"}, | ||
228 | {a="(u)(')([iI])", b="%1ئ%3"}, | ||
229 | {a="(a)(')([aA])", b="%1أ%3"}, | ||
230 | {a="(a)(')([uU])", b="%1ؤ%3"}, | ||
231 | {a="(a)(')([iI])", b="%1ئ%3"}, | ||
232 | {a="(i)(')([aA])", b="%1ئ%3"}, | ||
233 | {a="(i)(')([uU])", b="%1ئ%3"}, | ||
234 | {a="(i)(')([iI])", b="%1ئ%3"}, | ||
235 | {a="(a)(')([^uaiUAI])", b="%1أ%3"}, | ||
236 | {a="(u)(')([^uaiUAI])", b="%1ؤ%3"}, | ||
237 | {a="(i)(')([^uaiUAI])", b="%1ئ%3"} | ||
238 | } | ||
239 | |||
157 | tanwin = { | 240 | tanwin = { |
158 | {a="uNU", b="ٌو"}, | 241 | {a="uNU", b="ٌو"}, |
159 | {a="aNU", b="ًوا"}, | 242 | {a="aNU", b="ًوا"}, |
@@ -187,6 +270,39 @@ tanwin = { | |||
187 | {a="(iN)", b="ٍ"} | 270 | {a="(iN)", b="ٍ"} |
188 | } | 271 | } |
189 | 272 | ||
273 | tanwineasy = { -- 'easy' rule set for tanwīn (nunation): pattern (a) / replacement (b) pairs; 'easy' requires the assimilation rules to be taken out (left commented below) | ||
274 | {a="uNU", b="ٌو"}, | ||
275 | {a="aNU", b="ًوا"}, | ||
276 | {a="iNU", b="ٍو"}, | ||
277 | -- assimilations (begin): commented out in this 'easy' set | ||
278 | -- {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, | ||
279 | -- {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, | ||
280 | -- {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, | ||
281 | -- {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, | ||
282 | -- {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, | ||
283 | -- {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, | ||
284 | -- {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, | ||
285 | -- assimilations (end) | ||
286 | -- quoted tanwīn (begin): a " before uN/aN/iN drops the tanwīn mark and keeps only the carrier letter, if any; listed before the unquoted rules, which would otherwise match the same aN/uN/iN | ||
287 | {a="(\"uN)", b=""}, | ||
288 | {a="(B)(\"aN)", b="%1"}, | ||
289 | {a="(\"aN)(_A)", b="ى"}, | ||
290 | {a="(\"aN)(Y)", b="ى"}, | ||
291 | {a="(T)(\"aN)", b="%1"}, | ||
292 | {a="(ء)(\"aN)", b="%1"}, | ||
293 | {a="([^TA])(\"aN)", b="%1ا"}, | ||
294 | {a="(\"iN)", b=""}, | ||
295 | -- quoted tanwīn (end); the unquoted rules below write the tanwīn mark itself | ||
296 | {a="(uN)", b="ٌ"}, | ||
297 | {a="(B)(aN)", b="%1ً"}, | ||
298 | {a="(aN)(_A)", b="ًى"}, | ||
299 | {a="(aN)(Y)", b="ًى"}, | ||
300 | {a="(T)(aN)", b="%1ً"}, | ||
301 | {a="(ء)(aN)", b="%1ً"}, | ||
302 | {a="([^TA])(aN)", b="%1ًا"}, | ||
303 | {a="(iN)", b="ٍ"} | ||
304 | } | ||
305 | |||
190 | trigraphs = { -- trigraphs or more | 306 | trigraphs = { -- trigraphs or more |
191 | -- 'llatI / 'llad_I | 307 | -- 'llatI / 'llad_I |
192 | {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, | 308 | {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, |
@@ -247,6 +363,66 @@ trigraphs = { -- trigraphs or more | |||
247 | {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} | 363 | {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} |
248 | } | 364 | } |
249 | 365 | ||
366 | trigraphseasy = { -- 'easy' rule set for trigraphs (or longer sequences): pattern (a) / replacement (b) pairs, presumably applied in order (confirm against the caller); differences (presumably from the trigraphs table) are marked below with 'easy' | ||
367 | -- 'llatI / 'llad_I | ||
368 | {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, | ||
369 | {a="([%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, | ||
370 | -- al- + lām (easy) | ||
371 | {a="^(a)l%-(l)", b="ا%1ل%2"}, | ||
372 | {a="([%s%-])(a)l%-(l)", b="%1ا%2ل%3"}, | ||
373 | -- al- + solar consonant (easy) | ||
374 | {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2"}, | ||
375 | {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3"}, | ||
376 | -- assim. art. + solar consonant (easy): the assimilated consonant typed before the hyphen is written as lām | ||
377 | {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل"}, | ||
378 | {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل"}, | ||
379 | -- al- + initial unstable hamza | ||
380 | {a="^(a)l%-(\")([uai])", b="ا%1لٱ%3"}, | ||
381 | {a="([%s%-])(a)l%-(\")([uai])", b="%1ا%2لٱ%4"}, | ||
382 | {a="^(a)l%-([uai])", b="ا%1لا%2"}, | ||
383 | {a="([%s%-])(a)l%-([uai])", b="%1ا%2لا%3"}, | ||
384 | -- li-/la- + art. + initial unstable hamza is a special orthography | ||
385 | {a="l([ai])%-l%-(\")([uai])", b="ل%1لٱ%3"}, | ||
386 | {a="l([ai])%-l%-([uai])", b="ل%1لا%2"}, | ||
387 | -- al- + lunar consonant (i.e. what remains after the more specific al- rules above) | ||
388 | {a="^(a)l%-", b="ا%1ل"}, | ||
389 | {a="([%s%-])(a)l%-", b="%1ا%2ل"}, | ||
390 | -- diphthongs to be resolved before ʾalif conjunctionis | ||
391 | {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"}, | ||
392 | {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, | ||
393 | {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, | ||
394 | -- art. with waṣla + lām (easy) | ||
395 | {a="'l%-(l)", b="ال%1"}, | ||
396 | -- art. with waṣla + solar consonant (easy) | ||
397 | {a="'l%-([%_%^%.]?[tdrzsn])", b="ال%1"}, | ||
398 | -- li-/la- + art. + lām (easy) | ||
399 | {a="l([ai])%-l%-(l)", b="ل%1%2"}, | ||
400 | -- assim. art. with waṣla + solar consonant (easy) | ||
401 | {a="'([%_%^%.]?[tdrzsn])%-", b="ال"}, | ||
402 | -- li-/la- + art. + solar consonant is a special orthography (easy) | ||
403 | {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2"}, | ||
404 | -- li-/la + assim. art. + solar consonant is a special orthography (easy) | ||
405 | {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3"}, | ||
406 | -- art. with waṣla + initial unstable hamza | ||
407 | {a="'l%-(\")([uai])", b="الٱ%2"}, | ||
408 | {a="'l%-([uai])", b="الا%1"}, | ||
409 | -- art. with waṣla + lunar consonant (i.e. what remains after the more specific 'l- rules above) | ||
410 | {a="'l%-", b="ال"}, | ||
411 | -- the silent wāw: each vowel gets one rule anchored with $ and one followed by a non-word character (%W) | ||
412 | {a="uU$", b="uو"}, | ||
413 | {a="uU(%W)", b="uو%1"}, | ||
414 | {a="aU$", b="aو"}, | ||
415 | {a="aU(%W)", b="aو%1"}, | ||
416 | {a="iU$", b="iو"}, | ||
417 | {a="iU(%W)", b="iو%1"}, | ||
418 | -- words ending in -āT with silent wāw/yāʾ | ||
419 | {a="(_a)UA", b="%1وا"}, | ||
420 | {a="(_a)U", b="%1و"}, | ||
421 | {a="(_a)I", b="%1ي"}, | ||
422 | -- assimilations (commented out in this 'easy' set) | ||
423 | --easy {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} | ||
424 | } | ||
425 | |||
250 | digraphs = { | 426 | digraphs = { |
251 | -- initial straight double quote gives a connective ʾalif | 427 | -- initial straight double quote gives a connective ʾalif |
252 | {a="^\"[uai]", b="ٱ"}, | 428 | {a="^\"[uai]", b="ٱ"}, |