diff options
author | Robert Alessi <alessi@robertalessi.net> | 2016-03-01 11:27:13 +0100 |
---|---|---|
committer | Robert Alessi <alessi@robertalessi.net> | 2016-03-01 11:27:13 +0100 |
commit | c8dccd1fa1da5e45bdd951746b942f8de1ffcb29 (patch) | |
tree | 66a0ae64886bb5e5138e726e39eeec63c4742546 | |
parent | 7bd04248f4dd33d051e0dedf5de8560e2f3a7496 (diff) | |
download | arabluatex-c8dccd1fa1da5e45bdd951746b942f8de1ffcb29.tar.gz |
implementation of loc transliteration tables
-rw-r--r-- | arabluatex.lua | 34 | ||||
-rw-r--r-- | arabluatex_trans.lua | 169 |
2 files changed, 201 insertions, 2 deletions
diff --git a/arabluatex.lua b/arabluatex.lua index b716bea..d76a284 100644 --- a/arabluatex.lua +++ b/arabluatex.lua | |||
@@ -236,6 +236,38 @@ local function transdmg(str) | |||
236 | return str | 236 | return str |
237 | end | 237 | end |
238 | 238 | ||
239 | local function transloc(str) | ||
240 | str = string.gsub(str, "\\arb(%b{})", function(inside) | ||
241 | inside = string.sub(inside, 2, -2) | ||
242 | for i = 1,#hamzatrloc do | ||
243 | inside = string.gsub(inside, hamzatrloc[i].a, hamzatrloc[i].b) | ||
244 | end | ||
245 | for i = 1,#tanwintrloc do | ||
246 | inside = string.gsub(inside, tanwintrloc[i].a, tanwintrloc[i].b) | ||
247 | end | ||
248 | for i = 1,#trigraphstrloc do | ||
249 | inside = string.gsub(inside, trigraphstrloc[i].a, trigraphstrloc[i].b) | ||
250 | end | ||
251 | for i = 1,#digraphstrloc do | ||
252 | inside = string.gsub(inside, digraphstrloc[i].a, digraphstrloc[i].b) | ||
253 | end | ||
254 | for i = 1,#singletrloc do | ||
255 | inside = string.gsub(inside, singletrloc[i].a, singletrloc[i].b) | ||
256 | end | ||
257 | for i = 1,#longvtrloc do | ||
258 | inside = string.gsub(inside, longvtrloc[i].a, longvtrloc[i].b) | ||
259 | end | ||
260 | for i = 1,#shortvtrloc do | ||
261 | inside = string.gsub(inside, shortvtrloc[i].a, shortvtrloc[i].b) | ||
262 | end | ||
263 | for i = 1,#finaltrloc do | ||
264 | inside = string.gsub(inside, finaltrloc[i].a, finaltrloc[i].b) | ||
265 | end | ||
266 | return string.format("\\txtrans{%s}", inside) | ||
267 | end) | ||
268 | return str | ||
269 | end | ||
270 | |||
239 | function processvoc(str) | 271 | function processvoc(str) |
240 | str = "\\arb{".. str.."}" | 272 | str = "\\arb{".. str.."}" |
241 | str = protectarb(str) | 273 | str = protectarb(str) |
@@ -273,6 +305,8 @@ function processtrans(str, mode) | |||
273 | str = holdcmd(str) | 305 | str = holdcmd(str) |
274 | if mode == "dmg" then | 306 | if mode == "dmg" then |
275 | str = transdmg(str) | 307 | str = transdmg(str) |
308 | elseif mode == "loc" then | ||
309 | str = transloc(str) | ||
276 | else end | 310 | else end |
277 | str = unprotectarb(str) | 311 | str = unprotectarb(str) |
278 | return str | 312 | return str |
diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index e65c3eb..8a9aee8 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua | |||
@@ -22,6 +22,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |||
22 | 02111-1307, USA. | 22 | 02111-1307, USA. |
23 | --]] | 23 | --]] |
24 | 24 | ||
25 | -- dmg | ||
26 | |||
25 | hamzatrdmg = { | 27 | hamzatrdmg = { |
26 | -- initial long u | 28 | -- initial long u |
27 | {a="%'%_U", b="ʾU"}, | 29 | {a="%'%_U", b="ʾU"}, |
@@ -87,11 +89,11 @@ trigraphstrdmg = { -- trigraphs or more | |||
87 | -- art. with waṣla + lām | 89 | -- art. with waṣla + lām |
88 | {a="'l%-(l)", b="'l-%1"}, | 90 | {a="'l%-(l)", b="'l-%1"}, |
89 | -- art. with waṣla + solar consonant | 91 | -- art. with waṣla + solar consonant |
90 | {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-"}, | 92 | {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"}, |
91 | -- li- + art. + lām | 93 | -- li- + art. + lām |
92 | {a="l(i)%-l%-(l)", b="l%1-l-%2%2"}, | 94 | {a="l(i)%-l%-(l)", b="l%1-l-%2%2"}, |
93 | -- assim. art. with waṣla + solar consonant | 95 | -- assim. art. with waṣla + solar consonant |
94 | {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-%1"}, | 96 | {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"}, |
95 | -- li- + art. + solar consonant is a special orthography | 97 | -- li- + art. + solar consonant is a special orthography |
96 | {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, | 98 | {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, |
97 | -- li- + assim. art. + solar consonant is a special orthography | 99 | -- li- + assim. art. + solar consonant is a special orthography |
@@ -173,6 +175,169 @@ shortvtrdmg = { | |||
173 | {a="i", b="i"} | 175 | {a="i", b="i"} |
174 | } | 176 | } |
175 | 177 | ||
178 | -- loc | ||
179 | |||
180 | hamzatrloc = { | ||
181 | -- initial long u | ||
182 | {a="%'%_U", b="U"}, | ||
183 | -- madda (historic writing below) | ||
184 | {a="^(')(A)", b="%2"}, | ||
185 | {a="(%W)(')(A)", b="%1%3"}, | ||
186 | {a="'A", b="ʾA"}, | ||
187 | {a="(A)(')(i)$", b="%1ʾ%3"}, | ||
188 | {a="(A)(')(i)(%W)", b="%1ʾ%3%4"}, | ||
189 | {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda | ||
190 | {a="(A)(')", b="%1ʾ"}, -- historic madda | ||
191 | -- initial (needs both ^ and %W patterns) | ||
192 | {a="^(')([ua])", b="%2"}, | ||
193 | {a="^(')(i)", b="%2"}, | ||
194 | {a="(%W)(')([ua])", b="%1%3"}, | ||
195 | {a="(%W)(')(i)", b="%1%3"}, | ||
196 | -- final | ||
197 | {a="([Iy])(')(aN)$", b="%1ʾ%3"}, | ||
198 | {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"}, | ||
199 | {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"}, | ||
200 | {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"}, | ||
201 | {a="([UI])(')([uai])$", b="%1ʾ%3"}, | ||
202 | {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"}, | ||
203 | -- middle | ||
204 | {a="(U)(')", b="%1ʾ"}, | ||
205 | {a="([Iy])(')", b="%1ʾ"}, | ||
206 | {a="([^uai])(')([uU])", b="%1ʾ%3"}, | ||
207 | {a="([^uai])(')([aA])", b="%1ʾ%3"}, | ||
208 | {a="([^uai])(')([iI])", b="%1ʾ%3"}, | ||
209 | {a="(u)(')([uU])", b="%1ʾ%3"}, | ||
210 | {a="(u)(')([aA])", b="%1ʾ%3"}, | ||
211 | {a="(u)(')([iI])", b="%1ʾ%3"}, | ||
212 | {a="(a)(')([aA])", b="%1ʾ%3"}, | ||
213 | {a="(a)(')([uU])", b="%1ʾ%3"}, | ||
214 | {a="(a)(')([iI])", b="%1ʾ%3"}, | ||
215 | {a="(i)(')([aA])", b="%1ʾ%3"}, | ||
216 | {a="(i)(')([uU])", b="%1ʾ%3"}, | ||
217 | {a="(i)(')([iI])", b="%1ʾ%3"}, | ||
218 | {a="(a)(')([^uaiUAI])", b="%1ʾ%3"}, | ||
219 | {a="(u)(')([^uaiUAI])", b="%1ʾ%3"}, | ||
220 | {a="(i)(')([^uaiUAI])", b="%1ʾ%3"} | ||
221 | } | ||
222 | |||
223 | trigraphstrloc = { -- trigraphs or more | ||
224 | -- 'llatI / 'llad_I | ||
225 | {a="^'ll(a)([%_]?[dt])", b="all%1%2"}, | ||
226 | {a="(%s)'ll(a)([%_]?[dt])", b="%1all%2%3"}, | ||
227 | -- al- + lām | ||
228 | {a="^(a)l%-(l)", b="%1l-%2"}, | ||
229 | {a="(%s)(a)l%-(l)", b="%1%2l-%3"}, | ||
230 | -- al- + solar consonant | ||
231 | {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"}, | ||
232 | {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"}, | ||
233 | -- assim. art. + solar consonant | ||
234 | {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-%2"}, | ||
235 | {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-%3"}, | ||
236 | -- al- + initial unstable hamza | ||
237 | {a="^(a)l%-([uai])", b="%1l-%2"}, | ||
238 | {a="(%s)(a)l%-([uai])", b="%1%2l-%3"}, | ||
239 | -- li- + art. + initial unstable hamza is a special orthography | ||
240 | {a="l(i)%-l%-([uai])", b="l%1l-%2"}, | ||
241 | -- al- + lunar consonant (i.e. what remains) | ||
242 | {a="^(a)l%-", b="%1l-"}, | ||
243 | {a="(%s)(a)l%-", b="%1%2l-"}, | ||
244 | -- art. with waṣla + lām | ||
245 | {a="'l%-(l)", b="al-%1"}, | ||
246 | -- art. with waṣla + solar consonant | ||
247 | {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"}, | ||
248 | -- li- + art. + lām | ||
249 | {a="l(i)%-l%-(l)", b="l%1l-%2"}, | ||
250 | -- assim. art. with waṣla + solar consonant | ||
251 | {a="'([%_%^%.]?[tdrzsn])%-", b="al-"}, | ||
252 | -- li- + art. + solar consonant is a special orthography | ||
253 | {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1l-%2"}, | ||
254 | -- li- + assim. art. + solar consonant is a special orthography | ||
255 | {a="l(i)%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1l-%3"}, | ||
256 | -- art. with waṣla + initial unstable hamza | ||
257 | {a="'l%-([uai])", b="al-%1"}, | ||
258 | -- art. with waṣla + lunar consonant (i.e. what remains) | ||
259 | {a="'l%-", b="al-"} | ||
260 | } | ||
261 | |||
262 | tanwintrloc = { | ||
263 | {a="uN", b="un"}, | ||
264 | {a="(aN)(_A)", b="an"}, | ||
265 | {a="(aN)(Y)", b="an"}, | ||
266 | {a="(T)(aN)", b="tan"}, | ||
267 | {a="([^TA])(aN)", b="%1an"}, | ||
268 | {a="iN", b="in"} | ||
269 | } | ||
270 | |||
271 | digraphstrloc = { | ||
272 | {a="(%-)([uai])", b="%1%2"}, -- hyphen + initial alif without hamza | ||
273 | {a="^([uai])", b="%1"}, -- initial alif without hamza | ||
274 | {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza | ||
275 | {a="%-%-", b=""}, | ||
276 | {a="uww", b="ūw"}, | ||
277 | {a="iyy", b="īy"}, | ||
278 | {a="([tkdsg])(h)", b="%1'%2"}, | ||
279 | {a="T([^uai])", b="h%1"}, | ||
280 | {a="T$", b="h"}, | ||
281 | {a="T(%W)", b="h%1"}, | ||
282 | {a="_t", b="th"}, | ||
283 | {a="%^g", b="j"}, | ||
284 | {a="%.h", b="ḥ"}, | ||
285 | {a="_h", b="kh"}, | ||
286 | {a="_d", b="dh"}, | ||
287 | {a="%^s", b="sh"}, | ||
288 | {a="%.s", b="ṣ"}, | ||
289 | {a="%.d", b="ḍ"}, | ||
290 | {a="%.t", b="ṭ"}, | ||
291 | {a="%.z", b="ẓ"}, | ||
292 | {a="%.g", b="gh"}, | ||
293 | {a="(U)(A)", b="ū"}, | ||
294 | {a="WA", b="ū"}, | ||
295 | {a="(a)W", b="%1w"}, | ||
296 | {a="_A", b="á"}, | ||
297 | {a="_u", b="ū"}, | ||
298 | {a="_a", b="ā"}, | ||
299 | {a="_i", b="ī"} | ||
300 | } | ||
301 | |||
302 | singletrloc = { | ||
303 | {a="b", b="b"}, | ||
304 | {a="t", b="t"}, | ||
305 | {a="j", b="j"}, | ||
306 | {a="x", b="kh"}, | ||
307 | {a="d", b="d"}, | ||
308 | {a="r", b="r"}, | ||
309 | {a="z", b="z"}, | ||
310 | {a="s", b="s"}, | ||
311 | {a="`", b="`"}, | ||
312 | {a="f", b="f"}, | ||
313 | {a="q", b="q"}, | ||
314 | {a="k", b="k"}, | ||
315 | {a="l", b="l"}, | ||
316 | {a="m", b="m"}, | ||
317 | {a="n", b="n"}, | ||
318 | {a="h", b="h"}, | ||
319 | {a="w", b="w"}, | ||
320 | {a="y", b="y"}, | ||
321 | {a="T", b="t"}, | ||
322 | {a="Y", b="á"}, | ||
323 | {a='"', b=''}, | ||
324 | } | ||
325 | |||
326 | longvtrloc = { | ||
327 | {a="A", b="ā"}, | ||
328 | {a="U", b="ū"}, | ||
329 | {a="I", b="ī"} | ||
330 | } | ||
331 | |||
332 | shortvtrloc = { | ||
333 | {a="u", b="u"}, | ||
334 | {a="a", b="a"}, | ||
335 | {a="i", b="i"} | ||
336 | } | ||
337 | |||
338 | finaltrloc = { | ||
339 | {a="ʾ", b="'"}, | ||
340 | } | ||
176 | 341 | ||
177 | -- return { | 342 | -- return { |
178 | -- raw = raw, | 343 | -- raw = raw, |