aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robert Alessi <alessi@robertalessi.net> 2016-03-01 11:27:13 +0100
committer Robert Alessi <alessi@robertalessi.net> 2016-03-01 11:27:13 +0100
commit c8dccd1fa1da5e45bdd951746b942f8de1ffcb29 (patch)
tree 66a0ae64886bb5e5138e726e39eeec63c4742546
parent 7bd04248f4dd33d051e0dedf5de8560e2f3a7496 (diff)
download arabluatex-c8dccd1fa1da5e45bdd951746b942f8de1ffcb29.tar.gz
implementation of loc transliteration tables
-rw-r--r-- arabluatex.lua 34
-rw-r--r-- arabluatex_trans.lua 169
2 files changed, 201 insertions, 2 deletions
diff --git a/arabluatex.lua b/arabluatex.lua
index b716bea..d76a284 100644
--- a/arabluatex.lua
+++ b/arabluatex.lua
@@ -236,6 +236,38 @@ local function transdmg(str)
236return str 236return str
237end 237end
238 238
-- Transliterate every \arb{...} group found in `str` with the "loc" rule
-- tables and wrap each result in \txtrans{...}.
--
-- The rule tables are globals (hamzatrloc, tanwintrloc, ...); each one is a
-- sequence of {a = <Lua pattern>, b = <replacement>} pairs.  They are applied
-- in a fixed order, every rule of one table before the next table starts,
-- and each rule sees the output of the previous one.
--
-- @param str  string possibly containing \arb{...} groups
-- @return     `str` with each \arb{...} replaced by \txtrans{<transliteration>}
local function transloc(str)
   str = string.gsub(str, "\\arb(%b{})", function(group)
      -- %b{} captures the surrounding braces as well: strip them.
      local text = string.sub(group, 2, -2)
      -- Order matters: later tables work on what earlier ones produced.
      local stages = {
         hamzatrloc,
         tanwintrloc,
         trigraphstrloc,
         digraphstrloc,
         singletrloc,
         longvtrloc,
         shortvtrloc,
         finaltrloc,
      }
      for s = 1, 8 do
         local rules = stages[s]
         for r = 1, #rules do
            local rule = rules[r]
            text = string.gsub(text, rule.a, rule.b)
         end
      end
      return string.format("\\txtrans{%s}", text)
   end)
   return str
end
270
239function processvoc(str) 271function processvoc(str)
240 str = "\\arb{".. str.."}" 272 str = "\\arb{".. str.."}"
241 str = protectarb(str) 273 str = protectarb(str)
@@ -273,6 +305,8 @@ function processtrans(str, mode)
273 str = holdcmd(str) 305 str = holdcmd(str)
274 if mode == "dmg" then 306 if mode == "dmg" then
275 str = transdmg(str) 307 str = transdmg(str)
308 elseif mode == "loc" then
309 str = transloc(str)
276 else end 310 else end
277 str = unprotectarb(str) 311 str = unprotectarb(str)
278return str 312return str
diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua
index e65c3eb..8a9aee8 100644
--- a/arabluatex_trans.lua
+++ b/arabluatex_trans.lua
@@ -22,6 +22,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
2202111-1307, USA. 2202111-1307, USA.
23--]] 23--]]
24 24
25-- dmg
26
25hamzatrdmg = { 27hamzatrdmg = {
26 -- initial long u 28 -- initial long u
27 {a="%'%_U", b="ʾU"}, 29 {a="%'%_U", b="ʾU"},
@@ -87,11 +89,11 @@ trigraphstrdmg = { -- trigraphs or more
87 -- art. with waṣla + lām 89 -- art. with waṣla + lām
88 {a="'l%-(l)", b="'l-%1"}, 90 {a="'l%-(l)", b="'l-%1"},
89 -- art. with waṣla + solar consonant 91 -- art. with waṣla + solar consonant
90 {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-"}, 92 {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"},
91 -- li- + art. + lām 93 -- li- + art. + lām
92 {a="l(i)%-l%-(l)", b="l%1-l-%2%2"}, 94 {a="l(i)%-l%-(l)", b="l%1-l-%2%2"},
93 -- assim. art. with waṣla + solar consonant 95 -- assim. art. with waṣla + solar consonant
94 {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-%1"}, 96 {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"},
95 -- li- + art. + solar consonant is a special orthography 97 -- li- + art. + solar consonant is a special orthography
96 {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, 98 {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"},
97 -- li- + assim. art. + solar consonant is a special orthography 99 -- li- + assim. art. + solar consonant is a special orthography
@@ -173,6 +175,169 @@ shortvtrdmg = {
173 {a="i", b="i"} 175 {a="i", b="i"}
174} 176}
175 177
178-- loc
179
-- Hamza rules for the "loc" transliteration.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}; entries are
-- applied in sequence, so their order is significant.
-- Word-initial hamza is dropped; everywhere else it is rewritten to the
-- temporary marker "ʾ", which finaltrloc turns back into "'" at the very end
-- (presumably so that the article rules, which also match "'", leave it alone).
hamzatrloc = {
   -- initial long u
   { a = "%'%_U", b = "U" },

   -- madda (historic writing below)
   { a = "^(')(A)",       b = "%2" },
   { a = "(%W)(')(A)",    b = "%1%3" },
   { a = "'A",            b = "ʾA" },
   { a = "(A)(')(i)$",    b = "%1ʾ%3" },
   { a = "(A)(')(i)(%W)", b = "%1ʾ%3%4" },
   { a = "(A)(')(i)",     b = "%1ʾ%3" }, -- historic madda
   { a = "(A)(')",        b = "%1ʾ" },   -- historic madda

   -- initial: one rule anchored at the start of the string (^) and one for
   -- a preceding non-alphanumeric character (%W)
   { a = "^(')([ua])",    b = "%2" },
   { a = "^(')(i)",       b = "%2" },
   { a = "(%W)(')([ua])", b = "%1%3" },
   { a = "(%W)(')(i)",    b = "%1%3" },

   -- final: end of string ($) or followed by a non-alphanumeric character
   { a = "([Iy])(')(aN)$",             b = "%1ʾ%3" },
   { a = "([Iy])(')(aN)(%W)",          b = "%1ʾ%3%4" },
   { a = "([^uai])(')([uai]N?)$",      b = "%1ʾ%3" },
   { a = "([^uai])(')([uai]N?)(%W)",   b = "%1ʾ%3%4" },
   { a = "([UI])(')([uai])$",          b = "%1ʾ%3" },
   { a = "([UI])(')([uai])(%W)",       b = "%1ʾ%3%4" },

   -- middle
   { a = "(U)(')",               b = "%1ʾ" },
   { a = "([Iy])(')",            b = "%1ʾ" },
   { a = "([^uai])(')([uU])",    b = "%1ʾ%3" },
   { a = "([^uai])(')([aA])",    b = "%1ʾ%3" },
   { a = "([^uai])(')([iI])",    b = "%1ʾ%3" },
   { a = "(u)(')([uU])",         b = "%1ʾ%3" },
   { a = "(u)(')([aA])",         b = "%1ʾ%3" },
   { a = "(u)(')([iI])",         b = "%1ʾ%3" },
   { a = "(a)(')([aA])",         b = "%1ʾ%3" },
   { a = "(a)(')([uU])",         b = "%1ʾ%3" },
   { a = "(a)(')([iI])",         b = "%1ʾ%3" },
   { a = "(i)(')([aA])",         b = "%1ʾ%3" },
   { a = "(i)(')([uU])",         b = "%1ʾ%3" },
   { a = "(i)(')([iI])",         b = "%1ʾ%3" },
   { a = "(a)(')([^uaiUAI])",    b = "%1ʾ%3" },
   { a = "(u)(')([^uaiUAI])",    b = "%1ʾ%3" },
   { a = "(i)(')([^uaiUAI])",    b = "%1ʾ%3" }
}
222
-- Article rules (trigraphs or longer) for the "loc" transliteration.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}; entries are
-- applied in sequence, so their order is significant.
-- In this scheme the article is always written "al-", whether the input
-- spells it out ("al-"), marks it with waṣla ("'l-") or writes it
-- assimilated to a following solar consonant ("a^s-", "'^s-").
trigraphstrloc = {
   -- 'llatI / 'llad_I
   {a="^'ll(a)([%_]?[dt])", b="all%1%2"},
   {a="(%s)'ll(a)([%_]?[dt])", b="%1all%2%3"},
   -- al- + lām
   {a="^(a)l%-(l)", b="%1l-%2"},
   {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
   -- al- + solar consonant
   {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"},
   {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"},
   -- assim. art. + solar consonant
   -- The captured consonant is the assimilated lām of the article; the word
   -- after the hyphen still starts with its own copy of it, so only "al-" is
   -- emitted here.  Re-emitting the capture would double the consonant
   -- ("a^s-^samsu" -> "al-^s^samsu").
   {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-"},
   {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-"},
   -- al- + initial unstable hamza
   {a="^(a)l%-([uai])", b="%1l-%2"},
   {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
   -- li- + art. + initial unstable hamza is a special orthography
   {a="l(i)%-l%-([uai])", b="l%1l-%2"},
   -- al- + lunar consonant (i.e. what remains)
   {a="^(a)l%-", b="%1l-"},
   {a="(%s)(a)l%-", b="%1%2l-"},
   -- art. with waṣla + lām
   {a="'l%-(l)", b="al-%1"},
   -- art. with waṣla + solar consonant
   {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"},
   -- li- + art. + lām
   {a="l(i)%-l%-(l)", b="l%1l-%2"},
   -- assim. art. with waṣla + solar consonant (assimilated consonant dropped)
   {a="'([%_%^%.]?[tdrzsn])%-", b="al-"},
   -- li- + art. + solar consonant is a special orthography
   {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="l%1l-%2"},
   -- li- + assim. art. + solar consonant is a special orthography
   {a="l(i)%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1l-%3"},
   -- art. with waṣla + initial unstable hamza
   {a="'l%-([uai])", b="al-%1"},
   -- art. with waṣla + lunar consonant (i.e. what remains)
   {a="'l%-", b="al-"}
}
261
-- Tanwīn (nunation) rules for the "loc" transliteration.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}, applied in order.
-- "uN"/"aN"/"iN" become "un"/"an"/"in"; a "_A" or "Y" written after "aN" is
-- dropped, and "T" before "aN" comes out as "t".
tanwintrloc = {
   { a = "uN",          b = "un" },
   { a = "(aN)(_A)",    b = "an" },
   { a = "(aN)(Y)",     b = "an" },
   { a = "(T)(aN)",     b = "tan" },
   { a = "([^TA])(aN)", b = "%1an" },
   { a = "iN",          b = "in" }
}
270
-- Two-character (and a few related) rules for the "loc" transliteration.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}, applied in order.
digraphstrloc = {
   -- initial alif without hamza: after a hyphen, at the start of the string,
   -- or after whitespace (the replacements leave the text as it is)
   { a = "(%-)([uai])", b = "%1%2" },
   { a = "^([uai])",    b = "%1" },
   { a = "(%s)([uai])", b = "%1%2" },
   -- a double hyphen is removed
   { a = "%-%-", b = "" },
   -- doubled glide after its homorganic short vowel
   { a = "uww", b = "ūw" },
   { a = "iyy", b = "īy" },
   -- keep t/k/d/s/g + h apart from the digraphs produced further down
   { a = "([tkdsg])(h)", b = "%1'%2" },
   -- "T" becomes "h" unless a short vowel follows
   { a = "T([^uai])", b = "h%1" },
   { a = "T$",        b = "h" },
   { a = "T(%W)",     b = "h%1" },
   -- consonants written with a prefix sign
   { a = "_t",  b = "th" },
   { a = "%^g", b = "j" },
   { a = "%.h", b = "ḥ" },
   { a = "_h",  b = "kh" },
   { a = "_d",  b = "dh" },
   { a = "%^s", b = "sh" },
   { a = "%.s", b = "ṣ" },
   { a = "%.d", b = "ḍ" },
   { a = "%.t", b = "ṭ" },
   { a = "%.z", b = "ẓ" },
   { a = "%.g", b = "gh" },
   -- vowels and glides
   { a = "(U)(A)", b = "ū" },
   { a = "WA",     b = "ū" },
   { a = "(a)W",   b = "%1w" },
   { a = "_A",     b = "á" },
   { a = "_u",     b = "ū" },
   { a = "_a",     b = "ā" },
   { a = "_i",     b = "ī" }
}
301
-- Single-character rules for the "loc" transliteration.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}, applied in order.
-- Most entries map a letter to itself; the ones that change the text are
-- "x" -> "kh", "T" -> "t", "Y" -> "á" and the double quote, which is removed.
singletrloc = {
   { a = "b", b = "b" },
   { a = "t", b = "t" },
   { a = "j", b = "j" },
   { a = "x", b = "kh" },
   { a = "d", b = "d" },
   { a = "r", b = "r" },
   { a = "z", b = "z" },
   { a = "s", b = "s" },
   { a = "`", b = "`" },
   { a = "f", b = "f" },
   { a = "q", b = "q" },
   { a = "k", b = "k" },
   { a = "l", b = "l" },
   { a = "m", b = "m" },
   { a = "n", b = "n" },
   { a = "h", b = "h" },
   { a = "w", b = "w" },
   { a = "y", b = "y" },
   { a = "T", b = "t" },
   { a = "Y", b = "á" },
   { a = '"', b = '' },
}
325
-- Long-vowel rules for the "loc" transliteration: the upper-case vowel
-- letters are replaced by the corresponding vowel with a macron.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}.
longvtrloc = {
   { a = "A", b = "ā" },
   { a = "U", b = "ū" },
   { a = "I", b = "ī" }
}
331
-- Short-vowel rules for the "loc" transliteration.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}; every short
-- vowel maps to itself, so this table leaves the text unchanged.
shortvtrloc = {
   { a = "u", b = "u" },
   { a = "a", b = "a" },
   { a = "i", b = "i" }
}
337
-- Final clean-up rule for the "loc" transliteration: the temporary hamza
-- marker "ʾ" written by the hamza rules is turned into a plain apostrophe.
-- Each entry is {a = <Lua pattern>, b = <gsub replacement>}.
finaltrloc = {
   { a = "ʾ", b = "'" },
}
176 341
177-- return { 342-- return {
178-- raw = raw, 343-- raw = raw,