aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arabluatex.dtx20
-rw-r--r--arabluatex.lua9
-rw-r--r--arabluatex_fullvoc.lua4
-rw-r--r--arabluatex_trans.lua33
-rw-r--r--arabluatex_voc.lua4
5 files changed, 44 insertions, 26 deletions
diff --git a/arabluatex.dtx b/arabluatex.dtx
index 995be49..2e3dc7e 100644
--- a/arabluatex.dtx
+++ b/arabluatex.dtx
@@ -2144,8 +2144,8 @@ wa-ya.sIru ta.hta 'l-jildi
2144% \end{macro} 2144% \end{macro}
2145% \end{macro} 2145% \end{macro}
2146% \begin{macro}{\SetTranslitStyle} By default any transliterated 2146% \begin{macro}{\SetTranslitStyle} By default any transliterated
2147% Arabic text is printed in italics. This can be changed globally in 2147% Arabic text is printed in italics. This can be changed either
2148% the preamble or at any point of the document: 2148% globally in the preamble or at any point of the document:
2149% \begin{macrocode} 2149% \begin{macrocode}
2150\def\al@trans@style{\itshape}% 2150\def\al@trans@style{\itshape}%
2151\NewDocumentCommand{\SetTranslitStyle}{m}{\def\al@trans@style{#1}} 2151\NewDocumentCommand{\SetTranslitStyle}{m}{\def\al@trans@style{#1}}
@@ -2159,6 +2159,22 @@ wa-ya.sIru ta.hta 'l-jildi
2159\NewDocumentCommand{\SetTranslitConvention}{m}{\def\al@trans@convention{#1}} 2159\NewDocumentCommand{\SetTranslitConvention}{m}{\def\al@trans@convention{#1}}
2160% \end{macrocode} 2160% \end{macrocode}
2161% \end{macro} 2161% \end{macro}
2162% \begin{macro}{\arbup}
2163% \begin{macro}{\SetArbUp}
2164% By default, \cs{arbup} typesets its argument in superscript after
2165% a thin space. This is how the \arb[trans]{tanwIn} that takes place
2166% at the end of a word is displayed in |dmg| mode. \cs{NoArbUp} may
2167% be used either in the preamble or at any point of the document to
2168% print it on the line instead; \cs{ArbUpDflt} restores the default.
2169% \begin{macrocode}
2170\NewDocumentCommand{\al@arbup@dflt}{m}{\textsuperscript{\thinspace#1}}%
2171\NewDocumentCommand{\al@arbup}{m}{\al@arbup@dflt{#1}}
2172\NewDocumentCommand{\arbup}{m}{\al@arbup{#1}}
2173\NewDocumentCommand{\ArbUpDflt}{}{\let\al@arbup=\al@arbup@dflt}
2174\NewDocumentCommand{\NoArbUp}{}{\RenewDocumentCommand{\al@arbup}{m}{##1}}
2175% \end{macrocode}
2176% \end{macro}
2177% \end{macro}
2162% \begin{macro}{\cap} Proper Arabic names or book titles should be 2178% \begin{macro}{\cap} Proper Arabic names or book titles should be
2163% passed to the command \cs{cap} so that they have their first letters 2179% passed to the command \cs{cap} so that they have their first letters
2164% uppercased. \cs{cap} is actually coded in Lua. 2180% uppercased. \cs{cap} is actually coded in Lua.
diff --git a/arabluatex.lua b/arabluatex.lua
index 0bb4756..eb0d55c 100644
--- a/arabluatex.lua
+++ b/arabluatex.lua
@@ -123,11 +123,12 @@ local function takeoutabjad(str)
123 return str 123 return str
124end 124end
125 125
126local function takeoutcap(str) 126local function takeoutcapetc(str)
127 str = string.gsub(str, "(\\cap.?)(%b{})", function(tag, body) 127 str = string.gsub(str, "(\\cap.?)(%b{})", function(tag, body)
128 body = string.sub(body, 2, -2) 128 body = string.sub(body, 2, -2)
129 return string.format("%s", body) 129 return string.format("%s", body)
130 end) 130 end)
131 str = string.gsub(str, "\\linebreak", "")
131 return str 132 return str
132end 133end
133 134
@@ -421,7 +422,7 @@ end
421 422
422function processvoc(str, rules) 423function processvoc(str, rules)
423 str = "\\arb{".. str.."}" 424 str = "\\arb{".. str.."}"
424 str = takeoutcap(str) 425 str = takeoutcapetc(str)
425 str = protectarb(str) 426 str = protectarb(str)
426 str = breakcmd(str) 427 str = breakcmd(str)
427 str = holdcmd(str) 428 str = holdcmd(str)
@@ -436,7 +437,7 @@ end
436 437
437function processfullvoc(str, rules) 438function processfullvoc(str, rules)
438 str = "\\arb{".. str.."}" 439 str = "\\arb{".. str.."}"
439 str = takeoutcap(str) 440 str = takeoutcapetc(str)
440 str = protectarb(str) 441 str = protectarb(str)
441 str = breakcmd(str) 442 str = breakcmd(str)
442 str = holdcmd(str) 443 str = holdcmd(str)
@@ -451,7 +452,7 @@ end
451 452
452function processnovoc(str) 453function processnovoc(str)
453 str = "\\arb{".. str.."}" 454 str = "\\arb{".. str.."}"
454 str = takeoutcap(str) 455 str = takeoutcapetc(str)
455 str = protectarb(str) 456 str = protectarb(str)
456 str = breakcmd(str) 457 str = breakcmd(str)
457 str = holdcmd(str) 458 str = holdcmd(str)
diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua
index a2676e0..577fff7 100644
--- a/arabluatex_fullvoc.lua
+++ b/arabluatex_fullvoc.lua
@@ -238,7 +238,7 @@ tanwinfv = {
238 {a="(T)(aN)", b="%1ً"}, 238 {a="(T)(aN)", b="%1ً"},
239 {a="(ء)(aN)", b="%1ً"}, 239 {a="(ء)(aN)", b="%1ً"},
240 {a="([^TA])(aN)", b="%1ًا"}, 240 {a="([^TA])(aN)", b="%1ًا"},
241 {a="(iN)", b="ٍ"} 241 {a="(iNI?)", b="ٍ"}
242} 242}
243 243
244tanwinfveasy = { -- no assimilations (see below) 244tanwinfveasy = { -- no assimilations (see below)
@@ -273,7 +273,7 @@ tanwinfveasy = { -- no assimilations (see below)
273 {a="(T)(aN)", b="%1ً"}, 273 {a="(T)(aN)", b="%1ً"},
274 {a="(ء)(aN)", b="%1ً"}, 274 {a="(ء)(aN)", b="%1ً"},
275 {a="([^TA])(aN)", b="%1ًا"}, 275 {a="([^TA])(aN)", b="%1ًا"},
276 {a="(iN)", b="ٍ"} 276 {a="(iNI?)", b="ٍ"}
277} 277}
278 278
279trigraphsfv = { -- trigraphs or more 279trigraphsfv = { -- trigraphs or more
diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua
index c96a9da..fbea94b 100644
--- a/arabluatex_trans.lua
+++ b/arabluatex_trans.lua
@@ -127,23 +127,24 @@ hamzatrdmg = {
127} 127}
128 128
129tanwintrdmg = { 129tanwintrdmg = {
130 {a="uNU", b="un"}, 130 {a="uNU", b="\\arbup{un}"},
131 {a="aNU", b="an"}, 131 {a="aNU", b="\\arbup{an}"},
132 {a="iNU", b="in"}, 132 {a="iNU", b="\\arbup{in}"},
133 {a="iNI", b="i\\arbup{n}"},
133 -- tanwīn preceding ʾalif conjunctionis 134 -- tanwīn preceding ʾalif conjunctionis
134 {a="(uN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="uni%2%3"}, 135 {a="(uN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{uni}%2%3"},
135 {a="(aN)(_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạni%3%4"}, 136 {a="(aN)(_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
136 {a="(aN)(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạni%3%4"}, 137 {a="(aN)(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
137 {a="(T)(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="tani%3%4"}, 138 {a="(T)(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="t\\arbup{ani}%3%4"},
138 {a="([^TA])(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1ani%3%4"}, 139 {a="([^TA])(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1\\arbup{ani}%3%4"},
139 {a="(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ini%2%3"}, 140 {a="(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{ini}%2%3"},
140 -- 141 --
141 {a="uN", b="un"}, 142 {a="uN", b="\\arbup{un}"},
142 {a="(aN)(_A)", b="ạn"}, 143 {a="(aN)(_A)", b="ạ\\arbup{n}"},
143 {a="(aN)(Y)", b="ạn"}, 144 {a="(aN)(Y)", b="ạ\\arbup{n}"},
144 {a="(T)(\"?aN)", b="tan"}, 145 {a="(T)(\"?aN)", b="t\\arbup{an}"},
145 {a="([^TA])(\"?aN)", b="%1an"}, 146 {a="([^TA])(\"?aN)", b="%1\\arbup{an}"},
146 {a="iN", b="in"} 147 {a="iN", b="\\arbup{in}"}
147} 148}
148 149
149trigraphstrdmg = { -- trigraphs or more 150trigraphstrdmg = { -- trigraphs or more
@@ -198,7 +199,7 @@ trigraphstrdmg = { -- trigraphs or more
198 {a="(_a)U", b="A"}, 199 {a="(_a)U", b="A"},
199 {a="(_a)I", b="A"}, 200 {a="(_a)I", b="A"},
200 -- assimilations 201 -- assimilations
201 {a="(n)(%s)([rlmnwy])", b="%3%2%3"} 202 {a="(n)(})(%s)([rlmnwy])", b="%4%2%3%4"}
202} 203}
203 204
204trigraphstrdmgeasy = { -- see the differences below under 'easy' tag 205trigraphstrdmgeasy = { -- see the differences below under 'easy' tag
diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua
index 4f5399f..878f351 100644
--- a/arabluatex_voc.lua
+++ b/arabluatex_voc.lua
@@ -267,7 +267,7 @@ tanwin = {
267 {a="(T)(aN)", b="%1ً"}, 267 {a="(T)(aN)", b="%1ً"},
268 {a="(ء)(aN)", b="%1ً"}, 268 {a="(ء)(aN)", b="%1ً"},
269 {a="([^TA])(aN)", b="%1ًا"}, 269 {a="([^TA])(aN)", b="%1ًا"},
270 {a="(iN)", b="ٍ"} 270 {a="(iNI?)", b="ٍ"}
271} 271}
272 272
273tanwineasy = { -- 'easy' requires some lines to be taken out: 273tanwineasy = { -- 'easy' requires some lines to be taken out:
@@ -300,7 +300,7 @@ tanwineasy = { -- 'easy' requires some lines to be taken out:
300 {a="(T)(aN)", b="%1ً"}, 300 {a="(T)(aN)", b="%1ً"},
301 {a="(ء)(aN)", b="%1ً"}, 301 {a="(ء)(aN)", b="%1ً"},
302 {a="([^TA])(aN)", b="%1ًا"}, 302 {a="([^TA])(aN)", b="%1ًا"},
303 {a="(iN)", b="ٍ"} 303 {a="(iNI?)", b="ٍ"}
304} 304}
305 305
306trigraphs = { -- trigraphs or more 306trigraphs = { -- trigraphs or more
lor:#000000">="([^TA])%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1\\arbup{ani}%3%4"}, {a="%-?(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{ini}%2%3"}, -- tanwīn + alif without hamza and kasra (ibn) {a="%-?(uN)(%s)(i)", b="\\arbup{uni}%2'"}, {a="%-?(aN)(_A)(%s)(i)", b="ạ\\arbup{ni}%3%'"}, {a="%-?(aN)(Y)(%s)(i)", b="ạ\\arbup{ni}%3%'"}, {a="(T)%-?(aN)(%s)(i)", b="t\\arbup{ani}%3%'"}, {a="([^TA])%-?(aN)(%s)(i)", b="%1\\arbup{ani}%3%'"}, {a="%-?(iN)(%s)(i)", b="\\arbup{ini}%2%'"}, -- -- {a="uN", b="\\arbup{un}"}, (now included in the last line of this table) {a="%-?(\"?At)%-?([ui])N", b="\\arbup{%1%2n}"}, {a="%-?(aN)(_A)", b="ạ\\arbup{n}"}, {a="%-?(aN)(Y)", b="ạ\\arbup{n}"}, {a="(T)%-?(\"?aN)", b="t\\arbup{an}"}, {a="([^TA])%-?(\"?aN)", b="%1\\arbup{an}"}, {a="%-?([ui])N", b="\\arbup{%1n}"} } trigraphstrdmg = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"}, {a="(%s)'ll(a)([%_]?[dt])", b="%1'll%2%3"}, -- al- + lām {a="^(a)l%-(l)", b="%1l-%2"}, {a="([%s%-])(a)l%-(l)", b="%1%2l-%3"}, -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2-%2"}, {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1%2-"}, {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, -- al- + initial unstable hamza {a="^(a)l%-([uai])", b="%1l-%2"}, {a="([%s%-])(a)l%-([uai])", b="%1%2l-%3"}, -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="%1l-"}, {a="([%s%-])(a)l%-", b="%1%2l-"}, -- diphthongs to be resolved before ʾalif conjunctionis {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, -- art. with waṣla + lām {a="'l%-(l)", b="'l-%1"}, -- art. with waṣla + solar consonant {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"}, -- li-/la- + art. 
+ lām {a="l([ai])%-l%-(l)", b="l%1-%2%2"}, -- assim. art. with waṣla + solar consonant {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"}, -- li-/la- + art. + solar consonant is a special orthography {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, -- li-/la- + assim. art. + solar consonant is a special orthography {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%3"}, -- art. with waṣla + initial unstable hamza {a="'l%-([uai])", b="'l-%1"}, -- art. with waṣla + lunar consonant (i.e. what remains) {a="'l%-", b="'l-"}, -- the silent wāw {a="uU$", b="u"}, {a="uU(%W)", b="u%1"}, {a="aU$", b="a"}, {a="aU(%W)", b="a%1"}, {a="iU$", b="i"}, {a="iU(%W)", b="i%1"}, -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="A"}, {a="(_a)U", b="A"}, {a="(_a)I", b="A"}, -- assimilations {a="(n)(}?)(%s)([rlmnwy])", b="%4%2%3%4"} } trigraphstrdmgeasy = { -- see the differences below under 'easy' tag -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"}, {a="(%s)'ll(a)([%_]?[dt])", b="%1'll%2%3"}, -- al- + lām {a="^(a)l%-(l)", b="%1l-%2"}, {a="([%s%-])(a)l%-(l)", b="%1%2l-%3"}, -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2-%2"}, {a="([%s%-])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1%2-"}, {a="([%s%-])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, -- al- + initial unstable hamza {a="^(a)l%-([uai])", b="%1l-%2"}, {a="([%s%-])(a)l%-([uai])", b="%1%2l-%3"}, -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-([uai])", b="l%1-l-%2"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="%1l-"}, {a="([%s%-])(a)l%-", b="%1%2l-"}, -- diphthongs to be resolved before ʾalif conjunctionis {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"}, {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"}, -- art. with waṣla + lām {a="'l%-(l)", b="'l-%1"}, -- art. 
with waṣla + solar consonant {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"}, -- li-/la- + art. + lām {a="l([ai])%-l%-(l)", b="l%1-%2%2"}, -- assim. art. with waṣla + solar consonant {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"}, -- li-/la- + art. + solar consonant is a special orthography {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"}, -- li-/la- + assim. art. + solar consonant is a special orthography {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%3"}, -- art. with waṣla + initial unstable hamza {a="'l%-([uai])", b="'l-%1"}, -- art. with waṣla + lunar consonant (i.e. what remains) {a="'l%-", b="'l-"}, -- the silent wāw {a="uU$", b="u"}, {a="uU(%W)", b="u%1"}, {a="aU$", b="a"}, {a="aU(%W)", b="a%1"}, {a="iU$", b="i"}, {a="iU(%W)", b="i%1"}, -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="A"}, {a="(_a)U", b="A"}, {a="(_a)I", b="A"} -- assimilations --easy {a="(n)(%s)([rlmnwy])", b="%3%2%3"} } digraphstrdmg = { {a="([uai]%-)(\"?[uai])", b="%1'"}, -- hyphen + initial alif without hamza -- the following two are replaced with the 4 lines next for now -- {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza -- {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza {a="^(\"[uai])", b="'"}, -- initial alif without hamza {a="(%W)(\"[uai])", b="%1'"}, -- initial alif without hamza {a="^([uai])", b="%1"}, -- initial alif without hamza {a="(%W)([uai])", b="%1%2"}, -- initial alif without hamza -- this is not necessary, take out for now: -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza {a="(aW)(%s)(\"?[uai])", b="awu%2%3"}, {a="([^%_][uai])(%s)(\"?[uai])", b="%1%2'"}, {a="(%_A)(%s)(\"?[uai])", b="ạ%2'"}, {a="(Y)(%s)(\"?[uai])", b="ạ%2'"}, {a="(%_a)(%s)(\"?[uai])", b="a%2'"}, {a="(A)(%s)(\"?[uai])", b="a%2'"}, {a="([%_]?[Uu])(%s)(\"?[uai])", b="u%2'"}, {a="([%_]?[Ii])(%s)(\"?[uai])", b="i%2'"}, -- ʾiʿrāb hyphen (begin) {a="(%-)(\"?[UI]na)(%p?%s)", b="\\arbup{%2}%3"}, {a="(%-)(\"?[UI]na)(%p?)$", b="\\arbup{%2}%3"}, 
{a="(%-)(\"?At[ui])(%p?%s)", b="\\arbup{%2}%3"}, {a="(%-)(\"?At[ui])(%p?)$", b="\\arbup{%2}%3"}, {a="(%-)(\"?Ani)(%p?%s)", b="\\arbup{%2}%3"}, {a="(%-)(\"?Ani)(%p?)$", b="\\arbup{%2}%3"}, {a="(%-)(\"?ayni)(%p?%s)", b="\\arbup{%2}%3"}, {a="(%-)(\"?ayni)(%p?)$", b="\\arbup{%2}%3"}, {a="(%-)(\"?[uai])(%p?%s)", b="\\arbup{%2}%3"}, {a="(%-)(\"?[uai])(%p?)$", b="\\arbup{%2}%3"}, -- ʾiʿrāb hyphen (end) -- shorten long vowels preceding ʾalif conjunctionis {a="(U)(A)", b="U"}, {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awu%2%3"}, {a="(%_a)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, {a="(%_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, {a="(A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, {a="(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, {a="([%_]?[Uu])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="u%2%3"}, {a="([%_]?[Ii])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="i%2%3"}, {a="%-%-", b=""}, -- {a="T([^uai])", b="%1"}, {a="T(%p?%s)", b="h%1"}, {a="T(%p?)$", b="h%1"}, {a="_t", b="ṯ"}, {a="%^g", b="ğ"}, {a="%.h", b="ḥ"}, {a="_h", b="ḫ"}, {a="_d", b="ḏ"}, {a="%^s", b="š"}, {a="%.s", b="ṣ"}, {a="%.d", b="ḍ"}, {a="%.t", b="ṭ"}, {a="%.z", b="ẓ"}, {a="%.g", b="ġ"}, -- the following needs to be moved above shortening rules -- {a="(U)(A)", b="ū"}, {a="WA", b="w"}, {a="(a)W", b="%1w"}, {a="_A", b="ạ̄"}, {a="_u", b="ū"}, {a="_a", b="ā"}, {a="_i", b="ī"}, {a="%.b", b="ḅ"}, {a="%.f", b="f̣"}, {a="%.q", b="q̣"}, {a="%.k", b="k"}, {a="%.n", b="ṇ"}, {a="%^d", b="d́"} } singletrdmg = { {a="b", b="b"}, {a="t", b="t"}, {a="j", b="ğ"}, {a="x", b="ḫ"}, {a="d", b="d"}, {a="r", b="r"}, {a="z", b="z"}, {a="s", b="s"}, {a="`", b="ʿ"}, {a="f", b="f"}, {a="q", b="q"}, {a="k", b="k"}, {a="l", b="l"}, {a="m", b="m"}, {a="n", b="n"}, {a="h", b="h"}, {a="w", b="w"}, {a="y", b="y"}, {a="T", b="t"}, {a="\"", b=""}, {a="B", b=""} } longvtrdmg = { {a="A", b="ā"}, {a="U", b="ū"}, {a="I", b="ī"}, {a="Y", b="ạ̄"} } shortvtrdmg = { {a="u", b="u"}, 
{a="a", b="a"}, {a="i", b="i"} } -- loc hamzatrloc = { -- hard coded hamza {a="|\"'", b="ʾ"}, {a="A\"'", b="ʾA"}, {a="[au]\"'", b="ʾ"}, {a="w\"'", b="ʾ"}, {a="i\"'", b="ʾ"}, {a="y\"'", b="ʾ"}, -- hamza takes tašdīd too {a="''([Uu])", b="ʾʾ%1"}, {a="''([Aa])", b="ʾʾ%1"}, {a="''([Ii])", b="ʾʾ%1"}, -- initial long u and i (for a, see below) {a="%'%_U", b="U"}, {a="%'%_I", b="I"}, -- taḫfīfu 'l-hamza {a="^'u'([^uaiUAI])", b="U%1"}, {a="(%W)'u'([^uaiUAI])", b="%1U%2"}, {a="'u'([^uaiUAI])", b="ʾU"}, {a="^'i'([^uaiUAI])", b="I%1"}, {a="(%W)'i'([^uaiUAI])", b="%1I%2"}, {a="'i'([^uaiUAI])", b="ʾI"}, -- madda (historic writing below) {a="^(')(A)", b="%2"}, {a="(%W)(')(A)", b="%1%3"}, {a="^'a'([^uaiUAI])", b="A%1"}, {a="(%W)'a'([^uaiUAI])", b="%1A%2"}, {a="'a'([^uaiUAI])", b="A%1"}, {a="^'a?A", b="A"}, {a="(%W)'a?A", b="%1A"}, {a="'a?A", b="ʾA"}, {a="(A)(')(i)$", b="%1ʾ%3"}, {a="(A)(')(i)(%W)", b="%1ʾ%3%4"}, {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda {a="(A)(')", b="%1ʾ"}, -- historic madda -- initial (needs both ^ and %W patterns) {a="^(')([ua])", b="%2"}, {a="^(')(i)", b="%2"}, {a="(%W)(')([ua])", b="%1%3"}, {a="(%W)(')(i)", b="%1%3"}, -- final {a="([Iy])(')(aN)$", b="%1ʾ%3"}, {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"}, {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"}, {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"}, {a="([UI])(')([uai])$", b="%1ʾ%3"}, {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"}, -- middle {a="(U)(')", b="%1ʾ"}, {a="([Iy])(')", b="%1ʾ"}, {a="([^uai])(')([uU])", b="%1ʾ%3"}, {a="([^uai])(')([aA])", b="%1ʾ%3"}, {a="([^uai])(')([iI])", b="%1ʾ%3"}, {a="(u)(')([uU])", b="%1ʾ%3"}, {a="(u)(')([aA])", b="%1ʾ%3"}, {a="(u)(')([iI])", b="%1ʾ%3"}, {a="(a)(')([aA])", b="%1ʾ%3"}, {a="(a)(')([uU])", b="%1ʾ%3"}, {a="(a)(')([iI])", b="%1ʾ%3"}, {a="(i)(')([aA])", b="%1ʾ%3"}, {a="(i)(')([uU])", b="%1ʾ%3"}, {a="(i)(')([iI])", b="%1ʾ%3"}, {a="(a)(')([^uaiUAI])", b="%1ʾ%3"}, {a="(u)(')([^uaiUAI])", b="%1ʾ%3"}, {a="(i)(')([^uaiUAI])", b="%1ʾ%3"} } tanwintrloc = { {a="%-?uNU", b="un"}, {a="%-?aNU", 
b="an"}, {a="%-?iNU", b="in"}, {a="%-?(\"?At)%-?([ui])N", b="%1%2n"}, {a="%-?([ui])N", b="%1n"}, {a="%-?(aN)(_A)", b="an"}, {a="%-?(aN)(Y)", b="an"}, {a="(T)%-?(\"?aN)", b="tan"}, {a="([^TA])%-?(\"?aN)", b="%1an"} } trigraphstrloc = { -- trigraphs or more -- 'llatI / 'llad_I {a="^'ll(a)([%_]?[dt])", b="all%1%2"}, {a="(%s)'ll(a)([%_]?[dt])", b="%1all%2%3"}, -- al- + lām {a="^(a)l%-(l)", b="%1l-%2"}, {a="(%s)(a)l%-(l)", b="%1%2l-%3"}, -- al- + solar consonant {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"}, {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"}, -- assim. art. + solar consonant {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-"}, {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-"}, -- al- + initial unstable hamza {a="^(a)l%-([uai])", b="%1l-%2"}, {a="(%s)(a)l%-([uai])", b="%1%2l-%3"}, -- li-/la- + art. + initial unstable hamza is a special orthography {a="l([ai])%-l%-([uai])", b="l%1l-%2"}, -- al- + lunar consonant (i.e. what remains) {a="^(a)l%-", b="%1l-"}, {a="(%s)(a)l%-", b="%1%2l-"}, -- art. with waṣla + lām {a="'l%-(l)", b="al-%1"}, -- art. with waṣla + solar consonant {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"}, -- li-/la- + art. + lām {a="l([ai])%-l%-(l)", b="l%1-%2"}, -- assim. art. with waṣla + solar consonant {a="'([%_%^%.]?[tdrzsn])%-", b="al-"}, -- li-/la- + art. + solar consonant is a special orthography {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1l-%2"}, -- li-/la- + assim. art. + solar consonant is a special orthography {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1l-%3"}, -- art. with waṣla + initial unstable hamza {a="'l%-([uai])", b="al-%1"}, -- art. with waṣla + lunar consonant (i.e. 
what remains) {a="'l%-", b="al-"}, -- the silent wāw {a="uU$", b="u"}, {a="uU(%W)", b="u%1"}, {a="aU$", b="a"}, {a="aU(%W)", b="a%1"}, {a="iU$", b="i"}, {a="iU(%W)", b="i%1"}, -- words ending in -āT with silent wāw/yāʾ {a="(_a)UA", b="A"}, {a="(_a)U", b="A"}, {a="(_a)I", b="A"} } digraphstrloc = { -- discard the ʾiʿrāb hyphen (begin) {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"}, {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"}, {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"}, {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"}, {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"}, {a="(%-)(\"?Ani)(%p?)$", b="%2%3"}, {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"}, {a="(%-)(\"?ayni)(%p?)$", b="%2%3"}, {a="(%-)([uai])(%p?%s)", b="%2%3"}, {a="(%-)([uai])(%p?)$", b="%2%3"}, -- discard the ʾiʿrāb hyphen (end) {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza {a="%-%-", b=""}, {a="uww", b="ūw"}, {a="iyy", b="īy"}, {a="([tkdsg])(h)", b="%1'%2"}, -- {a="T([^uai])", b="h%1"}, {a="T$", b="h"}, {a="T(%W)", b="h%1"}, {a="_t", b="th"}, {a="%^g", b="j"}, {a="%.h", b="ḥ"}, {a="_h", b="kh"}, {a="_d", b="dh"}, {a="%^s", b="sh"}, {a="%.s", b="ṣ"}, {a="%.d", b="ḍ"}, {a="%.t", b="ṭ"}, {a="%.z", b="ẓ"}, {a="%.g", b="gh"}, {a="(U)(A)", b="ū"}, {a="WA", b="w"}, {a="(a)W", b="%1w"}, {a="_A", b="á"}, {a="_u", b="ū"}, {a="_a", b="ā"}, {a="_i", b="ī"}, {a="%.b", b="b"}, {a="%.f", b="f"}, {a="%.q", b="q"}, {a="%.k", b="k"}, {a="%.n", b="n"}, {a="%^d", b="d"} } singletrloc = { {a="b", b="b"}, {a="t", b="t"}, {a="j", b="j"}, {a="x", b="kh"}, {a="d", b="d"}, {a="r", b="r"}, {a="z", b="z"}, {a="s", b="s"}, {a="`", b="`"}, {a="f", b="f"}, {a="q", b="q"}, {a="k", b="k"}, {a="l", b="l"}, {a="m", b="m"}, {a="n", b="n"}, {a="h", b="h"}, {a="w", b="w"}, {a="y", b="y"}, {a="T", b="t"}, {a="\"", b=""}, {a="B", b=""} } longvtrloc = { {a="A", b="ā"}, {a="U", b="ū"}, {a="I", b="ī"}, {a="Y", b="á"}, } shortvtrloc = { {a="u", b="u"}, {a="a", 
b="a"}, {a="i", b="i"} } finaltrloc = { {a="ʾ", b="'"}, }