From b8c169ea96b982c664dcbde90e29ca87cfe39188 Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Sat, 28 May 2016 10:36:47 +0200 Subject: =?UTF-8?q?done=20implementing=20=CA=BEi=CA=BFr=C4=81b=20=C4=A1ayr?= =?UTF-8?q?=20mun=E1=B9=A3arif,=20done=20documenting.=20getting=20close=20?= =?UTF-8?q?to=20v1.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arabluatex.dtx | 113 ++++++++++++++++++++++++++++++++----------- arabluatex_fullvoc.lua | 129 +++++++++++++++++++++++++------------------------ arabluatex_novoc.lua | 52 ++++++++++---------- arabluatex_trans.lua | 23 +++++++-- arabluatex_voc.lua | 116 ++++++++++++++++++++++---------------------- 5 files changed, 254 insertions(+), 179 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index 613257b..52dc19f 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -448,9 +448,10 @@ % corresponding \arb[trans]{tanwIn} (\arb{BuN}, \arb{B|aN}\,, % \arb{TaN}, \arb{BaN_A} or \arb{BiN}) is generated. Finally, \meta{u, % a, i} at the commencement of a word indicate a \enquote{connective -% \arb[trans]{'alif}\,} (\arb[trans]{'alifu 'l-wa.sli}), but |voc| mode -% does not show the \arb[trans]{wa.slaT} above the \arb[trans]{'alif}; -% instead, the accompanying vowel is expressed (\arb{u a i}). +% \arb[trans]{'alif}\,} (\arb[trans]{'alifu 'l-wa.sli}), but |voc| +% mode does not show the \arb[trans]{wa.slaT} above the +% \arb[trans]{'alif}; instead, the accompanying vowel may be expressed +% at the beginning of a sentence (\arb{u} \arb{a} \arb{i}). % % \DescribeOption{fullvoc}\\ \label{fullvoc-mode}In addition to what % the |voc| mode does, |fullvoc| expresses the \arb[trans]{sukUn} and @@ -863,7 +864,7 @@ vowels (see §~3). % % \paragraph{\arb[trans]{\cap{`amruNU}}, and the silent % \arb[novoc]{U}} To that name a silent \arb[novoc]{U} is added to -% distinguish it from \arb[trans]{\cap{`umaruN}}: see \textcite[i. 12 +% distinguish it from \arb[trans]{\cap{`umar-u}}: see \textcite[i. 
12 % C]{Wright}. In no way this affects the sound of the % \arb[trans]{tanwIn}, so it has to be discarded in |trans| mode: % \begin{quote} @@ -996,7 +997,7 @@ vowels (see §~3). % \arb[voc]{'a^syA'-a} \arb[trans]{'a^syA'-a}, |.zim'aN| % \arb[voc]{.zim'aN} \arb[trans]{.zim'aN}. % -% \textbf{\arb[trans]{ta_hfIfu 'l-hamzaTi}}: if the +% \textbf{\arb[trans]{ta_hfIf-u 'l-hamzaT-i}}: if the % \arb[trans]{hamzaT} has \arb[trans]{jazmaT} and is preceded by % \emph{\arb[trans]{'alif} hamzatum}, it must be changed into the % letter of prolongation that is homogeneous with the preceding @@ -1029,8 +1030,8 @@ vowels (see §~3). % % \fi % -% \package{arabluatex} otherwise determines \arb[trans]{al-'alifu -% 'l-mamdUdaTu} by context analysis. +% \package{arabluatex} otherwise determines \arb[trans]{al-'alif-u +% 'l-mamdUdaT-u} by context analysis. % % \begin{quote} % |'is'AduN| \arb[voc]{'is'AduN} \arb[trans]{'is'AduN}, |'AkilUna| @@ -1158,7 +1159,6 @@ vowels (see §~3). % |fa-lammA| |ra'aW| |'l-najm-a| \arb[fullvoc]{fa-lammA ra'aW % 'l-najm-a} \arb[trans]{fa-lammA ra'aW 'l-najm-a}. % -% \newpage % \textbf{Particles}:--- % \begin{compactenum}[(a)] % \item \arb[trans]{li-}: \arb[trans]{'alif-u 'l-wa.sl-i} is omitted @@ -1198,11 +1198,17 @@ vowels (see §~3). % \arb[trans]{'awi ismu-hu}, |zayduN| |ibn-u| |`amriNU| % \arb[fullvoc]{\cap{z}ayduN ibn-u \cap{`amriNU}} % \arb[trans]{\cap{z}ayduN ibn-u -% \cap{`amriNU}},\footnote{\enquote{\arb[trans]{\cap{z}ayd} is the -% son of \arb[trans]{\cap{`a}mr}}: the second noun is not in -% apposition to the first, but forms part of the predicate. Hence -% \arb[voc]{zayduN ibn-u `amriNU} and not \arb[voc]{zayd-u bn-u -% `amriNU}, \enquote{Zayd, son of ʿAmr}.} |imru'-u| |'l-qays-i| +% \cap{`amriNU}},\footnote{\label{fn:zayd-is-son}% +% \enquote{\arb[trans]{\cap{z}ayd} is the son of +% \arb[trans]{\cap{`a}mr}}: the second noun is not in apposition to +% the first, but forms part of the predicate. 
Hence \arb[voc]{zayduN +% ibn-u `amriNU} and not \arb[voc]{zayd-u bn-u `amriNU}, +% \enquote{Zayd, son of ʿAmr}.} |`umar-u| |ibn-u| |'l-_ha.t.tAb-i| +% \arb[fullvoc]{\cap{`umar}-u ibn-u \cap{'l-_ha.t.tAb-i}} +% \arb[trans]{\cap{`umar}-u ibn-u +% \cap{'l-_ha.t.tAb-i}},\footnote{\enquote{\arb[trans]{\cap{`umar}} +% is the son of \arb[trans]{\cap{al-_ha.t.tAb}}} (see +% \vref{fn:zayd-is-son}).} |imru'-u| |'l-qays-i| % \arb[fullvoc]{imru'-u 'l-qays-i} \arb[trans]{\cap{i}mru'-u % \cap{'l-qays-i}}, |la-aymun-u| |'l-l_ah-i| % \arb[fullvoc]{la-aymun-u 'l-l_ah-i} \arb[trans]{la-aymun-u @@ -1219,7 +1225,9 @@ vowels (see §~3). % 'l-nAs-i}, |'abU| |'l-wazIr-i| \arb[fullvoc]{'abU 'l-wazIr-i} % \arb[trans]{'abU 'l-wazIr-i}, |fI| |'l-ibtidA'-i| \arb[fullvoc]{fI % 'l-ibtidA'-i} \arb[trans]{fI 'l-ibtidA'-i}, |_dU 'l-i`lAl-i| -% \arb[fullvoc]{_dU 'l-i`lAl-i} \arb[trans]{_dU 'l-i`lAl-i}. +% \arb[fullvoc]{_dU 'l-i`lAl-i} \arb[trans]{_dU 'l-i`lAl-i}, +% |maqh_A| |'l-'amIr-i| \arb[voc]{maqh_A 'l-'amIr-i} +% \arb[trans]{maqh_A 'l-'amIr-i}. % \end{quote} % % \subparagraph{\arb[trans]{'alif-u 'l-wa.sl-i} preceded by a diphthong} @@ -1247,8 +1255,8 @@ vowels (see §~3). % cases \parencite[i. 22 A--C]{Wright}, it is encoded explicitly, like % so:--- % \begin{quote} -% |'antumu| |'l-kA_dibUna| \arb[fullvoc]{'antumu 'l-kA_dibUna} -% \arb[trans]{'antumu 'l-kA_dibUna}, |ra'aytumu| |'l-rajul-a| +% |'antumu| |'l-kA_dib-Una| \arb[fullvoc]{'antumu 'l-kA_dib-Una} +% \arb[trans]{'antumu 'l-kA_dib-Una}, |ra'aytumu| |'l-rajul-a| % \arb[fullvoc]{ra'aytumu 'l-rajul-a} \arb[trans]{ra'aytumu % 'l-rajul-a}, |mani| |'l-ka_d_dAb-u| \arb[fullvoc]{mani % 'l-ka_d_dAb-u} \arb[trans]{mani 'l-ka_d_dAb-u}, |qatalati| @@ -1315,9 +1323,9 @@ vowels (see §~3). 
% |'a_hAfu| |mina| |'l-malik-i| |'lla_dI| |ya.zlimu| |'l-nAs-a| % \arb[fullvoc]{'a_hAfu mina 'l-malik-i 'lla_dI ya.zlimu 'l-nAs-a} % \arb[trans]{'a_hAfu mina 'l-malik-i 'lla_dI ya.zlimu 'l-nAs-a}, -% |`udtu| |'l-^say_h-a| |'lladI| |huwa| |marI.duN| -% \arb[fullvoc]{`udtu 'l-^say_h-a 'lladI huwa marI.duN} -% \arb[trans]{`udtu 'l-^say_h-a 'lladI huwa marI.duN}, |mA| |'anA| +% |`udtu| |'l-^say_h-a| |'lla_dI| |huwa| |marI.duN| +% \arb[fullvoc]{`udtu 'l-^say_h-a 'lla_dI huwa marI.duN} +% \arb[trans]{`udtu 'l-^say_h-a 'lla_dI huwa marI.duN}, |mA| |'anA| % |bi-'lla_dI| |qA'iluN| |la-ka| |^say'aN| \arb[fullvoc]{mA 'anA % bi-'lla_dI qA'iluN la-ka ^say'aN} \arb[trans]{mA 'anA bi-'lla_dI % qA'iluN la-ka ^say'aN}. @@ -1329,11 +1337,11 @@ vowels (see §~3). % \end{quote} % The other forms are encoded regularly as |al-l| or |'l-l|:--- % \begin{quote} -% |fa-'innA| |na_dkuru| |'l-.sawtayni| |'l-la_dayni| |rawaynA-humA| -% |`an| |ja.h.zaT-a| \arb[fullvoc]{fa-'innA na_dkuru 'l-.sawtayni -% 'l-la_dayni rawaynA-humA `an \cap{ja.h.zaT-a}} \arb[trans]{fa-'innA -% na_dkuru 'l-.sawtayni 'l-la_dayni rawaynA-humA `an -% \cap{ja.h.zaT-a}}. +% |fa-'innA| |na_dkuru| |'l-.sawt-ayni| |'l-la_dayni| |rawaynA-humA| +% |`an| |ja.h.zaT-a| \arb[fullvoc]{fa-'innA na_dkuru 'l-.sawt-ayni +% 'l-la_dayni rawaynA-humA `an \cap{ja.h.zaT-a}} +% \arb[trans]{fa-'innA na_dkuru 'l-.sawt-ayni 'l-la_dayni +% rawaynA-humA `an \cap{ja.h.zaT-a}}. % % And also: |al-la_dAni| \arb[fullvoc]{al-la_dAni} % \arb[trans]{al-la_dAni}, |al-la_dayni| \arb[fullvoc]{al-la_dayni} @@ -1793,8 +1801,8 @@ vowels (see §~3). % \end{compactenum} % \DescribeMacro{\arbup} By default, \package{arabluatex} applies rule % \ref{ref:dmg-up-rend}. Once delimited by a set of Lua functions, -% every \arb[trans]{tanwIn} is passed as an argument on to a -% \cs{arbup} command which is set to \cs{textsuperscript}. +% \arb[trans]{'i`rAb} is passed as an argument on to a \cs{arbup} +% command which is set to \cs{textsuperscript}. 
% % \DescribeMacro{\NoArbUp} \DescribeMacro{\ArbUpDflt} \cs{NoArbUp} may % be used either in the preamble or at any point of the document in @@ -1803,7 +1811,7 @@ vowels (see §~3). % any point of the document. % % \DescribeMacro{\SetArbUp} Finally, \cs{SetArbUp}\marg{formatting -% directives} may be used to customize the way \arb[trans]{tanwIn} is +% directives} may be used to customize the way \arb[trans]{'i`rAb} is % displayed. To take one example, here is how Arabic % \arb[trans]{'i`rAb} may be rendered as subscript text:--- % \iffalse @@ -1822,7 +1830,54 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu % As shown in the above example, |#1| is the token that is replaced % with the actual \arb[trans]{tanwIn} in the formatting directives of % the \cs{SetArbUp} command. -% +% +% \paragraph{\arb[trans]{'i`rAb} boundaries} +% Every declinable noun (\arb[trans]{mu`rab}) may be declined either +% with or without \arb[trans]{tanwIn}, viz. \arb[trans]{mun.sarifuN} +% or \arb[trans]{.gayr-u mun.sarifiN}. The former is automatically +% parsed by \package{arabluatex}, whereas the latter has to be +% delimited with a hyphen, like so:--- +% \begin{quote} +% \arb[trans]{\textbf{mun.sarif}}: |mu`allimuN| +% \arb[voc]{mu`allimuN} \arb[trans]{mu`allimuN}, |kA'inuN| +% \arb[voc]{kA'inuN} \arb[trans]{kA'inuN}, |kA'inAtuN| +% \arb[voc]{kA'inAtuN} \arb[trans]{kA'inAtuN}, |\cap{`amraNU}| +% \arb[voc]{\cap{`amraNU}} \arb[trans]{\cap{`amraNU}}, |fataN_A| +% \arb[voc]{fataN_A} \arb[trans]{fataN_A}. +% +% \arb[trans]{\textbf{.gayr mun.sarif}}: |al-mu`allim-u| +% \arb[voc]{al-mu`allim-u} \arb[trans]{al-mu`allim-u}, |kitAb-Ani| +% \arb[voc]{kitAb-Ani} \arb[trans]{kitAb-Ani}, |ra^sa'-Ani| +% \arb[voc]{ra^sa'-Ani} \arb[trans]{ra^sa'-Ani}, |sAriq-Una| +% \arb[voc]{sAriq-Una} \arb[trans]{sAriq-Una}, |qA.d-Una| +% \arb[voc]{qA.d-Una} \arb[trans]{qA.d-Una}, |al-.zulm-Atu| +% \arb[voc]{al-.zulm-Atu} \arb[trans]{al-.zulm-Atu}.
+% \end{quote} +% +% \begin{quoting} +% \textsc{Rem.}~\emph{a.} As the \arb[trans]{tanwIn} is passed over +% in pronunciation when it is followed by the letters +% \arb[novoc]{r}, \arb[novoc]{l}, \arb[novoc]{m}, \arb[novoc]{w}, +% \arb[novoc]{y} (see \vref{ref:assimilation}), it may be desirable +% to further distinguish it by putting it above the line, but not to +% do the same for \arb[trans]{.gayr mun.sarif} terminations. This +% can be achieved by simply omitting the hyphen before any +% \arb[trans]{.gayr mun.sarif} termination:---\\ +% |kAna| |.ganiyyaN| |l_akinna-hu| |labisa| |^gubbaTaN| |mumazzaqaN| +% |'aydu-hA| \arb[voc]{kAna .ganiyyaN l_akinna-hu labisa ^gubbaTaN +% mumazzaqaN 'aydu-hA} \arb[trans]{kAna .ganiyyaN l_akinna-hu labisa +% ^gubbaTaN mumazzaqaN 'aydu-hA}. +% +% \textsc{Rem.}~\emph{b.} Although the hyphen before the +% \arb[trans]{tanwIn} is optional as \package{arabluatex} always +% parses nouns with such a termination, it may also be used to +% better mark the inflectional endings:---\\ +% |mana`a| |'l-nAs-a| |kAffaT-aN| |min| |mu_hA.tabati-hi| +% |'a.had-uN| |bi-sayyidi-nA| \arb[voc]{mana`a 'l-nAs-a kAffaT-aN +% min mu_hA.tabati-hi 'a.had-uN bi-sayyidi-nA} \arb[trans]{mana`a +% 'l-nAs-a kAffaT-aN min mu_hA.tabati-hi 'a.had-uN bi-sayyidi-nA}.
+% \end{quoting} +% % \subsection{Examples} % \label{sec:examples-translit} % Here follows in transliteration the story of @@ -2226,7 +2281,7 @@ wa-ya.sIru ta.hta 'l-jild-i % \end{macrocode} % \end{macro} % \begin{macro}{\arbup} -% \changes{v1.3}{2016/05/19}{\arb[trans]{tanwIn} is now written as +% \changes{v1.3}{2016/05/19}{\arb[trans]{'i`rAb} is now written as % superscript text in \texttt{dmg} mode by default.} % \begin{macro}{\NoArbUp} % \begin{macro}{\ArbUpDflt} diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index f9d4e60..e4a9495 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua @@ -53,8 +53,9 @@ hamzafv = { {a="'i'([^uaiUAI])", b="إI%1"}, -- madda (historic writing below) {a="'a'([^uaiUAI])", b="آ%1"}, ---tmp {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, - {a="([^UIui])'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="([^uiUI])\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="^\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, + {a="(%W)\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, {a="(A)(')(uN?)$", b="aآء%3"}, {a="(A)(')(uN?)(%W)", b="aآء%3%4"}, {a="(A)(')(iN?)$", b="aآء%3"}, @@ -148,8 +149,9 @@ hamzafveasy = { -- differences marked below with 'easy' {a="'i'([^uaiUAI])", b="إI%1"}, -- madda (historic writing below) {a="'a'([^uaiUAI])", b="آ%1"}, ---tmp {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, - {a="([^UIui])'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="([^uiUI])\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="^\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, + {a="(%W)\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, --easy (begin) {a="(A)(')(uN?)$", b="aاء%3"}, {a="(A)(')(uN?)(%W)", b="aاء%3%4"}, @@ -215,73 +217,73 @@ hamzafveasy = { -- differences marked below with 'easy' } tanwinfv = { - {a="uNU", b="ٌو"}, - {a="aNU", b="ًوا"}, - {a="iNU", b="ٍو"}, - {a="([uai]N)(%s)([uai])", b="%1%2ٱ"}, - {a="(aN[%_]?[AY])(%s)([uai])", 
b="%1%2ٱ"}, + {a="%-?uNU", b="ٌو"}, + {a="%-?aNU", b="ًوا"}, + {a="%-?iNU", b="ٍو"}, + {a="%-?([uai]N)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"}, -- assimilations (begin) - {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, - {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, - {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, - {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, - {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, - {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, - {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, + {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, + {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, + {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, + {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, + {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, + {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, + {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"}, -- assimilations (end) -- quoted tanwīn (begin) - {a="(\"uN)", b=""}, - {a="(B)(\"aN)", b="%1"}, - {a="(\"aN)(_A)", b="ى"}, - {a="(\"aN)(Y)", b="ى"}, - {a="(T)(\"aN)", b="%1"}, - {a="(ء)(\"aN)", b="%1"}, - {a="([^TA])(\"aN)", b="%1ا"}, - {a="(\"iN)", b=""}, + {a="%-?(\"uN)", b=""}, + {a="(B)%-?(\"aN)", b="%1"}, + {a="%-?(\"aN)(_A)", b="ى"}, + {a="%-?(\"aN)(Y)", b="ى"}, + {a="(T)%-?(\"aN)", b="%1"}, + {a="(ء)%-?(\"aN)", b="%1"}, + {a="([^TA])%-?(\"aN)", b="%1ا"}, + {a="%-?(\"iNI?)", b=""}, -- quoted tanwīn (end) - {a="(uN)", b="ٌ"}, - {a="(B)(aN)", b="%1ً"}, - {a="(aN)(_A)", b="ًى"}, - {a="(aN)(Y)", b="ًى"}, - {a="(T)(aN)", b="%1ً"}, - {a="(ء)(aN)", b="%1ً"}, - {a="([^TA])(aN)", b="%1ًا"}, - {a="(iNI?)", b="ٍ"} + {a="%-?(uN)", b="ٌ"}, + {a="(B)%-?(aN)", b="%1ً"}, + {a="%-?(aN)(_A)", b="ًى"}, + {a="%-?(aN)(Y)", b="ًى"}, + {a="(T)%-?(aN)", b="%1ً"}, + {a="(ء)%-?(aN)", b="%1ً"}, + {a="([^TA])%-?(aN)", b="%1ًا"}, + {a="%-?(iNI?)", b="ٍ"} } tanwinfveasy = { -- no assimilations (see below) - {a="uNU", b="ٌو"}, - {a="aNU", b="ًوا"}, - {a="iNU", b="ٍو"}, - {a="([uai]N)(%s)([uai])", b="%1%2ٱ"}, - {a="(aN[%_]?[AY])(%s)([uai])", 
b="%1%2ٱ"}, + {a="%-?uNU", b="ٌو"}, + {a="%-?aNU", b="ًوا"}, + {a="%-?iNU", b="ٍو"}, + {a="%-?([uai]N)(%s)([uai])", b="%1%2ٱ"}, + {a="%-?(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"}, -- assimilations (begin) ---easy {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, ---easy {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, ---easy {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, ---easy {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, ---easy {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, ---easy {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, ---easy {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, +--easy {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, +--easy {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, +--easy {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, +--easy {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +--easy {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +--easy {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, +--easy {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"}, -- assimilations (end) -- quoted tanwīn (begin) - {a="(\"uN)", b=""}, - {a="(B)(\"aN)", b="%1"}, - {a="(\"aN)(_A)", b="ى"}, - {a="(\"aN)(Y)", b="ى"}, - {a="(T)(\"aN)", b="%1"}, - {a="(ء)(\"aN)", b="%1"}, - {a="([^TA])(\"aN)", b="%1ا"}, - {a="(\"iN)", b=""}, + {a="%-?(\"uN)", b=""}, + {a="(B)%-?(\"aN)", b="%1"}, + {a="%-?(\"aN)(_A)", b="ى"}, + {a="%-?(\"aN)(Y)", b="ى"}, + {a="(T)%-?(\"aN)", b="%1"}, + {a="(ء)%-?(\"aN)", b="%1"}, + {a="([^TA])%-?(\"aN)", b="%1ا"}, + {a="%-?(\"iNI?)", b=""}, -- quoted tanwīn (end) - {a="(uN)", b="ٌ"}, - {a="(B)(aN)", b="%1ً"}, - {a="(aN)(_A)", b="ًى"}, - {a="(aN)(Y)", b="ًى"}, - {a="(T)(aN)", b="%1ً"}, - {a="(ء)(aN)", b="%1ً"}, - {a="([^TA])(aN)", b="%1ًا"}, - {a="(iNI?)", b="ٍ"} + {a="%-?(uN)", b="ٌ"}, + {a="(B)%-?(aN)", b="%1ً"}, + {a="%-?(aN)(_A)", b="ًى"}, + {a="%-?(aN)(Y)", b="ًى"}, + {a="(T)%-?(aN)", b="%1ً"}, + {a="(ء)%-?(aN)", b="%1ً"}, + {a="([^TA])%-?(aN)", b="%1ًا"}, + {a="%-?(iNI?)", b="ٍ"} } trigraphsfv = { -- trigraphs or more @@ -457,10 +459,10 @@ digraphsfv = { {a="yy", b="يّ"}, -- sukūn begin 
--tmp: next line is added - {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-(%_?[uaiUAIY])", b="%1%2"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-([uaiUAI])", b="%1%2"}, {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])$", b="%1ْ"}, {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, - {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-(%_?[^uaiUAIًٌٍ])", b="%1ْ%2"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"}, -- take out sukūn in cases of assimilation {a="(n)(ْ)(%s)([روي])", b="%1%3%4"}, {a="(n)(ْ)(%s)([ل])", b="%1%3%4"}, @@ -559,7 +561,7 @@ digraphsfveasy = { -- see the diffenrences under 'easy' marker below -- so there be no need to edit them: {a="ْ", b=""}, --tmp: next line is added --- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-([uai])", b="%1%2"}, +-- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-([uaiUAI])", b="%1%2"}, -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])$", b="%1ْ"}, -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIًٌٍ])", b="%1ْ%2"}, @@ -654,4 +656,3 @@ singlefveasy = { -- see the differences under 'easy' tag below {a="([^0-9])%-([^0-9])", b="%1%2"}, {a="B", b="ـ"}, } - diff --git a/arabluatex_novoc.lua b/arabluatex_novoc.lua index 16f3b51..f069f5a 100644 --- a/arabluatex_novoc.lua +++ b/arabluatex_novoc.lua @@ -23,36 +23,36 @@ along with this program. If not, see --]] tanwinnv = { - {a="uNU", b="و"}, - {a="aNU", b="وا"}, - {a="iNU", b="و"}, + {a="%-?uNU", b="و"}, + {a="%-?aNU", b="وا"}, + {a="%-?iNU", b="و"}, -- assimilations (begin). These are good but may not apply here. 
--- {a="(uN)(%s)([rlmnwy])", b="|%2%3%3"}, --- {a="(aN)(_A)(%s)([rlmnwy])", b="ى%3%4%4"}, --- {a="(aN)(Y)(%s)([rlmnwy])", b="ى%3%4%4"}, --- {a="(T)(aN)(%s)([rlmnwy])", b="%1%3%4%4"}, --- {a="(ء)(aN)(%s)([rlmnwy])", b="%1%3%4%4"}, --- {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ا%3%4%4"}, --- {a="(iN)(%s)([rlmnwy])", b="|%2%3%3"}, +-- {a="%-?(uN)(%s)([rlmnwy])", b="|%2%3%3"}, +-- {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ى%3%4%4"}, +-- {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ى%3%4%4"}, +-- {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1%3%4%4"}, +-- {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1%3%4%4"}, +-- {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ا%3%4%4"}, +-- {a="%-?(iNI?)(%s)([rlmnwy])", b="|%2%3%3"}, -- assimilations (end) -- "quoted" tanwīn (begin) - {a="(\"uN)", b="ٌ"}, - {a="(B)(\"aN)", b="%1ً"}, - {a="(\"aN)(_A)", b="ًى"}, - {a="(\"aN)(Y)", b="ًى"}, - {a="(T)(\"aN)", b="%1ً"}, - {a="(ء)(\"aN)", b="%1ً"}, - {a="([^TA])(\"aN)", b="%1ًا"}, - {a="(\"iN)", b="ٍ"}, + {a="%-?(\"uN)", b="ٌ"}, + {a="(B)%-?(\"aN)", b="%1ً"}, + {a="%-?(\"aN)(_A)", b="ًى"}, + {a="%-?(\"aN)(Y)", b="ًى"}, + {a="(T)%-?(\"aN)", b="%1ً"}, + {a="(ء)%-?(\"aN)", b="%1ً"}, + {a="([^TA])%-?(\"aN)", b="%1ًا"}, + {a="%-?(\"iNI?)", b="ٍ"}, -- "quoted" tanwīn (end) - {a="(uN)", b=""}, - {a="(B)(aN)", b="%1"}, - {a="(aN)(_A)", b="ى"}, - {a="(aN)(Y)", b="ى"}, - {a="(T)(aN)", b="%1"}, - {a="(ء)(aN)", b="%1"}, - {a="([^TA])(aN)", b="%1ا"}, - {a="(iN)", b=""}, + {a="%-?(uN)", b=""}, + {a="(B)%-?(aN)", b="%1"}, + {a="%-?(aN)(_A)", b="ى"}, + {a="%-?(aN)(Y)", b="ى"}, + {a="(T)%-?(aN)", b="%1"}, + {a="(ء)%-?(aN)", b="%1"}, + {a="([^TA])%-?(aN)", b="%1ا"}, + {a="%-?(iNI?)", b=""}, -- initial straight double quote gives a connective ʾalif. This has -- nothing to do with the tanwīn, but I put it here for time being. 
{a="^\"", b="ٱ"}, diff --git a/arabluatex_trans.lua b/arabluatex_trans.lua index 45fd3d5..76e297e 100644 --- a/arabluatex_trans.lua +++ b/arabluatex_trans.lua @@ -138,8 +138,15 @@ tanwintrdmg = { {a="(T)%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="t\\arbup{ani}%3%4"}, {a="([^TA])%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1\\arbup{ani}%3%4"}, {a="%-?(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{ini}%2%3"}, +-- tmp (+ ibn) + {a="%-?(uN)(%s)(i)", b="\\arbup{uni}%2'"}, + {a="%-?(aN)(_A)(%s)(i)", b="ạ\\arbup{ni}%3%'"}, + {a="%-?(aN)(Y)(%s)(i)", b="ạ\\arbup{ni}%3%'"}, + {a="(T)%-?(aN)(%s)(i)", b="t\\arbup{ani}%3%'"}, + {a="([^TA])%-?(aN)(%s)(i)", b="%1\\arbup{ani}%3%'"}, + {a="%-?(iN)(%s)(i)", b="\\arbup{ini}%2%'"}, -- --- {a="uN", b="\\arbup{un}"}, (now included in last line in this table) +-- {a="uN", b="\\arbup{un}"}, (now included in the last line of this table) {a="%-?(aN)(_A)", b="ạ\\arbup{n}"}, {a="%-?(aN)(Y)", b="ạ\\arbup{n}"}, {a="(T)%-?(\"?aN)", b="t\\arbup{an}"}, @@ -260,10 +267,16 @@ trigraphstrdmgeasy = { -- see the differences below under 'easy' tag digraphstrdmg = { {a="([uai]%-)(\"?[uai])", b="%1'"}, -- hyphen + initial alif without hamza {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza + -- this is not necessary, take out for now: -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza {a="(aW)(%s)(\"?[uai])", b="awu%2%3"}, {a="([^%_][uai])(%s)(\"?[uai])", b="%1%2'"}, - {a="([%_]?[AYa])(%s)(\"?[uai])", b="a%2'"}, +--tmp (more complex, see next 4 l.) 
+-- {a="([%_]?[AYa])(%s)(\"?[uai])", b="a%2'"}, + {a="(%_A)(%s)(\"?[uai])", b="ạ%2'"}, + {a="(Y)(%s)(\"?[uai])", b="ạ%2'"}, + {a="(%_a)(%s)(\"?[uai])", b="a%2'"}, + {a="(A)(%s)(\"?[uai])", b="a%2'"}, {a="([%_]?[Uu])(%s)(\"?[uai])", b="u%2'"}, {a="([%_]?[Ii])(%s)(\"?[uai])", b="i%2'"}, -- ʾiʿrāb hyphen (begin) @@ -281,7 +294,11 @@ digraphstrdmg = { -- shorten long vowels preceding ʾalif conjunctionis {a="(U)(A)", b="U"}, {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awu%2%3"}, - {a="([%_]?[AYa])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, +--tmp {a="([%_]?[AYa])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, + {a="(%_a)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, + {a="(%_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, + {a="(A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="a%2%3"}, + {a="(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ%2%3"}, {a="([%_]?[Uu])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="u%2%3"}, {a="([%_]?[Ii])(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="i%2%3"}, {a="%-%-", b=""}, diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 0097450..0331794 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua @@ -93,8 +93,9 @@ hamza = { {a="'i'([^uaiUAI])", b="إI%1"}, -- madda (historic writing below) {a="'a'([^uaiUAI])", b="آ%1"}, ---tmp {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, - {a="([^UIui])'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="([^uiUI])\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="^\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, + {a="(%W)\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, {a="(A)(')(uN?)$", b="aآء%3"}, {a="(A)(')(uN?)(%W)", b="aآء%3%4"}, {a="(A)(')(iN?)$", b="aآء%3"}, @@ -179,8 +180,9 @@ hamzaeasy = { -- differences marked below with 'easy' {a="'i'([^uaiUAI])", b="إI%1"}, -- madda (historic writing below) {a="'a'([^uaiUAI])", b="آ%1"}, ---tmp {a="'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, 
- {a="([^UIui])'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="([^uiUI])\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, + {a="^\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="آ%1"}, + {a="(%W)\'a?A([%_%^%.]?[%`%'btjghdrzsfqklmnywAY])", b="%1آ%2"}, --easy (begin) {a="(A)(')(uN?)$", b="aاء%3"}, {a="(A)(')(uN?)(%W)", b="aاء%3%4"}, @@ -246,69 +248,69 @@ hamzaeasy = { -- differences marked below with 'easy' } tanwin = { - {a="uNU", b="ٌو"}, - {a="aNU", b="ًوا"}, - {a="iNU", b="ٍو"}, + {a="%-?uNU", b="ٌو"}, + {a="%-?aNU", b="ًوا"}, + {a="%-?iNU", b="ٍو"}, -- assimilations (begin) - {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, - {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, - {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, - {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, - {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, - {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, - {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, + {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, + {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, + {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, + {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, + {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, + {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, + {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"}, -- assimilations (end) -- quoted tanwīn (begin) - {a="(\"uN)", b=""}, - {a="(B)(\"aN)", b="%1"}, - {a="(\"aN)(_A)", b="ى"}, - {a="(\"aN)(Y)", b="ى"}, - {a="(T)(\"aN)", b="%1"}, - {a="(ء)(\"aN)", b="%1"}, - {a="([^TA])(\"aN)", b="%1ا"}, - {a="(\"iN)", b=""}, + {a="%-?(\"uN)", b=""}, + {a="(B)%-?(\"aN)", b="%1"}, + {a="%-?(\"aN)(_A)", b="ى"}, + {a="%-?(\"aN)(Y)", b="ى"}, + {a="(T)%-?(\"aN)", b="%1"}, + {a="(ء)%-?(\"aN)", b="%1"}, + {a="([^TA])%-?(\"aN)", b="%1ا"}, + {a="%-?(\"iNI?)", b=""}, -- quoted tanwīn (end) - {a="(uN)", b="ٌ"}, - {a="(B)(aN)", b="%1ً"}, - {a="(aN)(_A)", b="ًى"}, - {a="(aN)(Y)", b="ًى"}, - {a="(T)(aN)", b="%1ً"}, - {a="(ء)(aN)", b="%1ً"}, - {a="([^TA])(aN)", b="%1ًا"}, - {a="(iNI?)", b="ٍ"} + {a="%-?(uN)", b="ٌ"}, 
+ {a="(B)%-?(aN)", b="%1ً"}, + {a="%-?(aN)(_A)", b="ًى"}, + {a="%-?(aN)(Y)", b="ًى"}, + {a="(T)%-?(aN)", b="%1ً"}, + {a="(ء)%-?(aN)", b="%1ً"}, + {a="([^TA])%-?(aN)", b="%1ًا"}, + {a="%-?(iNI?)", b="ٍ"} } tanwineasy = { -- 'easy' requires some lines to be taken out: - {a="uNU", b="ٌو"}, - {a="aNU", b="ًوا"}, - {a="iNU", b="ٍو"}, + {a="%-?uNU", b="ٌو"}, + {a="%-?aNU", b="ًوا"}, + {a="%-?iNU", b="ٍو"}, -- assimilations (begin) --- {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, --- {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, --- {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, --- {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, --- {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, --- {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, --- {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, +-- {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, +-- {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, +-- {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, +-- {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +-- {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, +-- {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, +-- {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"}, -- assimilations (end) -- quoted tanwīn (begin) - {a="(\"uN)", b=""}, - {a="(B)(\"aN)", b="%1"}, - {a="(\"aN)(_A)", b="ى"}, - {a="(\"aN)(Y)", b="ى"}, - {a="(T)(\"aN)", b="%1"}, - {a="(ء)(\"aN)", b="%1"}, - {a="([^TA])(\"aN)", b="%1ا"}, - {a="(\"iN)", b=""}, + {a="%-?(\"uN)", b=""}, + {a="(B)%-?(\"aN)", b="%1"}, + {a="%-?(\"aN)(_A)", b="ى"}, + {a="%-?(\"aN)(Y)", b="ى"}, + {a="(T)%-?(\"aN)", b="%1"}, + {a="(ء)%-?(\"aN)", b="%1"}, + {a="([^TA])%-?(\"aN)", b="%1ا"}, + {a="%-?(\"iNI?)", b=""}, -- quoted tanwīn (end) - {a="(uN)", b="ٌ"}, - {a="(B)(aN)", b="%1ً"}, - {a="(aN)(_A)", b="ًى"}, - {a="(aN)(Y)", b="ًى"}, - {a="(T)(aN)", b="%1ً"}, - {a="(ء)(aN)", b="%1ً"}, - {a="([^TA])(aN)", b="%1ًا"}, - {a="(iNI?)", b="ٍ"} + {a="%-?(uN)", b="ٌ"}, + {a="(B)%-?(aN)", b="%1ً"}, + {a="%-?(aN)(_A)", b="ًى"}, + {a="%-?(aN)(Y)", b="ًى"}, + {a="(T)%-?(aN)", b="%1ً"}, + {a="(ء)%-?(aN)", b="%1ً"}, 
+ {a="([^TA])%-?(aN)", b="%1ًا"}, + {a="%-?(iNI?)", b="ٍ"} } trigraphs = { -- trigraphs or more @@ -452,7 +454,7 @@ digraphs = { --tmp-added hyphen + initial alif without hamza: {a="(%-)([uai])([%^%_%.%`]?)(%a)", b="%1ا%3%4"}, {a="^([uai])", b="ا%1"}, -- initial alif without hamza - {a="(%s)([uai])", b="%1ا%2"}, -- initial alif without hamza + {a="(%s)([uai])", b="%1ا"}, -- initial alif without hamza {a="%-%-", b="ـ"}, {a="ؤؤ", b="ؤّ"}, {a="أأ", b="أّ"}, -- cgit v1.2.3