From b1ea269d110270d2dc6d0e58e8c772fc86167a9e Mon Sep 17 00:00:00 2001
From: Robert Alessi <alessi@robertalessi.net>
Date: Tue, 22 Mar 2016 19:56:48 +0100
Subject: =?UTF-8?q?bugfix=20in=20fullvoc=20tables=20(tanw=C4=ABn=20+=20con?=
 =?UTF-8?q?nective=20=CA=BEalif);=20still=20documenting...?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 arabluatex.dtx         | 229 +++++++++++++++++++++++++++++++++++++++++++------
 arabluatex.lua         |   4 +-
 arabluatex_fullvoc.lua |  25 ++++++
 3 files changed, 230 insertions(+), 28 deletions(-)

diff --git a/arabluatex.dtx b/arabluatex.dtx
index d61316d..8ed305c 100644
--- a/arabluatex.dtx
+++ b/arabluatex.dtx
@@ -324,9 +324,9 @@
 % does not show the \arb[trans]{wa.slaT} above the \arb[trans]{'alif};
 % instead, the accompanying vowel is expressed (\arb{u a i}).
 %
-% \DescribeOption{fullvoc}\\ In addition to what the |voc| mode does,
-% |fullvoc| expresses the \arb[trans]{sukUn} and the
-% \arb[trans]{wa.slaT}.
+% \DescribeOption{fullvoc}\\ \label{fullvoc-mode}In addition to what
+% the |voc| mode does, |fullvoc| expresses the \arb[trans]{sukUn} and
+% the \arb[trans]{wa.slaT}.
 %
 % \DescribeOption{novoc}\\ None of the diacritics is showed in |novoc|
 % mode, unless otherwise specified (see \enquote{quoting} technique
@@ -393,7 +393,7 @@ consonants, though three of them are also used as vowels
 \begin{arabluacode}
  \begin{arab}
    'at_A .sadIquN 'il_A ju.hA ya.tlubu min-hu .himAra-hu
-   li-yarkaba-hu fI safraTiN qa.sIraTiN wa-qAla la-hu:
+   li-yarkaba-hu fI safraTiN qa.sIraTiN. wa-qAla la-hu:
    \enquote{sawfa 'u`Idu-hu 'ilay-ka fI 'l-masA'i
    wa-'adfa`u la-ka 'ujraTaN.} fa-qAla ju.hA:
    \enquote{'anA 'AsifuN jiddaN 'annI lA 'asta.tI`u 'an
@@ -700,6 +700,11 @@ consonants, though three of them are also used as vowels
 %   \arb[trans]{'iqlIduN}, |'anna| \arb[voc]{'anna}
 %   \arb[trans]{'anna}, |'inna| \arb[voc]{'inna} \arb[trans]{'inna}.
 %
+%   \arb[trans]{hamzaT} followed by the long vowel \arb[novoc]{U} is
+%   encoded |'_U|: |'_Ul_A| \arb[voc]{'_Ul_A} \arb[trans]{'_Ul_A}, |'_UlU|
+%   \arb[voc]{'_UlU} \arb[trans]{'_UlU}, |'_UlA'ika|
+%   \arb[voc]{'_UlA'ika} \arb[trans]{'_UlA'ika}.
+%
 %   \textbf{Middle \arb[trans]{hamzaT}}: |xA.ti'Ina|
 %   \arb[voc]{xA.ti'Ina} \arb[trans]{xA.ti'Ina}, |ru'UsuN|
 %   \arb[voc]{ru'UsuN}, \arb[trans]{ru'UsuN}, |xa.tI'aTuN|
@@ -788,40 +793,212 @@ consonants, though three of them are also used as vowels
 % \end{quote}
 %
 % \paragraph{\arb[trans]{^saddaT}}
-% The \emph{necessary} \arb[trans]{ta^sdId} that always follows a
-% vowel, whether short or long \parencite[see][i. 15 A--B]{Wright}, is
-% encoded in writing the consonant that carries it twice:
+% \arb[trans]{ta^sdId} is either \emph{necessary} or \emph{euphonic}.
+%
+% \subparagraph{The necessary \arb[trans]{ta^sdId}} always follows a
+% vowel, whether short or long \parencite[see][i. 15 A--B]{Wright}. It
+% is encoded in writing the consonant that carries it twice:
 % \begin{quote}
 %   |`allaqa| \arb[voc]{`allaqa} \arb[trans]{`allaqa}, |mAdduN|
 %   \arb[voc]{mAdduN} \arb[trans]{mAdduN}, |'ammara|
 %   \arb[voc]{'ammara} \arb[trans]{ammara}, |murruN| \arb[voc]{murruN}
 %   \arb[trans]{murruN}
 % \end{quote}
-%
-% The same applies e.g. for the \emph{euphonic} \arb[trans]{ta^sdId}
-% with the letters \arb[novoc]{r}, \arb[novoc]{l}, \arb[novoc]{m},
+% 
+% \subparagraph{The euphonic \arb[trans]{ta^sdId}} always follows a
+% vowelless consonant which is passed over in pronunciation and
+% assimilated to a following consonant. It may be
+% found \parencite[i. 15 B--16 C]{Wright}:---
+% \begin{enumerate}[(a)]
+% \item With the \emph{solar} letters \arb[novoc]{t}, \arb[novoc]{_t},
+%   \arb[novoc]{d}, \arb[novoc]{_d}, \arb[novoc]{r}, \arb[novoc]{z},
+%   \arb[novoc]{s}, \arb[novoc]{^s}, \arb[novoc]{.s}, \arb[novoc]{.d},
+%   \arb[novoc]{.t}, \arb[novoc]{.z}, \arb[novoc]{l}, \arb[novoc]{n},
+%   after the article \arb[fullvoc]{al-}:---
+% \iffalse
+%<*example>
+% \fi
+\begin{arabluacode}[text only]
+  Unlike \package{arabtex} and \package{arabxetex},
+  \package{arabluatex} \emph{never requires the solar letter to be
+    written twice}, as it automatically generates the euphonic
+  \arb[trans]{ta^sdId} above the letter that carries it, whether the
+  article is written in the assimilated form or not, e.g. |al-^samsu|
+  \arb[voc]{al-^samsu} \arb[trans]{al-^samsu}, or |a^s-^samsu|
+  \arb[voc]{a^s-^samsu} \arb[trans]{a^s-^samsu}.
+\end{arabluacode}
+% \iffalse
+%</example>
+% \fi
+% \begin{quote}
+%   |al-tamru| \arb[voc]{al-tamru} \arb[trans]{al-tamru},
+%   |al-ra.hm_anu| \arb[voc]{al-ra.hm_anu} \arb[trans]{al-ra.hm_anu},
+%   |al-.zulmu| \arb[voc]{al-.zulmu} \arb[trans]{al-.zulmu},
+%   |al-lu.gaTu| \arb[voc]{al-lu.gaTu} \arb[trans]{al-lu.gaTu}.
+% \end{quote}
+% \item With the letters \arb[novoc]{r}, \arb[novoc]{l}, \arb[novoc]{m},
 % \arb[novoc]{w}, \arb[novoc]{y} after \arb[voc]{n} with
-% \arb[trans]{jazmaT}, or with the letter \arb[voc]{t} after the
-% dentals (\arb[novoc]{_t}, \arb[novoc]{d}, \arb[novoc]{_d},
-% \arb[novoc]{.d}, \arb[novoc]{.t}, \arb[novoc]{.z}):
+% \arb[trans]{jazmaT}, and also after the \arb[trans]{tanwIn}:---
 % \begin{quote}
-%   |min rabbi-hi| \arb[voc]{min rabbi-hi}, \arb[trans]{min
-%   rabbi-hi},\footnote{\arb[trans]{min rabbi-hi} is actually wrong;
-%   the correct romanization should be \emph{mir rabbi-hi}. Please
-%   adapt. This feature will be implemented in the next release of
-%   \package{arabluatex}.} %
-%   |min layliN| \arb[voc]{min layliN} \arb[trans]{min
-%   layliN},\footnote{For \emph{mil laylin}.}, %
-%   |'an yaqtula| \arb[voc]{'an yaqtula} \arb[trans]{'an
-%   yaqtula},\footnote{For \emph{ʾay yaqtula}.} |kitAbuN mubInuN|
-%   \arb[voc]{kitAbuN mubInuN} \arb[trans]{kitAbuN mubInuN}.
-%
-%   The second kind of assimilation, e.g. \arb[voc]{labi_tttu} for
+%   Note the absence of \arb[trans]{sukUn} above the passed over
+%   \arb[novoc]{n} in the following examples, each of which is
+%   accompanied with a consistent transliteration: |min rabbi-hi|
+%   \arb[fullvoc]{min rabbi-hi}, \arb[trans]{min rabbi-hi}, %
+%   |min layliN| \arb[fullvoc]{min layliN} \arb[trans]{min layliN}, %
+%   |'an yaqtula| \arb[fullvoc]{'an yaqtula} \arb[trans]{'an yaqtula}.
+%   
+%   With \arb[trans]{tanwIn}: |kitAbuN mubInuN| %
+%   \arb[voc]{kitAbuN mubInuN} \arb[trans]{kitAbuN mubInuN}.%
+% \end{quote}
+% \item With the letter \arb[voc]{t} after the dentals
+%   \arb[novoc]{_t}, \arb[novoc]{d}, \arb[novoc]{_d}, \arb[novoc]{.d},
+%   \arb[novoc]{.t}, \arb[novoc]{.z} in certain parts of the verb:
+%   this kind of assimilation, e.g. \arb[voc]{labi_tttu} for
 %   \arb[voc]{labi_ttu} \arb[trans]{labi_ttu}, will be discarded here,
 %   as it is largely condemned by the
 %   grammarians \parencite[see][i. 16 B--C]{Wright}.
+% \end{enumerate}
+%
+% \paragraph{The definite article and the \arb[trans]{'alifu 'l-wa.sli}}
+% At the beginning of a sentence, \txarb{\char"0671} is never written,
+% as \arb[fullvoc]{'l-.hamdu li-ll_ahi}; instead, to indicate that the
+% \arb[trans]{'alif} is a connective \arb[trans]{'alif}
+% (\arb[trans]{'alifu 'l-wa.sli}), the \arb[trans]{hamzaT} is omitted
+% and only its accompanying vowel is expressed:
+% \begin{quote}
+%   |al-.hamdu li-ll_ahi| \arb[fullvoc]{al-.hamdu li-ll_ahi}
+%   \arb[trans]{al-.hamdu li-ll_ahi}.
+% \end{quote}
+% As said above in section\vref{fullvoc-mode}, |fullvoc| is the mode
+% in which \package{arabluatex} expresses the \arb[trans]{sukUn} and
+% the \arb[trans]{wa.slaT}. \package{arabluatex} will take care of
+% doing this automatically provided that the vowel which is to be
+% absorbed by the final vowel of the preceding word is properly
+% encoded, like so:---
+% \begin{enumerate}[(a)]
+% \item Definite article at the beginning of a sentence is encoded\\
+%   \arabluaverb{al-}, or \arabluaverb{a<solar letter>-}\\ if one
+%   wishes to mark the assimilation---which is in no way required, as
+%   \package{arabluatex} will detect all cases of assimilation.
+% \item Definite article inside sentences is encoded\\ \arabluaverb{'l-}
+%   or \arabluaverb{'<solar letter>-}.
+% \item In all remaining cases of elision, the \arb[trans]{'alifu
+% 'l-wa.sli} is expressed by the vowel that accompanies the omitted
+% \arb[trans]{hamzaT}: \meta{u, a, i}.
+% \end{enumerate}
+% \begin{quote}
+%   \textbf{Article}: |bAbu| |'l-madrasaTi| \arb[fullvoc]{bAbu
+%   'l-madrasaTi} \arb[trans]{bAbu 'l-madrasaTi}, |al-maqAlaTu|
+%   |'l-'_Ul_A| \arb[fullvoc]{al-maqAlaTu 'l-'_Ul_A}
+%   \arb[trans]{al-maqAlaTu 'l-'_Ul_A}, |al-lu.gaTu| |'l-`arabiyyaTu|
+%   \arb[fullvoc]{al-lu.gaTu 'l-`arabiyyaTu} \arb[trans]{al-lu.gaTu
+%   'l-`arabiyyaTu}, |fI| |.sinA`aTi| |'l-.tibbi| \arb[fullvoc]{fI
+%   .sinA`aTi 'l-.tibbi} \arb[trans]{fI .sinA`aTi 'l-.tibbi}, |'il_A|
+%   |'l-intiqA.di| \arb[fullvoc]{'il_A 'l-intiqA.di} \arb[trans]{'il_A
+%   'l-intiqA.di}, |fI| |'l-ibtidA'i| \arb[fullvoc]{fI 'l-ibtidA'i}
+%   \arb[trans]{fI 'l-ibtidA'i}, |'abU| |'l-wazIri| \arb[fullvoc]{'abU
+%   'l-wazIri} \arb[trans]{'abU 'l-wazIri}, |fa-lammA| |ra'aW|
+%   |'l-najma| \arb[fullvoc]{fa-lammA ra'aW 'l-najma}
+%   \arb[trans]{fa-lammA ra'aW 'l-najma}.
+%
+%   \textbf{Particles}:---
+%   \begin{enumerate}[(a)]
+%   \item \arb[trans]{li-}: \arb[trans]{'alifu 'l-wa.sli} is omitted
+%     in the article \arb[fullvoc]{al} when it is preceded by the
+%     preposition \arb[fullvoc]{li}: |li-l-rajuli|
+%     \arb[fullvoc]{li-l-rajuli}
+%     \arb[trans]{li-l-rajuli}.\\
+%     If the first letter of the noun be \arb[novoc]{l}, then the
+%     \arb[novoc]{l} of the article also falls away, but
+%     \package{arabluatex} is aware of that: |li-l-laylaTi|
+%     \arb[fullvoc]{li-l-laylaTi} \arb[trans]{li-l-laylaTi}.
+%   \item \arb[trans]{la-}: the same applies for the affirmative
+%   particle \arb[fullvoc]{la}: |la-l-.haqqu|
+%   \arb[fullvoc]{la-l-.haqqu} \arb[trans]{la-l-.haqqu}.
+% \item With the other particles, \arb[trans]{'alifu 'l-wa.sli} is
+%   expressed: |fI| |'l-madInaTi| \arb[fullvoc]{fI 'l-madInaTi}
+%   \arb[trans]{fI 'l-madInaTi}, |wa-'l-rajulu|
+%   \arb[fullvoc]{wa-'l-rajulu} \arb[trans]{wa-'l-rajulu},
+%   |bi-'l-qalami| \arb[fullvoc]{bi-'l-qalami}
+%   \arb[trans]{bi-'l-qalami}, |bi-'l-ru`bi|
+%   \arb[fullvoc]{bi-'l-ru`bi} \arb[trans]{bi-'l-ru`bi}.
+%   \end{enumerate}
+%   
+%   \textbf{Perfect active, imperative, nomen actionis}: |qAla|
+%   |isma`| \arb[fullvoc]{qAla isma`} \arb[trans]{qAla isma`}, |qAla|
+%   |uqtul| \arb[fullvoc]{qAla uqtul} \arb[trans]{qAla uqtul}, |huwa|
+%   |inhazama| \arb[fullvoc]{huwa inhazama} \arb[trans]{huwa
+%   inhazama}, |wa-ustu`mila| \arb[fullvoc]{wa-ustu`mila}
+%   \arb[trans]{wa-ustu`mila}, |qad-i| |in.sarafa| \arb[fullvoc]{qadi
+%   in.sarafa} \arb[trans]{qadi in.sarafa}, |al-iqtidAru|
+%   \arb[fullvoc]{al-iqtidAru} \arb[trans]{al-iqtidAru}, |'il_A|
+%   |'l-intiqA.di| \arb[fullvoc]{'il_A 'l-intiqA.di} \arb[trans]{'il_A
+%   'l-intiqA.di}, |lawi| |istaqbala| \arb[fullvoc]{lawi istaqbala}
+%   \arb[trans]{lawi istaqbala}.
+%
+%   \textbf{Other cases}: |'awi| |ismu-hu| \arb[fullvoc]{'awi ismu-hu}
+%   \arb[trans]{'awi ismu-hu}, |.hunaynu| |ibnu| |'is.h_aqa|
+%   \arb[fullvoc]{.hunaynu ibnu 'is.h_aqa} \arb[trans]{\cap{.h}unaynu
+%   ibnu \cap{'is.h_aqa}}, |imru'u| |'l-qaysi| \arb[fullvoc]{imru'u
+%   'l-qaysi} \arb[trans]{\cap{i}mru'u \cap{'l-qaysi}}, |la-aymunu|
+%   |'l-l_ahi| \arb[fullvoc]{la-aymunu 'l-l_ahi} \arb[trans]{la-aymunu
+%   'l-l_ahi}.
+% \end{quote}
+%
+% \subparagraph{\arb[trans]{'alifu 'l-wa.sli} preceded by a long
+% vowel} The long vowel preceding the connective \arb[trans]{'alif} is
+% shortened in pronunciation \parencite[i. 21 B--D]{Wright}. This
+% does not appear in the Arabic script, but \package{arabluatex} takes
+% it into account in some transliteration standards:---
+% \begin{quote}
+%   |fI| |'l-nAsi| \arb[fullvoc]{fI 'l-nAsi} \arb[trans]{fI 'l-nAsi},
+%   |'abU| |'l-wazIri| \arb[fullvoc]{'abU 'l-wazIri} \arb[trans]{'abU
+%   'l-wazIri}, |fI| |'l-ibtidA'i| \arb[fullvoc]{fI 'l-ibtidA'i}
+%   \arb[trans]{fI 'l-ibtidA'i}, |_dU 'l-i`lAli| \arb[fullvoc]{_dU
+%   'l-i`lAli} \arb[trans]{_dU 'l-i`lAli}.
+% \end{quote}
+%
+% \subparagraph{\arb[trans]{'alifu 'l-wa.sli} preceded by a diphthong}
+% The diphthong is resolved into two simple vowels \parencite[i. 21
+% D--22 A]{Wright} viz. \emph{ay}~→ \emph{\u{a}\u{i}} and \emph{aw}~→
+% \emph{\u{a}\u{u}}. \package{arabluatex} detects the cases in which
+% this rule applies:---
+% \begin{quote}
+%   |fI| |`aynay| |'l-maliki| \arb[fullvoc]{fI `aynay 'l-maliki}
+%   \arb[trans]{fI `aynay 'l-maliki}, |ix^say| |'l-qawma|
+%   \arb[fullvoc]{ix^say 'l-qawma} \arb[trans]{ix^say 'l-qawma},
+%   |mu.s.tafaw| |'l-l_ahi| \arb[fullvoc]{mu.s.tafaw 'l-l_ahi}
+%   \arb[trans]{mu.s.tafaw 'l-l_ahi}.
+%
+%   |ramaW| |'l-.hijAraTa| \arb[fullvoc]{ramaW 'l-.hijAraTa}
+%   \arb[trans]{ramaW 'l-.hijAraTa}, |fa-lammA| |ra'aW| |'l-najma|
+%   \arb[fullvoc]{fa-lammA ra'aW 'l-najma} \arb[trans]{fa-lammA ra'aW
+%   'l-najma}.
+% \end{quote}
+%
+% \subparagraph{\arb[trans]{'alifu 'l-wa.sli} preceded by a consonant
+% with \arb[trans]{sukUn}}
+% The vowel which the consonant takes is either its original vowel, or
+% that which belongs to the connective \arb[trans]{'alif}, or the
+% \arb[trans]{kasraT}; in most of the cases \parencite[i. 22
+% A--C]{Wright}, it is encoded explicitly, like so:---
+% \begin{quote}
+%   |'antumu| |'l-kA_dibUna| \arb[fullvoc]{'antumu 'l-kA_dibUna}
+%   \arb[trans]{'antumu 'l-kA_dibUna}, |ra'aytumu| |'l-rajula|
+%   \arb[fullvoc]{ra'aytumu 'l-rajula} \arb[trans]{ra'aytumu
+%   'l-rajula}, |mani| |'l-ka_d_dAbu| \arb[fullvoc]{mani 'l-ka_d_dAbu}
+%   \arb[trans]{mani 'l-ka_d_dAbu}, |qatalati| |'l-rUmu|
+%   \arb[fullvoc]{qatalati 'l-rUmu} \arb[trans]{qatalati
+%   \cap{'l-rUmu}}.
+% \end{quote}
+% However, the Arabic script does not show the \arb[trans]{kasraT}
+% which is taken by the nouns having \arb[trans]{tanwIn} although it
+% is explicit in pronunciation and must appear in some transliteration
+% standards. \package{arabluatex} takes care of this automatically:---
+% \begin{quote}
+%   |mu.hammaduN| |'l-nabI| \arb[fullvoc]{mu.hammaduN 'l-nabI}
+%   \arb[trans]{\cap{m}u.hammaduN 'l-nabI}.
 % \end{quote}
-% 
 %
 % \StopEventually{}
 %
diff --git a/arabluatex.lua b/arabluatex.lua
index de33bb3..c14e396 100644
--- a/arabluatex.lua
+++ b/arabluatex.lua
@@ -165,8 +165,8 @@ local function fullvoc(str)
     for i = 1,#hamzafv do
 		inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b)
 	end
-	for i = 1,#tanwin do
-		inside  = string.gsub(inside, tanwin[i].a, tanwin[i].b)
+	for i = 1,#tanwinfv do
+		inside  = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b)
 	end
 	for i = 1,#trigraphsfv do
 		inside = string.gsub(inside, trigraphsfv[i].a, trigraphsfv[i].b)
diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua
index e6cdb42..8f6ce66 100644
--- a/arabluatex_fullvoc.lua
+++ b/arabluatex_fullvoc.lua
@@ -173,6 +173,31 @@ trigraphsfv = { -- trigraphs or more
    {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"}
 }
 
+tanwinfv = { -- ordered gsub rules (a = Lua pattern, b = replacement); fullvoc() in arabluatex.lua applies them from first to last
+   {a="uNU", b="ٌو"},
+   {a="aNU", b="ًوا"},
+   {a="iNU", b="ٍو"},
+   {a="([uai]N)(%s)([uai])", b="%1%2ٱ"}, -- tanwIn, whitespace, bare vowel: only the vowel is replaced; %1 keeps the tanwIn code for the rules below
+   {a="(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"},
+   -- assimilations (begin): tanwIn, whitespace, then one of r l m n w y; the replacement writes that letter twice
+   {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
+   {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
+   {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
+   {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
+   {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
+   {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
+   {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"},
+   -- assimilations (end); the rules below are the context-free fallbacks and must stay after the ones above
+   {a="(uN)", b="ٌ"},
+   {a="(B)(aN)", b="%1ً"},
+   {a="(aN)(_A)", b="ًى"},
+   {a="(aN)(Y)", b="ًى"},
+   {a="(T)(aN)", b="%1ً"},
+   {a="(ء)(aN)", b="%1ً"},
+   {a="([^TA])(aN)", b="%1ًا"},
+   {a="(iN)", b="ٍ"}
+}
+
 -- this is new
 digraphsfv = {
    -- diphthongs to be resolved before ʾalif conjunctionis
-- 
cgit v1.2.3