From b1ea269d110270d2dc6d0e58e8c772fc86167a9e Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Tue, 22 Mar 2016 19:56:48 +0100 Subject: =?UTF-8?q?bugfix=20in=20fullvoc=20tables=20(tanw=C4=ABn=20+=20con?= =?UTF-8?q?nective=20=CA=BEalif);=20still=20documenting...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arabluatex.dtx | 229 +++++++++++++++++++++++++++++++++++++++++++------ arabluatex.lua | 4 +- arabluatex_fullvoc.lua | 25 ++++++ 3 files changed, 230 insertions(+), 28 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index d61316d..8ed305c 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -324,9 +324,9 @@ % does not show the \arb[trans]{wa.slaT} above the \arb[trans]{'alif}; % instead, the accompanying vowel is expressed (\arb{u a i}). % -% \DescribeOption{fullvoc}\\ In addition to what the |voc| mode does, -% |fullvoc| expresses the \arb[trans]{sukUn} and the -% \arb[trans]{wa.slaT}. +% \DescribeOption{fullvoc}\\ \label{fullvoc-mode}In addition to what +% the |voc| mode does, |fullvoc| expresses the \arb[trans]{sukUn} and +% the \arb[trans]{wa.slaT}. % % \DescribeOption{novoc}\\ None of the diacritics is showed in |novoc| % mode, unless otherwise specified (see \enquote{quoting} technique @@ -393,7 +393,7 @@ consonants, though three of them are also used as vowels \begin{arabluacode} \begin{arab} 'at_A .sadIquN 'il_A ju.hA ya.tlubu min-hu .himAra-hu - li-yarkaba-hu fI safraTiN qa.sIraTiN wa-qAla la-hu: + li-yarkaba-hu fI safraTiN qa.sIraTiN. wa-qAla la-hu: \enquote{sawfa 'u`Idu-hu 'ilay-ka fI 'l-masA'i wa-'adfa`u la-ka 'ujraTaN.} fa-qAla ju.hA: \enquote{'anA 'AsifuN jiddaN 'annI lA 'asta.tI`u 'an @@ -700,6 +700,11 @@ consonants, though three of them are also used as vowels % \arb[trans]{'iqlIduN}, |'anna| \arb[voc]{'anna} % \arb[trans]{'anna}, |'inna| \arb[voc]{'inna} \arb[trans]{'inna}. 
% +% \arb[trans]{hamzaT} followed by the long vowel \arb[novoc]{U} is +% encoded |'_U|: |'_Ul_A| \arb[voc]{'_Ul_A} \arb[trans]{'_Ul_A}, |'_UlU| +% \arb[voc]{'_UlU} \arb[trans]{'_UlU}, |'_UlA'ika| +% \arb[voc]{'_UlA'ika} \arb[trans]{'_UlA'ika}. +% % \textbf{Middle \arb[trans]{hamzaT}}: |xA.ti'Ina| % \arb[voc]{xA.ti'Ina} \arb[trans]{xA.ti'Ina}, |ru'UsuN| % \arb[voc]{ru'UsuN}, \arb[trans]{ru'UsuN}, |xa.tI'aTuN| @@ -788,40 +793,212 @@ consonants, though three of them are also used as vowels % \end{quote} % % \paragraph{\arb[trans]{^saddaT}} -% The \emph{necessary} \arb[trans]{ta^sdId} that always follows a -% vowel, whether short or long \parencite[see][i. 15 A--B]{Wright}, is -% encoded in writing the consonant that carries it twice: +% \arb[trans]{ta^sdId} is either \emph{necessary} or \emph{euphonic}. +% +% \subparagraph{The necessary \arb[trans]{ta^sdId}} always follows a +% vowel, whether short or long \parencite[see][i. 15 A--B]{Wright}. It +% is encoded in writing the consonant that carries it twice: % \begin{quote} % |`allaqa| \arb[voc]{`allaqa} \arb[trans]{`allaqa}, |mAdduN| % \arb[voc]{mAdduN} \arb[trans]{mAdduN}, |'ammara| % \arb[voc]{'ammara} \arb[trans]{ammara}, |murruN| \arb[voc]{murruN} % \arb[trans]{murruN} % \end{quote} -% -% The same applies e.g. for the \emph{euphonic} \arb[trans]{ta^sdId} -% with the letters \arb[novoc]{r}, \arb[novoc]{l}, \arb[novoc]{m}, +% +% \subparagraph{The euphonic \arb[trans]{ta^sdId}} always follows a +% vowelless consonant which is passed over in pronunciation and +% assimilated to a following consonant. It may be +% found \parencite[i. 
15 B--16 C]{Wright}:--- +% \begin{enumerate}[(a)] +% \item With the \emph{solar} letters \arb[novoc]{t}, \arb[novoc]{_t}, +% \arb[novoc]{d}, \arb[novoc]{_d}, \arb[novoc]{r}, \arb[novoc]{z}, +% \arb[novoc]{s}, \arb[novoc]{^s}, \arb[novoc]{.s}, \arb[novoc]{.d}, +% \arb[novoc]{.t}, \arb[novoc]{.z}, \arb[novoc]{l}, \arb[novoc]{n}, +% after the article \arb[fullvoc]{al-}:--- +% \iffalse +%<*example> +% \fi +\begin{arabluacode}[text only] + Unlike \package{arabtex} and \package{arabxetex}, + \package{arabluatex} \emph{never requires the solar letter to be + written twice}, as it automatically generates the euphonic + \arb[trans]{ta^sdId} above the letter that carries it, whether the + article is written in the assimilated form or not, e.g. |al-^samsu| + \arb[voc]{al-^samsu} \arb[trans]{al-^samsu}, or |a^s-^samsu| + \arb[voc]{a^s-^samsu} \arb[trans]{a^s-^samsu}. +\end{arabluacode} +% \iffalse +% +% \fi +% \begin{quote} +% |al-tamru| \arb[voc]{al-tamru} \arb[trans]{al-tamru}, +% |al-ra.hm_anu| \arb[voc]{al-ra.hm_anu} \arb[trans]{al-ra.hm_anu}, +% |al-.zulmu| \arb[voc]{al-.zulmu} \arb[trans]{al-.zulmu}, +% |al-lu.gaTu| \arb[voc]{al-lu.gaTu} \arb[trans]{al-lu.gaTu}. +% \end{quote} +% \item With the letters \arb[novoc]{r}, \arb[novoc]{l}, \arb[novoc]{m}, % \arb[novoc]{w}, \arb[novoc]{y} after \arb[voc]{n} with -% \arb[trans]{jazmaT}, or with the letter \arb[voc]{t} after the -% dentals (\arb[novoc]{_t}, \arb[novoc]{d}, \arb[novoc]{_d}, -% \arb[novoc]{.d}, \arb[novoc]{.t}, \arb[novoc]{.z}): +% \arb[trans]{jazmaT}, and also after the \arb[trans]{tanwIn}:--- % \begin{quote} -% |min rabbi-hi| \arb[voc]{min rabbi-hi}, \arb[trans]{min -% rabbi-hi},\footnote{\arb[trans]{min rabbi-hi} is actually wrong; -% the correct romanization should be \emph{mir rabbi-hi}. Please -% adapt. 
This feature will be implemented in the next release of -% \package{arabluatex}.} % -% |min layliN| \arb[voc]{min layliN} \arb[trans]{min -% layliN},\footnote{For \emph{mil laylin}.}, % -% |'an yaqtula| \arb[voc]{'an yaqtula} \arb[trans]{'an -% yaqtula},\footnote{For \emph{ʾay yaqtula}.} |kitAbuN mubInuN| -% \arb[voc]{kitAbuN mubInuN} \arb[trans]{kitAbuN mubInuN}. -% -% The second kind of assimilation, e.g. \arb[voc]{labi_tttu} for +% Note the absence of \arb[trans]{sukUn} above the passed over +% \arb[novoc]{n} in the following examples, each of which is +% accompanied with a consistent transliteration: |min rabbi-hi| +% \arb[fullvoc]{min rabbi-hi}, \arb[trans]{min rabbi-hi}, % +% |min layliN| \arb[fullvoc]{min layliN} \arb[trans]{min layliN}, % +% |'an yaqtula| \arb[fullvoc]{'an yaqtula} \arb[trans]{'an yaqtula}. +% +% With \arb[trans]{tanwIn}: |kitAbuN mubInuN| % +% \arb[voc]{kitAbuN mubInuN} \arb[trans]{kitAbuN mubInuN}.% +% \end{quote} +% \item With the letter \arb[voc]{t} after the dentals +% \arb[novoc]{_t}, \arb[novoc]{d}, \arb[novoc]{_d}, \arb[novoc]{.d}, +% \arb[novoc]{.t}, \arb[novoc]{.z} in certain parts of the verb: +% this kind of assimilation, e.g. \arb[voc]{labi_tttu} for % \arb[voc]{labi_ttu} \arb[trans]{labi_ttu}, will be discarded here, % as it is largely condemned by the % grammarians \parencite[see][i. 16 B--C]{Wright}. +% \end{enumerate} +% +% \paragraph{The definite article and the \arb[trans]{'alifu 'l-wa.sli}} +% At the beginning of a sentence, \txarb{\char"0671} is never written, +% as \arb[fullvoc]{'l-.hamdu li-ll_ahi}; instead, to indicate that the +% \arb[trans]{'alif} is a connective \arb[trans]{'alif} +% (\arb[trans]{'alifu 'l-wa.sli}), the \arb[trans]{hamzaT} is omitted +% and only its accompanying vowel is expressed: +% \begin{quote} +% |al-.hamdu li-ll_ahi| \arb[fullvoc]{al-.hamdu li-ll_ahi} +% \arb[trans]{al-.hamdu li-ll_ahi}. 
+% \end{quote} +% As said above in section\vref{fullvoc-mode}, |fullvoc| is the mode +% in which \package{arabluatex} expresses the \arb[trans]{sukUn} and +% the \arb[trans]{wa.slaT}. \package{arabluatex} will take care of +% doing this automatically provided that the vowel which is to be +% absorbed by the final vowel of the preceding word is properly +% encoded, like so:--- +% \begin{enumerate}[(a)] +% \item Definite article at the beginning of a sentence is encoded\\ +% \arabluaverb{al-}, or \arabluaverb{a-}\\ if one +% wishes to mark the assimilation---which is in no way required, as +% \package{arabluatex} will detect all cases of assimilation. +% \item Definite article inside sentences is encoded\\ \arabluaverb{'l-} +% or \arabluaverb{'-}. +% \item In all remaining cases of elision, the \arb[trans]{'alifu +% 'l-wa.sli} is expressed by the vowel that accompanies the omitted +% \arb[trans]{hamzaT}: \meta{u, a, i}. +% \end{enumerate} +% \begin{quote} +% \textbf{Article}: |bAbu| |'l-madrasaTi| \arb[fullvoc]{bAbu +% 'l-madrasaTi} \arb[trans]{bAbu 'l-madrasaTi}, |al-maqAlaTu| +% |'l-'_Ul_A| \arb[fullvoc]{al-maqAlaTu 'l-'_Ul_A} +% \arb[trans]{al-maqAlaTu 'l-'_Ul_A}, |al-lu.gaTu| |'l-`arabiyyaTu| +% \arb[fullvoc]{al-lu.gaTu 'l-`arabiyyaTu} \arb[trans]{al-lu.gaTu +% 'l-`arabiyyaTu}, |fI| |.sinA`aTi| |'l-.tibbi| \arb[fullvoc]{fI +% .sinA`aTi 'l-.tibbi} \arb[trans]{fI .sinA`aTi 'l-.tibbi}, |'il_A| +% |'l-intiqA.di| \arb[fullvoc]{'il_A 'l-intiqA.di} \arb[trans]{'il_A +% 'l-intiqA.di}, |fI| |'l-ibtidA'i| \arb[fullvoc]{fI 'l-ibtidA'i} +% \arb[trans]{fI 'l-ibtidA'i}, |'abU| |'l-wazIri| \arb[fullvoc]{'abU +% 'l-wazIri} \arb[trans]{'abU 'l-wazIri}, |fa-lammA| |ra'aW| +% |'l-najma| \arb[fullvoc]{fa-lammA ra'aW 'l-najma} +% \arb[trans]{fa-lammA ra'aW 'l-najma}. 
+% +% \textbf{Particles}:--- +% \begin{enumerate}[(a)] +% \item \arb[trans]{li-}: \arb[trans]{'alifu 'l-wa.sli} is omitted +% in the article \arb[fullvoc]{al} when it is preceded by the +% preposition \arb[fullvoc]{li}: |li-l-rajuli| +% \arb[fullvoc]{li-l-rajuli} +% \arb[trans]{li-l-rajuli}.\\ +% If the first letter of the noun be \arb[novoc]{l}, then the +% \arb[novoc]{l} of the article also falls away, but +% \package{arabluatex} is aware of that: |li-l-laylaTi| +% \arb[fullvoc]{li-l-laylaTi} \arb[trans]{li-l-laylaTi}. +% \item \arb[trans]{la-}: the same applies for the affirmative +% particle \arb[fullvoc]{la}: |la-l-.haqqu| +% \arb[fullvoc]{la-l-.haqqu} \arb[trans]{la-l-.haqqu}. +% \item With the other particles, \arb[trans]{'alifu 'l-wa.sli} is +% expressed: |fI| |'l-madInaTi| \arb[fullvoc]{fI 'l-madInaTi} +% \arb[trans]{fI 'l-madInaTi}, |wa-'l-rajulu| +% \arb[fullvoc]{wa-'l-rajulu} \arb[trans]{wa-'l-rajulu}, +% |bi-'l-qalami| \arb[fullvoc]{bi-'l-qalami} +% \arb[trans]{bi-'l-qalami}, |bi-'l-ru`bi| +% \arb[fullvoc]{bi-'l-ru`bi} \arb[trans]{bi-'l-ru`bi}. +% \end{enumerate} +% +% \textbf{Perfect active, imperative, nomen actionis}: |qAla| +% |isma`| \arb[fullvoc]{qAla isma`} \arb[trans]{qAla isma`}, |qAla| +% |uqtul| \arb[fullvoc]{qAla uqtul} \arb[trans]{qAla uqtul}, |huwa| +% |inhazama| \arb[fullvoc]{huwa inhazama} \arb[trans]{huwa +% inhazama}, |wa-ustu`mila| \arb[fullvoc]{wa-ustu`mila} +% \arb[trans]{wa-ustu`mila}, |qad-i| |in.sarafa| \arb[fullvoc]{qadi +% in.sarafa} \arb[trans]{qadi in.sarafa}, |al-iqtidAru| +% \arb[fullvoc]{al-iqtidAru} \arb[trans]{al-iqtidAru}, |'il_A| +% |'l-intiqA.di| \arb[fullvoc]{'il_A 'l-intiqA.di} \arb[trans]{'il_A +% 'l-intiqA.di}, |lawi| |istaqbala| \arb[fullvoc]{lawi istaqbala} +% \arb[trans]{lawi istaqbala}. 
+% +% \textbf{Other cases}: |'awi| |ismu-hu| \arb[fullvoc]{'awi ismu-hu} +% \arb[trans]{'awi ismu-hu}, |.hunaynu| |ibnu| |'is.h_aqa| +% \arb[fullvoc]{.hunaynu ibnu 'is.h_aqa} \arb[trans]{\cap{.h}unaynu +% ibnu \cap{'is.h_aqa}}, |imru'u| |'l-qaysi| \arb[fullvoc]{imru'u +% 'l-qaysi} \arb[trans]{\cap{i}mru'u \cap{'l-qaysi}}, |la-aymunu| +% |'l-l_ahi| \arb[fullvoc]{la-aymunu 'l-l_ahi} \arb[trans]{la-aymunu +% 'l-l_ahi}. +% \end{quote} +% +% \subparagraph{\arb[trans]{'alifu 'l-wa.sli} preceded by a long +% vowel} The long vowel preceding the connective \arb[trans]{'alif} is +% shortened in pronunciation \parencite[i. 21 B--D]{Wright}. This +% does not appear in the Arabic script, but \package{arabluatex} takes +% it into account in some transliteration standards:--- +% \begin{quote} +% |fI| |'l-nAsi| \arb[fullvoc]{fI 'l-nAsi} \arb[trans]{fI 'l-nAsi}, +% |'abU| |'l-wazIri| \arb[fullvoc]{'abU 'l-wazIri} \arb[trans]{'abU +% 'l-wazIri}, |fI| |'l-ibtidA'i| \arb[fullvoc]{fI 'l-ibtidA'i} +% \arb[trans]{fI 'l-ibtidA'i}, |_dU 'l-i`lAli| \arb[fullvoc]{_dU +% 'l-i`lAli} \arb[trans]{_dU 'l-i`lAli}. +% \end{quote} +% +% \subparagraph{\arb[trans]{'alifu 'l-wa.sli} preceded by a diphthong} +% The diphthong is resolved into two simple vowels \parencite[i. 21 +% D--22 A]{Wright} viz. \emph{ay}~→ \emph{\u{a}\u{i}} and \emph{aw}~→ +% \emph{\u{a}\u{u}}. \package{arabluatex} detects the cases in which +% this rule applies:--- +% \begin{quote} +% |fI| |`aynay| |'l-maliki| \arb[fullvoc]{fI `aynay 'l-maliki} +% \arb[trans]{fI `aynay 'l-maliki}, |ix^say| |'l-qawma| +% \arb[fullvoc]{ix^say 'l-qawma} \arb[trans]{ix^say 'l-qawma}, +% |mu.s.tafaw| |'l-l_ahi| \arb[fullvoc]{mu.s.tafaw 'l-l_ahi} +% \arb[trans]{mu.s.tafaw 'l-l_ahi}. +% +% |ramaW| |'l-.hijAraTa| \arb[fullvoc]{ramaW 'l-.hijAraTa} +% \arb[trans]{ramaW 'l-.hijAraTa}, |fa-lammA| |ra'aW | |'l-najma| +% \arb[fullvoc]{fa-lammA ra'aW 'l-najma} \arb[trans]{fa-lammA ra'aW +% 'l-najma}. 
+% \end{quote} +% +% \subparagraph{\arb[trans]{'alifu 'l-wa.sli} preceded by a consonant +% with \arb[trans]{sukUn}} +% The vowel which the consonant takes is either its original vowel, or +% that which belongs to the connective \arb[trans]{'alif}, or the +% \arb[trans]{kasraT}; in most of the cases \parencite[i. 22 +% A--C]{Wright}, it is encoded explicitly, like so:--- +% \begin{quote} +% |'antumu| |'l-kA_dibUna| \arb[fullvoc]{'antumu 'l-kA_dibUna} +% \arb[trans]{'antumu 'l-kA_dibUna}, |ra'aytumu| |'l-rajula| +% \arb[fullvoc]{ra'aytumu 'l-rajula} \arb[trans]{ra'aytumu +% 'l-rajula}, |mani| |'l-ka_d_dAbu| \arb[fullvoc]{mani 'l-ka_d_dAbu} +% \arb[trans]{mani 'l-ka_d_dAbu}, |qatalati| |'l-rUmu| +% \arb[fullvoc]{qatalati 'l-rUmu} \arb[trans]{qatalati +% \cap{'l-rUmu}}. +% \end{quote} +% However, the Arabic script does not show the \arb[trans]{kasraT} +% which is taken by the nouns having \arb[trans]{tanwIn} although it +% is explicit in pronunciation and must appear in some transliteration +% standards. \package{arabluatex} takes care of this automatically:--- +% \begin{quote} +% |mu.hammaduN| |'l-nabI| \arb[fullvoc]{mu.hammaduN 'l-nabI} +% \arb[trans]{\cap{m}u.hammaduN 'l-nabI}. 
% \end{quote} -% % % \StopEventually{} % diff --git a/arabluatex.lua b/arabluatex.lua index de33bb3..c14e396 100644 --- a/arabluatex.lua +++ b/arabluatex.lua @@ -165,8 +165,8 @@ local function fullvoc(str) for i = 1,#hamzafv do inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b) end - for i = 1,#tanwin do - inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) + for i = 1,#tanwinfv do + inside = string.gsub(inside, tanwinfv[i].a, tanwinfv[i].b) end for i = 1,#trigraphsfv do inside = string.gsub(inside, trigraphsfv[i].a, trigraphsfv[i].b) diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index e6cdb42..8f6ce66 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua @@ -173,6 +173,31 @@ trigraphsfv = { -- trigraphs or more {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"} } +tanwinfv = { + {a="uNU", b="ٌو"}, + {a="aNU", b="ًوا"}, + {a="iNU", b="ٍو"}, + {a="([uai]N)(%s)([uai])", b="%1%2ٱ"}, + {a="(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"}, + -- assimilations (begin) + {a="(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"}, + {a="(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"}, + {a="(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"}, + {a="(T)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, + {a="(ء)(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"}, + {a="([^TA])(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"}, + {a="(iN)(%s)([rlmnwy])", b="ٍ%2%3%3"}, + -- assimilations (end) + {a="(uN)", b="ٌ"}, + {a="(B)(aN)", b="%1ً"}, + {a="(aN)(_A)", b="ًى"}, + {a="(aN)(Y)", b="ًى"}, + {a="(T)(aN)", b="%1ً"}, + {a="(ء)(aN)", b="%1ً"}, + {a="([^TA])(aN)", b="%1ًا"}, + {a="(iN)", b="ٍ"} +} + -- this is new digraphsfv = { -- diphthongs to be resolved before ʾalif conjunctionis -- cgit v1.2.3