From 4668233f36c49ea7a1306aa92b48b4711b7e1d95 Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 30 Mar 2016 11:58:47 +0200 Subject: =?UTF-8?q?Added=20suk=C5=ABn=20above=20the=20ta=E1=B9=ADw=C4=ABl?= =?UTF-8?q?=20in=20fullvoc=20for=20linguistics=20proposes;=20started=20cle?= =?UTF-8?q?aning=20up=20the=20code:=20we=20are=20getting=20close=20to=20th?= =?UTF-8?q?e=20first=20public=20release.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arabluatex.dtx | 167 ++++++++++++++++++++++++++++++++++++++++--------- arabluatex_fullvoc.lua | 6 +- 2 files changed, 139 insertions(+), 34 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index 537cef4..c4e6388 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -116,7 +116,13 @@ % \changes{1.0}{2016/03/29}{Initial release} % % \DoNotIndex{\newcommand,\newenvironment,\renewcommand} -% +% \DoNotIndex{\~,\AtBeginDocument,\bgroup,\csname} +% \DoNotIndex{\DeclareDocumentCommand,\def,\edef,\egroup} +% \DoNotIndex{\else,\endcsname,\endinput,\expandafter,\fi} +% \DoNotIndex{\ifdef,\ifdefined,\ifx,\MessageBreak,\NeedsTeXFormat} +% \DoNotIndex{\NewDocumentCommand,\newif,\PackageError,\PackageWarning} +% \DoNotIndex{\relax,\RenewDocumentCommand,\string} +% % \providecommand*{\url}{\texttt} % \GetFileInfo{arabluatex.dtx} % @@ -149,8 +155,12 @@ % writing. \package{arabluatex} is able to process any Arab\TeX\ % input notation. Its output can be set in the same modes of % vocalization as Arab\TeX, or in different roman -% transliterations. It further allows some typographical -% refinements. +% transliterations. It further allows many typographical +% refinements. It will eventually interact with some other packages +% yet to come to produce from \verb|.tex| source files, in addition +% to printed books, \texttt{TEI xml} compliant critical editions +% and/or lexicons that can be searched, analyzed and correlated in +% various ways. 
% \end{abstract} % % \section*{License and disclaimer} @@ -161,7 +171,6 @@ % {\texttt{alessi@{\allowbreak}robertalessi.net}}> % \item website: \url{http://www.robertalessi.net/arabluatex} % \end{compactitem} -% % % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License @@ -177,7 +186,20 @@ % along with this program; if not, write to the Free Software % Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA % 02111-1307, USA. -% +% +% This release of \package{arabluatex} consists of the following +% source files: +% \begin{compactitem} +% \item |arabluatex.ins| +% \item |arabluatex.dtx| +% \item |arabluatex.lua| +% \item |arabluatex_voc.lua| +% \item |arabluatex_fullvoc.lua| +% \item |arabluatex_novoc.lua| +% \item |arabluatex_trans.lua| +% \item |arabluatex.bib| +% \end{compactitem} +% % \section{Introduction} % In comparison to Prof. Lagally's outstanding Arab\TeX,\footnote{See % \url{http://ctan.org/pkg/arabtex}} Arab\LuaTeX\ is at present @@ -227,6 +249,7 @@ % transliterated into whichever romanization standard the user may % choose. % +% \label{ref:tei-to-come} % But there may be more to be said on this point, as encoding Arabic % also naturally encourages the coder to vocalize the texts---without % compelling him to do so, of course. Accurate coding may even have @@ -248,12 +271,13 @@ % \LuaLaTeX. \TeX\ and \LaTeX\ have \package{arabtex}, and \XeLaTeX\ % has \package{arabxetex}. Both of them are much more advanced than % \package{arabluatex}, as they can process a number of different -% languages,\footnote{To date, both packages support Arabic, Maghribi, -% Urdu, Pashto, Sindhi, Kashmiri, Uighuric and Old Malay; in addition -% to these, \package{arabtex} also has a Hebrew mode, including -% Judeo-Arabic and Yiddish.} whereas \package{arabluatex} can process -% only Arabic for the time being. More languages will be included in -% future releases of \package{arabluatex}. 
+% languages,\footnote{\label{fn:arabtex-languages}To date, both +% packages support Arabic, Maghribi, Urdu, Pashto, Sindhi, Kashmiri, +% Uighuric and Old Malay; in addition to these, \package{arabtex} also +% has a Hebrew mode, including Judeo-Arabic and Yiddish.} whereas +% \package{arabluatex} can process only Arabic for the time +% being. More languages will be included in future releases of +% \package{arabluatex}. % % In comparison to \package{arabxetex}, \package{arabluatex} works in % a very different way. The former relies on the @@ -847,7 +871,7 @@ consonants, though three of them are also used as vowels % always follows a vowelless consonant which is passed over in % pronunciation and assimilated to a following consonant. It may be % found \parencite[i. 15 B--16 C]{Wright}:--- -% \begin{enumerate}[(a)] +% \begin{compactenum}[(a)] % \item With the \emph{solar} letters \arb[novoc]{t}, \arb[novoc]{_t}, % \arb[novoc]{d}, \arb[novoc]{_d}, \arb[novoc]{r}, \arb[novoc]{z}, % \arb[novoc]{s}, \arb[novoc]{^s}, \arb[novoc]{.s}, \arb[novoc]{.d}, @@ -895,7 +919,7 @@ consonants, though three of them are also used as vowels % \arb[voc]{labi_ttu} \arb[trans]{labi_ttu}, will be discarded here, % as it is largely condemned by the % grammarians \parencite[see][i. 16 B--C]{Wright}. 
-% \end{enumerate} +% \end{compactenum} % % \paragraph{The definite article and the \arb[trans]{'alifu 'l-wa.sli}} % At the beginning of a sentence, \txarb{\char"0671} is never written, @@ -913,7 +937,7 @@ consonants, though three of them are also used as vowels % doing this automatically provided that the vowel which is to be % absorbed by the final vowel of the preceding word is properly % encoded, like so:--- -% \begin{enumerate}[(a)] +% \begin{compactenum}[(a)] % \item Definite article at the beginning of a sentence is encoded\\ % \arabluaverb{al-}, or \arabluaverb{a-}\\ if one % wishes to mark the assimilation---which is in no way required, as @@ -923,7 +947,7 @@ consonants, though three of them are also used as vowels % \item In all remaining cases of elision, the \arb[trans]{'alifu % 'l-wa.sli} is expressed by the vowel that accompanies the omitted % \arb[trans]{hamzaT}: \meta{u, a, i}. -% \end{enumerate} +% \end{compactenum} % \begin{quote} % \textbf{Article}: |bAbu| |'l-madrasaTi| \arb[fullvoc]{bAbu % 'l-madrasaTi} \arb[trans]{bAbu 'l-madrasaTi}, |al-maqAlaTu| @@ -940,7 +964,7 @@ consonants, though three of them are also used as vowels % \arb[trans]{fa-lammA ra'aW 'l-najma}. % % \textbf{Particles}:--- -% \begin{enumerate}[(a)] +% \begin{compactenum}[(a)] % \item \arb[trans]{li-}: \arb[trans]{'alifu 'l-wa.sli} is omitted % in the article \arb[fullvoc]{al} when it is preceded by the % preposition \arb[fullvoc]{li}: |li-l-rajuli| @@ -960,7 +984,7 @@ consonants, though three of them are also used as vowels % |bi-'l-qalami| \arb[fullvoc]{bi-'l-qalami} % \arb[trans]{bi-'l-qalami}, |bi-'l-ru`bi| % \arb[fullvoc]{bi-'l-ru`bi} \arb[trans]{bi-'l-ru`bi}. 
-% \end{enumerate} +% \end{compactenum} % % \textbf{Perfect active, imperative, nomen actionis}: |qAla| % |isma`| \arb[fullvoc]{qAla isma`} \arb[trans]{qAla isma`}, |qAla| @@ -1159,7 +1183,7 @@ consonants, though three of them are also used as vowels % % \paragraph{\texttt{novoc}} In this mode, \enquote{quoting} % essentially means make visible something that ought not to be so. -% \begin{enumerate}[(a)] +% \begin{compactenum}[(a)] % \item Quoting a vowel, either short or long, makes the % \arb[trans]{.dammaT}, \arb[trans]{fat.haT} or \arb[trans]{kasraT} % appear above the appropriate consonant:--- @@ -1193,13 +1217,13 @@ consonants, though three of them are also used as vowels % "inhazama} \arb[trans]{huwa "inhazama}, |al-"intiqA.du| % \arb[novoc]{al-"intiqA.du} \arb[trans]{al-"intiqA.du}. % \end{quote} -% \end{enumerate} +% \end{compactenum} % % \paragraph{\texttt{voc}} % In accordance with the general rule, in this mode, \enquote{quoting} % makes the vowels and the \arb[trans]{tanwIn} disappear, should this % feature be required for some reason:--- -% \begin{enumerate}[(a)] +% \begin{compactenum}[(a)] % \item Short and long vowels:--- % \begin{quote} % |q"Ala q"A'iluN| \arb[voc]{q"Ala q"A'iluN} \arb[trans]{q"Ala @@ -1214,12 +1238,12 @@ consonants, though three of them are also used as vowels % \arb[voc]{hud"aN_A} \arb[trans]{hud"aN_A}, |^say'"iN| % \arb[voc]{^say'"iN} \arb[trans]{^say'"iN}. 
% \end{quote} -% \end{enumerate} +% \end{compactenum} % One may more usefully \enquote{quote} the initial vowels to write % the \arb[trans]{hamzaT} above the \arb[trans]{'alif} or insert a % straight double quote after a consonant not followed by a vowel to % make the \arb[trans]{sukUn} appear:--- -% \begin{enumerate}[(a)] +% \begin{compactenum}[(a)] % \item \arb[trans]{'alifu 'l-wa.sli}:--- % \begin{quote} % |fI "istisqA'a| \arb[voc]{fI "istisqA'a} \arb[trans]{fI @@ -1238,7 +1262,7 @@ consonants, though three of them are also used as vowels % \arb[trans]{kam" qad" ma.dat" min" % laylaTiN}.\label{ref:quoted-sukun-e} % \end{quote} -% \end{enumerate} +% \end{compactenum} % % \paragraph{\texttt{fullvoc}} % In this mode, \enquote{quoting} may be used to take away any short @@ -1329,7 +1353,7 @@ consonants, though three of them are also used as vowels % (\cpageref{ref:necessary-tashdid}) from being applied. % % \subsection{\texorpdfstring{Stretching characters: the -% \arb[trans]{.ta.twIl}}{Stretching characters: the ṭaṭwīl}} +% \arb[trans]{ta.twIl}}{Stretching characters: the taṭwīl}} % \label{sec:tatwil} % A double hyphen \meta{-\,-} stretches the ligature in which one % letter is bound to another. Although it is always better to rely on @@ -1339,8 +1363,7 @@ consonants, though three of them are also used as vowels % \begin{quote} % |.hunaynu| |bnu| |'is.h--_aqa| \arb[voc]{.hunaynu bnu 'is.h--_aqa} % \arb[trans]{\cap{.hunaynu} bnu \cap{'is.h--_aqa}} -% \end{quote} -% +% \end{quote} % % \subsection{Digits} % \label{sec:digits} @@ -1400,7 +1423,7 @@ consonants, though three of them are also used as vowels % \fi % % \subsection{Additional characters} -% \label{sec:additional-letters} +% \label{sec:additional-characters} % In the manuscripts, the unpointed letters, \arb[trans]{al-.hurUfu % 'l-muhmalaTu}, are sometimes further distinguished from the pointed % by various contrivances, as explained in \textcite[i. 
4 @@ -1459,6 +1482,20 @@ consonants, though three of them are also used as vowels % \arb[trans]{45: kitAbu-hu fI 'l-\cap{`AdAti}}. % \end{quote} % +% \section{Special applications} +% \label{sec:special-applications} +% \paragraph{Linguistics} +% The same horizontal stroke as the \arb[trans]{ta.twIl} (see +% \vref{sec:tatwil}) may be encoded \meta{B}; \meta{BB} will receive +% the \arb[trans]{ta^sdId}. This is useful to make linguistic +% annotations and comments on vowels:--- +% \begin{quote} +% |Bu| |Ba| |Bi| |BuN| |BaN| |BiN| \arb[voc]{Bu Ba Bi BuN BaN BiN} +% \arb[trans]{Bu Ba Bi BuN BaN BiN}, |BBu| |BBa| |BBi| \arb[voc]{BBu +% BBa BBi} \arb[trans]{BBu BBa BBi}, |B--aN| \arb[voc]{B--aN} +% \arb[trans]{B--aN}, |B| \arb[voc]{B"}\,. +% \end{quote} +% % \section{Transliteration} % \label{sec:transliteration} % It may be more appropriate to speak of \enquote{romanization} than @@ -1728,7 +1765,30 @@ wa-ya.sIru ta.hta 'l-jildi % % \section{Future work} % \label{sec:future-work} -% +% A short, uncommented, list of what is planned in the versions of +% \package{arabluatex} to come follows: +% \begin{compactenum}[(a)] +% \item Short-term: +% \begin{compactenum}[i.] +% \item Support for typesetting Arabic poetry. +% \item The \arb[trans]{\cap{qur'An}}: support for typesetting the +% \arb[trans]{\cap{Qur'An}}. +% \item \texttt{TEI xml} support: \package{arabluatex} will +% interoperate with \texttt{TEI xml} through new global and local +% options that will output Arabic in a \texttt{TEI xml} compliant +% file in addition to the usual PDF output: see +% \vpageref{ref:tei-to-come}. +% \end{compactenum} +% \item Medium-term: +% \begin{compactenum}[i.] +% \item More languages: the list of supported languages will +% eventually be the same as \package{arabtex}: see +% \vref{fn:arabtex-languages}. +% \item Formulate propositions for extending the Arab\TeX\ notation +% and the transliteration tables. Include them in +% \package{arabluatex}. See \vref{sec:additional-characters}. 
+% \end{compactenum} +% \end{compactenum} % % \StopEventually{} % @@ -1737,23 +1797,28 @@ wa-ya.sIru ta.hta 'l-jildi % \iffalse %<*package> % \fi -% +% +% The most important part of \package{arabluatex} relies on Lua +% functions and tables. Read the |.lua| files that accompany +% \package{arabluatex} for more information. % \begin{macrocode} \NeedsTeXFormat{LaTeX2e} \ProvidesPackage{arabluatex}% [2016/03/29 v1.0 ArabTeX-like interface for LuaLaTeX] \RequirePackage{ifluatex} % \end{macrocode} -% \package{arabluatex} requires \LuaLaTeX\ of course. +% \package{arabluatex} requires \LuaLaTeX\ of course. Raise an error +% if the document is processed with another engine. % \begin{macrocode} \ifluatex\else \PackageError{arabluatex}{lualatex needed}{% Package `arabluatex' needs LuaTeX.\MessageBreak - So you should use `lualatex' to process your document!\MessageBreak + So you should use `lualatex' to process your document.\MessageBreak See documentation of `arabluatex' for further information.}% \expandafter\expandafter\expandafter\csname endinput\endcsname \fi % \end{macrocode} +% Declare the global options, and define them: % \begin{macrocode} \DeclareOption{voc}{\def\al@mode{voc}} \DeclareOption{fullvoc}{\def\al@mode{fullvoc}} @@ -1765,13 +1830,23 @@ wa-ya.sIru ta.hta 'l-jildi \def\al@mode@fullvoc{fullvoc} \def\al@mode@novoc{novoc} \def\al@mode@trans{trans} +% \end{macrocode} +% The following line will be used in the next release of +% \package{arabluatex}: +% \begin{macrocode} % \newif\ifal@mode@defined +% \end{macrocode} +% Packages that are required by \package{arabluatex}: +% \begin{macrocode} \RequirePackage{fontspec} \RequirePackage{amsmath} \RequirePackage{etoolbox} \RequirePackage{luacode} \RequirePackage{xparse} \RequirePackage{environ} +% \end{macrocode} +% Here begins the real work: load |arabluatex.lua|: +% \begin{macrocode} \luadirect{dofile(kpse.find_file("arabluatex.lua"))} % \end{macrocode} % This is needed by the current versions of 
\package{polyglossia} and @@ -1780,22 +1855,52 @@ wa-ya.sIru ta.hta 'l-jildi % \begin{macrocode} \luadirect{tex.enableprimitives("luatex",tex.extraprimitives("omega"))} % \end{macrocode} +% Font setup. If no Arabic font is selected, issue a warning message +% and attempt to load the Amiri font, which is included in \TeX\ Live: % \begin{macrocode} \AtBeginDocument{\ifdefined\arabicfont\relax\else \PackageWarning{arabluatex}{\string\arabicfont\ is not defined!^^JI will try to load Amiri}% \newfontfamily\arabicfont[Script=Arabic]{Amiri}\fi}% +% \end{macrocode} +% \begin{macro}{\setRL} +% This neutralizes what is defined by the same command in +% \package{luabidi}: +% \begin{macrocode} \AtBeginDocument{\def\setRL{\pardir TRT\textdir TRT}} +% \end{macrocode} +% \end{macro} +% \begin{macro}{\setLR} +% The same applies to \cs{setLR}: +% \begin{macrocode} \AtBeginDocument{\def\setLR{\pardir TLT\textdir TLT}} +% \end{macrocode} +% \end{macro} +% \begin{macro}{\LR} This command typesets its argument from left to +% right. As \cs{LR} may already be defined, we need to redefine +% it to suit our purpose: +% \begin{macrocode} \AtBeginDocument{\ifdef{\LR}% {\RenewDocumentCommand{\LR}{m}{\bgroup\textdir TLT\rmfamily#1\egroup}} {\NewDocumentCommand{\LR}{m}{\bgroup\textdir TLT\rmfamily#1\egroup}}} +% \end{macrocode} +% \end{macro} +% \begin{macro}{\RL} This one typesets its argument from right to +% left. Same remark as above regarding the need for redefinition. +% \begin{macrocode} \AtBeginDocument{\ifdef{\RL}% {\RenewDocumentCommand{\RL}{m}{\bgroup\textdir TRT\rmfamily#1\egroup}} {\NewDocumentCommand{\RL}{m}{\bgroup\textdir TRT#1\rmfamily\egroup}}} +% \end{macrocode} +% \end{macro} +% \begin{macro}{\aemph} Arabic emphasis. Needs to be redefined as well. 
+% \begin{macrocode} \AtBeginDocument{\ifdef{\aemph}% {\RenewDocumentCommand{\aemph}{m}{$\overline{\text{#1}}$}} {\NewDocumentCommand{\aemph}{m}{$\overline{\text{#1}}$}}} +% \end{macrocode} +% \end{macro} +% \begin{macrocode} \def\al@trans@style{\itshape}% \NewDocumentCommand{\SetTranslitStyle}{m}{\def\al@trans@style{#1}} \def\al@trans@convention{dmg} diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua index 505ac4e..d9e9f04 100644 --- a/arabluatex_fullvoc.lua +++ b/arabluatex_fullvoc.lua @@ -257,9 +257,9 @@ digraphsfv = { {a="ww", b="وّ"}, {a="yy", b="يّ"}, -- sukūn begin - {a="([%_%^%.]?[btjghxdrzs%`fqklmnwy])$", b="%1ْ"}, - {a="([%_%^%.]?[btjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, - {a="([%_%^%.]?[btjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIًٌٍ])", b="%1ْ%2"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])$", b="%1ْ"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%s])", b="%1ْ%2"}, + {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIًٌٍ])", b="%1ْ%2"}, -- take out sukūn in cases of assimilation {a="(n)(ْ)(%s)([روي])", b="%1%3%4"}, {a="(n)(ْ)(%s)([ل])", b="%1%3%4"}, -- cgit v1.2.3