From 644cefb59c1dcfee6591a1fcf68616fe0c11a28c Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Sat, 9 Jul 2016 11:56:03 +0200 Subject: updated documentation. this will be v1.4.1 --- arabluatex.dtx | 161 +++++++++++++++++++++++++++++------------------------ arabluatex_voc.lua | 1 + 2 files changed, 90 insertions(+), 72 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index 433d66b..403b19e 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -78,6 +78,16 @@ url = {http://www.amirifont.org/} } +@Book{Habash, + author = {Habash, Nizar Y.}, + title = {Introduction to Arabic Natural Language Processing}, + year = 2010, + series = {Synthesis Lectures on Human Language Technologies}, + number = 10, + publisher = {Morgan \& Claypool Publishers}, + location = {Toronto} +} + @MVBook{Wright, author = {Wright, W. LL.D}, title = {A Grammar of the Arabic Language}, @@ -1979,85 +1989,90 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu % % \fi % -% \paragraph{Limitations} -% To date, the following two limitations apply: -% \begin{compactenum}[(a)] -% \item The braces |{| and |}|, which are used in Buckwalter scheme to -% encode \arb[novoc]{"a} and \arb[novoc]{y"'}, must be replaced with -% square brackets viz. |[| and |]| respectively. -% \item The underscore character |_|, which is used in Buckwalter -% scheme to encode the Arabic \arb[trans]{ta.twIl} must be replaced -% with a double hyphen |--| (see above \vref{sec:tatwil}). 
-% \end{compactenum} +% \paragraph{\enquote*{base}, \enquote*{\texttt{xml}} and +% \enquote*{safe} schemes} +% \package{arabluatex} can use any of the so-called Buckwalter +% \enquote*{base}, \enquote*{\texttt{xml}} or \enquote*{safe} schemes +% as they are described in \textcite[25--26]{Habash}.\footnote{I am +% grateful to Graeme Andrews who suggested that the \enquote*{safe} +% scheme be included in \package{arabluatex}.} However, the following +% limitation applies to the \enquote*{base} and \enquote*{\texttt{xml}} +% schemes: the braces |{| and |}|, which are used to encode +% \arb[novoc]{"a} and \arb[novoc]{y"'}, must be replaced with square +% brackets viz. |[| and |]| respectively. +% +% It is therefore recommended to use the Buckwalter \enquote*{safe} +% scheme. % % \Cref{tab:buckwalter-scheme} gives the Buckwalter equivalents that % are currently used by \package{arabluatex}. The additional % characters that are defined in \vref{tab:additional-arabic-codings} % are also available. 
-% \begin{longtable}{llll} +% \begin{longtable}{lllll} % \captionlistentry{Buckwalter scheme}\\[-1em] % \toprule % Letter & \multicolumn{2}{l}{Transliteration\footnotemark} -% & Buckwalter notation \\ -% & \texttt{dmg} & \texttt{loc} & \\ \midrule +% & \multicolumn{2}{l}{Buckwalter notation} \\ +% & \texttt{dmg} & \texttt{loc} & |base/xml| & |safe| \\ \midrule % \endfirsthead % \toprule % Letter & \multicolumn{2}{l}{Transliteration} -% & Buckwalter notation \\ -% & \texttt{dmg} & \texttt{loc} & \\ \midrule +% & \multicolumn{2}{l}{Buckwalter notation} \\ +% & \texttt{dmg} & \texttt{loc} & |base/xml| & |safe| \\ \midrule % \endhead \footnotetext{See \vref{sec:transliteration}.} % \label{tab:buckwalter-scheme} -% \arb[novoc]{a} & \dmg{a} & \loc{a} & \verb|A| \\ -% \arb[novoc]{b} & \dmg{b} & \loc{b} & |b| \\ -% \arb[novoc]{t} & \dmg{t} & \loc{t} & |t| \\ -% \arb[novoc]{_t} & \dmg{_t} & \loc{_t} & |v|\\ -% \arb[novoc]{j} & \dmg{j} & \loc{j} & |j| \\ -% \arb[novoc]{.h} & \dmg{.h} & \loc{.h} & |H| \\ -% \arb[novoc]{x} & \dmg{x} & \loc{x} & |x|\\ -% \arb[novoc]{d} & \dmg{d} & \loc{d} & |d| \\ -% \arb[novoc]{_d} & \dmg{_d} & \loc{_d} & |*| \\ -% \arb[novoc]{r} & \dmg{r} & \loc{r} & |r| \\ -% \arb[novoc]{z} & \dmg{z} & \loc{z} & |z| \\ -% \arb[novoc]{s} & \dmg{s} & \loc{s} & |s| \\ -% \arb[novoc]{^s} & \dmg{^s} & \loc{^s} & |$| \\ -% \arb[novoc]{.s} & \dmg{.s} & \loc{.s} & |S| \\ -% \arb[novoc]{.d} & \dmg{.d} & \loc{.d} & |D| \\ -% \arb[novoc]{.t} & \dmg{.t} & \loc{.t} & |T| \\ -% \arb[novoc]{.z} & \dmg{.z} & \loc{.z} & |Z| \\ -% \arb[novoc]{`} & \dmg{`} & \loc{`} & |E| \\ -% \arb[novoc]{.g} & \dmg{.g} & \loc{.g} & |g| \\ -% \arb[novoc]{f} & \dmg{f} & \loc{f} & |f| \\ -% \arb[novoc]{q} & \dmg{q} & \loc{q} & |q| \\ -% \arb[novoc]{k} & \dmg{k} & \loc{k} & |k| \\ -% \arb[novoc]{l} & \dmg{l} & \loc{l} & |l| \\ -% \arb[novoc]{m} & \dmg{m} & \loc{m} & |m| \\ -% \arb[novoc]{n} & \dmg{n} & \loc{n} & |n| \\ -% \arb[novoc]{h} & \dmg{h} & \loc{h} & |h| \\ -% \arb[novoc]{w} & \dmg{w} & 
\loc{w} & |w| \\ -% \arb[novoc]{y} & \dmg{y} & \loc{y} & |y| \\ -% \arb[novoc]{T} & \dmg{aT} & \loc{aT} & |p| \\ +% \arb[novoc]{a} & \dmg{a} & \loc{a} & |A| & |A| \\ +% \arb[novoc]{b} & \dmg{b} & \loc{b} & |b| & |b| \\ +% \arb[novoc]{t} & \dmg{t} & \loc{t} & |t| & |t| \\ +% \arb[novoc]{_t} & \dmg{_t} & \loc{_t} & |v| & |v| \\ +% \arb[novoc]{j} & \dmg{j} & \loc{j} & |j| & |j| \\ +% \arb[novoc]{.h} & \dmg{.h} & \loc{.h} & |H| & |H| \\ +% \arb[novoc]{x} & \dmg{x} & \loc{x} & |x| & |x| \\ +% \arb[novoc]{d} & \dmg{d} & \loc{d} & |d| & |d| \\ +% \arb[novoc]{_d} & \dmg{_d} & \loc{_d} & |*| & |V| \\ +% \arb[novoc]{r} & \dmg{r} & \loc{r} & |r| & |r| \\ +% \arb[novoc]{z} & \dmg{z} & \loc{z} & |z| & |z| \\ +% \arb[novoc]{s} & \dmg{s} & \loc{s} & |s| & |s| \\ +% \arb[novoc]{^s} & \dmg{^s} & \loc{^s} & |$| & |c| \\ +% \arb[novoc]{.s} & \dmg{.s} & \loc{.s} & |S| & |S| \\ +% \arb[novoc]{.d} & \dmg{.d} & \loc{.d} & |D| & |D| \\ +% \arb[novoc]{.t} & \dmg{.t} & \loc{.t} & |T| & |T| \\ +% \arb[novoc]{.z} & \dmg{.z} & \loc{.z} & |Z| & |Z| \\ +% \arb[novoc]{`} & \dmg{`} & \loc{`} & |E| & |E| \\ +% \arb[novoc]{.g} & \dmg{.g} & \loc{.g} & |g| & |g| \\ +% \arb[novoc]{f} & \dmg{f} & \loc{f} & |f| & |f| \\ +% \arb[novoc]{q} & \dmg{q} & \loc{q} & |q| & |q| \\ +% \arb[novoc]{k} & \dmg{k} & \loc{k} & |k| & |k| \\ +% \arb[novoc]{l} & \dmg{l} & \loc{l} & |l| & |l| \\ +% \arb[novoc]{m} & \dmg{m} & \loc{m} & |m| & |m| \\ +% \arb[novoc]{n} & \dmg{n} & \loc{n} & |n| & |n| \\ +% \arb[novoc]{h} & \dmg{h} & \loc{h} & |h| & |h| \\ +% \arb[novoc]{w} & \dmg{w} & \loc{w} & |w| & |w| \\ +% \arb[novoc]{y} & \dmg{y} & \loc{y} & |y| & |y| \\ +% \arb[novoc]{Y} & \dmg{Y} & \loc{Y} & |Y| & |Y| \\ +% \arb[novoc]{T} & \dmg{aT} & \loc{aT} & |p| & |p| \\ % \midrule -% \arb[novoc]{|"'} & \dmg{|"'} & \loc{|"'} & \verb|'| \\ -% \arb[novoc]{A"'} & \dmg{A"'} & \loc{A"'} & \verb+|+ \\ -% \arb[novoc]{a"'} & \dmg{a"'} & \loc{a"'} & \verb|>| \\ -% \arb[novoc]{w"'} & \dmg{w"'} & \loc{w"'} & \verb|&| \\ -% \arb[novoc]{i"'} & 
\dmg{i"'} & \loc{i"'} & \verb|<| \\ -% \arb[novoc]{y"'} & \dmg{y"'} & \loc{y"'} & \verb|]| \\ +% \arb[novoc]{|"'} & \dmg{|"'} & \loc{|"'} & \verb|'| & |C| \\ +% \arb[novoc]{A"'} & \dmg{A"'} & \loc{A"'} & \verb+|+ & |M| \\ +% \arb[novoc]{a"'} & \dmg{a"'} & \loc{a"'} & \verb|>| & |O| \\ +% \arb[novoc]{w"'} & \dmg{w"'} & \loc{w"'} & \verb|&| & |W| \\ +% \arb[novoc]{i"'} & \dmg{i"'} & \loc{i"'} & \verb|<| & |I| \\ +% \arb[novoc]{y"'} & \dmg{y"'} & \loc{y"'} & \verb|]| & |Q| \\ % \midrule -% \arb[novoc]{BB} & --- & --- & \verb|~| \\ -% \arb[novoc]{"a} & ' & ' & |[| \\ +% \arb[novoc]{BB} & --- & --- & \verb|~| & |~| \\ +% \arb[novoc]{"a} & ' & ' & |[| & |L| \\ % \midrule -% \arb[voc]{Ba} & \dmg{Ba} & \loc{Ba} & \verb|a| \\ -% \arb[voc]{Bu} & \dmg{Bu} & \loc{Bu} & \verb|u| \\ -% \arb[voc]{Bi} & \dmg{Bi} & \loc{Bi} & \verb|i| \\ -% \arb[voc]{BaN} & \dmg{BaN} & \loc{BaN} & \verb|F| \\ -% \arb[voc]{BuN} & \dmg{BuN} & \loc{BuN} & \verb|N| \\ -% \arb[voc]{BiN} & \dmg{BiN} & \loc{BiN} & \verb|K| \\ -% \arb[voc]{B"} & --- & --- & \verb|o| \\ +% \arb[voc]{Ba} & \dmg{Ba} & \loc{Ba} & \verb|a| & |a| \\ +% \arb[voc]{Bu} & \dmg{Bu} & \loc{Bu} & \verb|u| & |u| \\ +% \arb[voc]{Bi} & \dmg{Bi} & \loc{Bi} & \verb|i| & |i| \\ +% \arb[voc]{BaN} & \dmg{BaN} & \loc{BaN} & \verb|F| & |F| \\ +% \arb[voc]{BuN} & \dmg{BuN} & \loc{BuN} & \verb|N| & |N| \\ +% \arb[voc]{BiN} & \dmg{BiN} & \loc{BiN} & \verb|K| & |K| \\ +% \arb[voc]{B"} & --- & --- & \verb|o| & |o| \\ % \midrule -% \arb[novoc]{B_a} & \dmg{B_a} & \loc{B_a} & |`| \\ +% \arb[novoc]{B_a} & \dmg{B_a} & \loc{B_a} & |`| & |e| \\ % \midrule -% \arb[novoc]{--} & --- & --- & |--| (\arb[trans]{ta.twIl})\\ +% \arb[novoc]{--} (\arb[trans]{ta.twIl}) & --- & --- & |_| & |_| \\ % \bottomrule % \caption*{\Cref*{tab:buckwalter-scheme}: Buckwalter scheme} % \end{longtable} @@ -2073,24 +2088,26 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu % like so:--- \SetInputScheme{buckwalter} % \begin{quote} % |Al-EaAlamu| \arb{Al-EaAlam-u} 
\arb[trans]{Al-EaAlam-u}, -% |Al-$~amsu| \arb{Al-$~ams-u} \arb[trans]{Al-$~ams-u}, +% |Al-camsu| \arb{Al-cams-u} \arb[trans]{Al-cams-u}, % |bi-SinaAEapi| |Al-T~ib~i|, \arb{bi-SinaAEap-i Al-T~ib~-i} % \arb[trans]{bi-SinaAEap-i Al-T~ib~-i}. % -% |wa-Al-l~`hi| \arb{wa-Al-l~`h-i} \arb[trans]{wa-Al-l~`h-i}, -% |Al-Hamdu| |li-l~`hi| \arb{Al-Hamd-u li-l~`h-i} -% \arb[trans]{Al-Hamd-u li-l~`h-i}. +% |wa-Al-l~ehi| \arb{wa-Al-l~eh-i} \arb[trans]{wa-Al-l~eh-i}, +% |Al-Hamdu| |li-l~ehi| \arb{Al-Hamd-u li-l~eh-i} +% \arb[trans]{Al-Hamd-u li-l~eh-i}. % \end{quote} % \SetInputScheme{arabtex} % -% Similary, it is not advisable to use \verb+|+ and |[| to encode the -% \arb[trans]{'alif-u 'l-mamdUdaT-i} and the \arb[trans]{'alif-u -% 'l-wa.sl-i} for such signs can be generated by \package{arabluatex}. -% Besides, as they do not \emph{per se} convey any morphological -% information on what they are derived from, they cannot be -% transliterated accurately. To take one example, % +% Similarly, it is not advisable to use \verb+|+ and |[| +% (\enquote*{base} and \enquote*{\texttt{xml}} schemes) or |M| and |L| +% (\enquote*{safe} scheme) to encode the \arb[trans]{'alif-u +% 'l-mamdUdaT-i} and the \arb[trans]{'alif-u 'l-wa.sl-i} for such +% signs are supposed to be generated by \package{arabluatex} internal +% functions. Besides, as they do not \emph{per se} convey any +% morphological information on what they are derived from, they cannot +% be transliterated accurately. To take one example, % % \SetInputScheme{buckwalter}% -% |ilY Al-[ntiqaADi} as expected, but +% |ilY Al-LntiqaADi} as expected, but % only |