aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Alessi <alessi@robertalessi.net>2016-07-09 11:56:03 +0200
committerRobert Alessi <alessi@robertalessi.net>2016-07-09 11:56:03 +0200
commit644cefb59c1dcfee6591a1fcf68616fe0c11a28c (patch)
tree3984fd3a3bef91fe4b340677b6c3d226870492fc
parentdabfd4942964958fb68bb9aa162b28afc84b4f8c (diff)
downloadarabluatex-644cefb59c1dcfee6591a1fcf68616fe0c11a28c.tar.gz
updated documentation. this will be v1.4.1
-rw-r--r--arabluatex.dtx161
-rw-r--r--arabluatex_voc.lua1
2 files changed, 90 insertions, 72 deletions
diff --git a/arabluatex.dtx b/arabluatex.dtx
index 433d66b..403b19e 100644
--- a/arabluatex.dtx
+++ b/arabluatex.dtx
@@ -78,6 +78,16 @@
78 url = {http://www.amirifont.org/} 78 url = {http://www.amirifont.org/}
79} 79}
80 80
81@Book{Habash,
82 author = {Habash, Nizar Y.},
83 title = {Introduction to Arabic Natural Language Processing},
84 year = 2010,
85 series = {Synthesis Lectures on Human Language Technologies},
86 number = 10,
87 publisher = {Morgan \& Claypool Publishers},
88 location = {Toronto}
89}
90
81@MVBook{Wright, 91@MVBook{Wright,
82 author = {Wright, W. LL.D}, 92 author = {Wright, W. LL.D},
83 title = {A Grammar of the Arabic Language}, 93 title = {A Grammar of the Arabic Language},
@@ -1979,85 +1989,90 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu
1979%</example> 1989%</example>
1980% \fi 1990% \fi
1981% 1991%
1982% \paragraph{Limitations} 1992% \paragraph{\enquote*{base}, \enquote*{\texttt{xml}} and
1983% To date, the following two limitations apply: 1993% \enquote*{safe} schemes}
1984% \begin{compactenum}[(a)] 1994% \package{arabluatex} can use any of the so-called Buckwalter
1985% \item The braces |{| and |}|, which are used in Buckwalter scheme to 1995% \enquote*{base}, \enquote*{\texttt{xml}} or \enquote*{safe} schemes
1986% encode \arb[novoc]{"a} and \arb[novoc]{y"'}, must be replaced with 1996% as they are described in \textcite[25--26]{Habash}.\footnote{I am
1987% square brackets viz. |[| and |]| respectively. 1997% grateful to Graeme Andrews who suggested that the \enquote*{safe}
1988% \item The underscore character |_|, which is used in Buckwalter 1998% scheme be included in \package{arabluatex}.} However, the following
1989% scheme to encode the Arabic \arb[trans]{ta.twIl} must be replaced 1999% limitation applies to the \enquote*{base} and \enquote*{\texttt{xml}}
1990% with a double hyphen |--| (see above \vref{sec:tatwil}). 2000% schemes: the braces |{| and |}|, which are used to encode
1991% \end{compactenum} 2001% \arb[novoc]{"a} and \arb[novoc]{y"'}, must be replaced with square
2002% brackets viz. |[| and |]| respectively.
2003%
2004% It is therefore recommended to use the Buckwalter \enquote*{safe}
2005% scheme.
1992% 2006%
1993% \Cref{tab:buckwalter-scheme} gives the Buckwalter equivalents that 2007% \Cref{tab:buckwalter-scheme} gives the Buckwalter equivalents that
1994% are currently used by \package{arabluatex}. The additional 2008% are currently used by \package{arabluatex}. The additional
1995% characters that are defined in \vref{tab:additional-arabic-codings} 2009% characters that are defined in \vref{tab:additional-arabic-codings}
1996% are also available. 2010% are also available.
1997% \begin{longtable}{llll} 2011% \begin{longtable}{lllll}
1998% \captionlistentry{Buckwalter scheme}\\[-1em] 2012% \captionlistentry{Buckwalter scheme}\\[-1em]
1999% \toprule 2013% \toprule
2000% Letter & \multicolumn{2}{l}{Transliteration\footnotemark} 2014% Letter & \multicolumn{2}{l}{Transliteration\footnotemark}
2001% & Buckwalter notation \\ 2015% & \multicolumn{2}{l}{Buckwalter notation} \\
2002% & \texttt{dmg} & \texttt{loc} & \\ \midrule 2016% & \texttt{dmg} & \texttt{loc} & |base/xml| & |safe| \\ \midrule
2003% \endfirsthead 2017% \endfirsthead
2004% \toprule 2018% \toprule
2005% Letter & \multicolumn{2}{l}{Transliteration} 2019% Letter & \multicolumn{2}{l}{Transliteration}
2006% & Buckwalter notation \\ 2020% & \multicolumn{2}{l}{Buckwalter notation} \\
2007% & \texttt{dmg} & \texttt{loc} & \\ \midrule 2021% & \texttt{dmg} & \texttt{loc} & |base/xml| & |safe| \\ \midrule
2008% \endhead \footnotetext{See \vref{sec:transliteration}.} 2022% \endhead \footnotetext{See \vref{sec:transliteration}.}
2009% \label{tab:buckwalter-scheme} 2023% \label{tab:buckwalter-scheme}
2010% \arb[novoc]{a} & \dmg{a} & \loc{a} & \verb|A| \\ 2024% \arb[novoc]{a} & \dmg{a} & \loc{a} & |A| & |A| \\
2011% \arb[novoc]{b} & \dmg{b} & \loc{b} & |b| \\ 2025% \arb[novoc]{b} & \dmg{b} & \loc{b} & |b| & |b| \\
2012% \arb[novoc]{t} & \dmg{t} & \loc{t} & |t| \\ 2026% \arb[novoc]{t} & \dmg{t} & \loc{t} & |t| & |t| \\
2013% \arb[novoc]{_t} & \dmg{_t} & \loc{_t} & |v|\\ 2027% \arb[novoc]{_t} & \dmg{_t} & \loc{_t} & |v| & |v| \\
2014% \arb[novoc]{j} & \dmg{j} & \loc{j} & |j| \\ 2028% \arb[novoc]{j} & \dmg{j} & \loc{j} & |j| & |j| \\
2015% \arb[novoc]{.h} & \dmg{.h} & \loc{.h} & |H| \\ 2029% \arb[novoc]{.h} & \dmg{.h} & \loc{.h} & |H| & |H| \\
2016% \arb[novoc]{x} & \dmg{x} & \loc{x} & |x|\\ 2030% \arb[novoc]{x} & \dmg{x} & \loc{x} & |x| & |x| \\
2017% \arb[novoc]{d} & \dmg{d} & \loc{d} & |d| \\ 2031% \arb[novoc]{d} & \dmg{d} & \loc{d} & |d| & |d| \\
2018% \arb[novoc]{_d} & \dmg{_d} & \loc{_d} & |*| \\ 2032% \arb[novoc]{_d} & \dmg{_d} & \loc{_d} & |*| & |V| \\
2019% \arb[novoc]{r} & \dmg{r} & \loc{r} & |r| \\ 2033% \arb[novoc]{r} & \dmg{r} & \loc{r} & |r| & |r| \\
2020% \arb[novoc]{z} & \dmg{z} & \loc{z} & |z| \\ 2034% \arb[novoc]{z} & \dmg{z} & \loc{z} & |z| & |z| \\
2021% \arb[novoc]{s} & \dmg{s} & \loc{s} & |s| \\ 2035% \arb[novoc]{s} & \dmg{s} & \loc{s} & |s| & |s| \\
2022% \arb[novoc]{^s} & \dmg{^s} & \loc{^s} & |$| \\ 2036% \arb[novoc]{^s} & \dmg{^s} & \loc{^s} & |$| & |c| \\
2023% \arb[novoc]{.s} & \dmg{.s} & \loc{.s} & |S| \\ 2037% \arb[novoc]{.s} & \dmg{.s} & \loc{.s} & |S| & |S| \\
2024% \arb[novoc]{.d} & \dmg{.d} & \loc{.d} & |D| \\ 2038% \arb[novoc]{.d} & \dmg{.d} & \loc{.d} & |D| & |D| \\
2025% \arb[novoc]{.t} & \dmg{.t} & \loc{.t} & |T| \\ 2039% \arb[novoc]{.t} & \dmg{.t} & \loc{.t} & |T| & |T| \\
2026% \arb[novoc]{.z} & \dmg{.z} & \loc{.z} & |Z| \\ 2040% \arb[novoc]{.z} & \dmg{.z} & \loc{.z} & |Z| & |Z| \\
2027% \arb[novoc]{`} & \dmg{`} & \loc{`} & |E| \\ 2041% \arb[novoc]{`} & \dmg{`} & \loc{`} & |E| & |E| \\
2028% \arb[novoc]{.g} & \dmg{.g} & \loc{.g} & |g| \\ 2042% \arb[novoc]{.g} & \dmg{.g} & \loc{.g} & |g| & |g| \\
2029% \arb[novoc]{f} & \dmg{f} & \loc{f} & |f| \\ 2043% \arb[novoc]{f} & \dmg{f} & \loc{f} & |f| & |f| \\
2030% \arb[novoc]{q} & \dmg{q} & \loc{q} & |q| \\ 2044% \arb[novoc]{q} & \dmg{q} & \loc{q} & |q| & |q| \\
2031% \arb[novoc]{k} & \dmg{k} & \loc{k} & |k| \\ 2045% \arb[novoc]{k} & \dmg{k} & \loc{k} & |k| & |k| \\
2032% \arb[novoc]{l} & \dmg{l} & \loc{l} & |l| \\ 2046% \arb[novoc]{l} & \dmg{l} & \loc{l} & |l| & |l| \\
2033% \arb[novoc]{m} & \dmg{m} & \loc{m} & |m| \\ 2047% \arb[novoc]{m} & \dmg{m} & \loc{m} & |m| & |m| \\
2034% \arb[novoc]{n} & \dmg{n} & \loc{n} & |n| \\ 2048% \arb[novoc]{n} & \dmg{n} & \loc{n} & |n| & |n| \\
2035% \arb[novoc]{h} & \dmg{h} & \loc{h} & |h| \\ 2049% \arb[novoc]{h} & \dmg{h} & \loc{h} & |h| & |h| \\
2036% \arb[novoc]{w} & \dmg{w} & \loc{w} & |w| \\ 2050% \arb[novoc]{w} & \dmg{w} & \loc{w} & |w| & |w| \\
2037% \arb[novoc]{y} & \dmg{y} & \loc{y} & |y| \\ 2051% \arb[novoc]{y} & \dmg{y} & \loc{y} & |y| & |y| \\
2038% \arb[novoc]{T} & \dmg{aT} & \loc{aT} & |p| \\ 2052% \arb[novoc]{Y} & \dmg{Y} & \loc{Y} & |Y| & |Y| \\
2053% \arb[novoc]{T} & \dmg{aT} & \loc{aT} & |p| & |p| \\
2039% \midrule 2054% \midrule
2040% \arb[novoc]{|"'} & \dmg{|"'} & \loc{|"'} & \verb|'| \\ 2055% \arb[novoc]{|"'} & \dmg{|"'} & \loc{|"'} & \verb|'| & |C| \\
2041% \arb[novoc]{A"'} & \dmg{A"'} & \loc{A"'} & \verb+|+ \\ 2056% \arb[novoc]{A"'} & \dmg{A"'} & \loc{A"'} & \verb+|+ & |M| \\
2042% \arb[novoc]{a"'} & \dmg{a"'} & \loc{a"'} & \verb|>| \\ 2057% \arb[novoc]{a"'} & \dmg{a"'} & \loc{a"'} & \verb|>| & |O| \\
2043% \arb[novoc]{w"'} & \dmg{w"'} & \loc{w"'} & \verb|&| \\ 2058% \arb[novoc]{w"'} & \dmg{w"'} & \loc{w"'} & \verb|&| & |W| \\
2044% \arb[novoc]{i"'} & \dmg{i"'} & \loc{i"'} & \verb|<| \\ 2059% \arb[novoc]{i"'} & \dmg{i"'} & \loc{i"'} & \verb|<| & |I| \\
2045% \arb[novoc]{y"'} & \dmg{y"'} & \loc{y"'} & \verb|]| \\ 2060% \arb[novoc]{y"'} & \dmg{y"'} & \loc{y"'} & \verb|]| & |Q| \\
2046% \midrule 2061% \midrule
2047% \arb[novoc]{BB} & --- & --- & \verb|~| \\ 2062% \arb[novoc]{BB} & --- & --- & \verb|~| & |~| \\
2048% \arb[novoc]{"a} & ' & ' & |[| \\ 2063% \arb[novoc]{"a} & ' & ' & |[| & |L| \\
2049% \midrule 2064% \midrule
2050% \arb[voc]{Ba} & \dmg{Ba} & \loc{Ba} & \verb|a| \\ 2065% \arb[voc]{Ba} & \dmg{Ba} & \loc{Ba} & \verb|a| & |a| \\
2051% \arb[voc]{Bu} & \dmg{Bu} & \loc{Bu} & \verb|u| \\ 2066% \arb[voc]{Bu} & \dmg{Bu} & \loc{Bu} & \verb|u| & |u| \\
2052% \arb[voc]{Bi} & \dmg{Bi} & \loc{Bi} & \verb|i| \\ 2067% \arb[voc]{Bi} & \dmg{Bi} & \loc{Bi} & \verb|i| & |i| \\
2053% \arb[voc]{BaN} & \dmg{BaN} & \loc{BaN} & \verb|F| \\ 2068% \arb[voc]{BaN} & \dmg{BaN} & \loc{BaN} & \verb|F| & |F| \\
2054% \arb[voc]{BuN} & \dmg{BuN} & \loc{BuN} & \verb|N| \\ 2069% \arb[voc]{BuN} & \dmg{BuN} & \loc{BuN} & \verb|N| & |N| \\
2055% \arb[voc]{BiN} & \dmg{BiN} & \loc{BiN} & \verb|K| \\ 2070% \arb[voc]{BiN} & \dmg{BiN} & \loc{BiN} & \verb|K| & |K| \\
2056% \arb[voc]{B"} & --- & --- & \verb|o| \\ 2071% \arb[voc]{B"} & --- & --- & \verb|o| & |o| \\
2057% \midrule 2072% \midrule
2058% \arb[novoc]{B_a} & \dmg{B_a} & \loc{B_a} & |`| \\ 2073% \arb[novoc]{B_a} & \dmg{B_a} & \loc{B_a} & |`| & |e| \\
2059% \midrule 2074% \midrule
2060% \arb[novoc]{--} & --- & --- & |--| (\arb[trans]{ta.twIl})\\ 2075% \arb[novoc]{--} (\arb[trans]{ta.twIl}) & --- & --- & |_| & |_| \\
2061% \bottomrule 2076% \bottomrule
2062% \caption*{\Cref*{tab:buckwalter-scheme}: Buckwalter scheme} 2077% \caption*{\Cref*{tab:buckwalter-scheme}: Buckwalter scheme}
2063% \end{longtable} 2078% \end{longtable}
@@ -2073,24 +2088,26 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu
2073% like so:--- \SetInputScheme{buckwalter} 2088% like so:--- \SetInputScheme{buckwalter}
2074% \begin{quote} 2089% \begin{quote}
2075% |Al-EaAlamu| \arb{Al-EaAlam-u} \arb[trans]{Al-EaAlam-u}, 2090% |Al-EaAlamu| \arb{Al-EaAlam-u} \arb[trans]{Al-EaAlam-u},
2076% |Al-$~amsu| \arb{Al-$~ams-u} \arb[trans]{Al-$~ams-u}, 2091% |Al-camsu| \arb{Al-cams-u} \arb[trans]{Al-cams-u},
2077% |bi-SinaAEapi| |Al-T~ib~i|, \arb{bi-SinaAEap-i Al-T~ib~-i} 2092% |bi-SinaAEapi| |Al-T~ib~i|, \arb{bi-SinaAEap-i Al-T~ib~-i}
2078% \arb[trans]{bi-SinaAEap-i Al-T~ib~-i}. 2093% \arb[trans]{bi-SinaAEap-i Al-T~ib~-i}.
2079% 2094%
2080% |wa-Al-l~`hi| \arb{wa-Al-l~`h-i} \arb[trans]{wa-Al-l~`h-i}, 2095% |wa-Al-l~ehi| \arb{wa-Al-l~eh-i} \arb[trans]{wa-Al-l~eh-i},
2081% |Al-Hamdu| |li-l~`hi| \arb{Al-Hamd-u li-l~`h-i} 2096% |Al-Hamdu| |li-l~ehi| \arb{Al-Hamd-u li-l~eh-i}
2082% \arb[trans]{Al-Hamd-u li-l~`h-i}. 2097% \arb[trans]{Al-Hamd-u li-l~eh-i}.
2083% \end{quote} 2098% \end{quote}
2084% \SetInputScheme{arabtex} 2099% \SetInputScheme{arabtex}
2085% 2100%
2086% Similarly, it is not advisable to use \verb+|+ and |[| to encode the 2101% Similarly, it is not advisable to use \verb+|+ and |[|
2087% \arb[trans]{'alif-u 'l-mamdUdaT-i} and the \arb[trans]{'alif-u 2102% (\enquote*{base} and \enquote*{\texttt{xml}} schemes) or |M| and |L|
2088% 'l-wa.sl-i} for such signs can be generated by \package{arabluatex}. 2103% (\enquote*{safe} scheme) to encode the \arb[trans]{'alif-u
2089% Besides, as they do not \emph{per se} convey any morphological 2104% 'l-mamdUdaT-i} and the \arb[trans]{'alif-u 'l-wa.sl-i} for such
2090% information on what they are derived from, they cannot be 2105% signs are supposed to be generated by \package{arabluatex} internal
2091% transliterated accurately. To take one example, % 2106% functions. Besides, as they do not \emph{per se} convey any
2107% morphological information on what they are derived from, they cannot
2108% be transliterated accurately. To take one example, %
2092% \SetInputScheme{buckwalter}% 2109% \SetInputScheme{buckwalter}%
2093% |<ilY Al-[ntiqaADi| gives \arb{<ilY Al-[ntiqaADi} as expected, but 2110% |<ilY Al-LntiqaADi| gives \arb{<ilY Al-LntiqaADi} as expected, but
2094% only |<ilY Al-intiqaADi| can be transliterated as \arb[trans]{<ilY 2111% only |<ilY Al-intiqaADi| can be transliterated as \arb[trans]{<ilY
2095% Al-intiqaADi} with the correct vowel \meta{i} in place of the % 2112% Al-intiqaADi} with the correct vowel \meta{i} in place of the %
2096% \SetInputScheme{arabtex}% 2113% \SetInputScheme{arabtex}%
diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua
index 6df1370..20ed016 100644
--- a/arabluatex_voc.lua
+++ b/arabluatex_voc.lua
@@ -75,6 +75,7 @@ buckwalter = {
75 --- hard coded madda: hold it for now 75 --- hard coded madda: hold it for now
76 {a="%|", b="@"}, 76 {a="%|", b="@"},
77 {a="M", b="@"}, -- BW safe 77 {a="M", b="@"}, -- BW safe
78 {a="%_", b="--"}, -- taṭwīl
78 -- prevent any unwanted šadda from being generated 79 -- prevent any unwanted šadda from being generated
79 {a="bb", b="b|b"}, 80 {a="bb", b="b|b"},
80 {a="tt", b="t|t"}, 81 {a="tt", b="t|t"},