diff options
author | Robert Alessi <alessi@robertalessi.net> | 2016-07-09 11:56:03 +0200 |
---|---|---|
committer | Robert Alessi <alessi@robertalessi.net> | 2016-07-09 11:56:03 +0200 |
commit | 644cefb59c1dcfee6591a1fcf68616fe0c11a28c (patch) | |
tree | 3984fd3a3bef91fe4b340677b6c3d226870492fc | |
parent | dabfd4942964958fb68bb9aa162b28afc84b4f8c (diff) | |
download | arabluatex-644cefb59c1dcfee6591a1fcf68616fe0c11a28c.tar.gz |
updated documentation. this will be v1.4.1
-rw-r--r-- | arabluatex.dtx | 161 | ||||
-rw-r--r-- | arabluatex_voc.lua | 1 |
2 files changed, 90 insertions, 72 deletions
diff --git a/arabluatex.dtx b/arabluatex.dtx index 433d66b..403b19e 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx | |||
@@ -78,6 +78,16 @@ | |||
78 | url = {http://www.amirifont.org/} | 78 | url = {http://www.amirifont.org/} |
79 | } | 79 | } |
80 | 80 | ||
81 | @Book{Habash, | ||
82 | author = {Habash, Nizar Y.}, | ||
83 | title = {Introduction to Arabic Natural Language Processing}, | ||
84 | year = 2010, | ||
85 | series = {Synthesis Lectures on Human Language Technologies}, | ||
86 | number = 10, | ||
87 | publisher = {Morgan \& Claypool Publishers}, | ||
88 | location = {Toronto} | ||
89 | } | ||
90 | |||
81 | @MVBook{Wright, | 91 | @MVBook{Wright, |
82 | author = {Wright, W. LL.D}, | 92 | author = {Wright, W. LL.D}, |
83 | title = {A Grammar of the Arabic Language}, | 93 | title = {A Grammar of the Arabic Language}, |
@@ -1979,85 +1989,90 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu | |||
1979 | %</example> | 1989 | %</example> |
1980 | % \fi | 1990 | % \fi |
1981 | % | 1991 | % |
1982 | % \paragraph{Limitations} | 1992 | % \paragraph{\enquote*{base}, \enquote*{\texttt{xml}} and |
1983 | % To date, the following two limitations apply: | 1993 | % \enquote*{safe} schemes} |
1984 | % \begin{compactenum}[(a)] | 1994 | % \package{arabluatex} can use any of the so-called Buckwalter |
1985 | % \item The braces |{| and |}|, which are used in Buckwalter scheme to | 1995 | % \enquote*{base}, \enquote*{\texttt{xml}} or \enquote*{safe} schemes |
1986 | % encode \arb[novoc]{"a} and \arb[novoc]{y"'}, must be replaced with | 1996 | % as they are described in \textcite[25--26]{Habash}.\footnote{I am |
1987 | % square brackets viz. |[| and |]| respectively. | 1997 | % grateful to Graeme Andrews who suggested that the \enquote*{safe} |
1988 | % \item The underscore character |_|, which is used in Buckwalter | 1998 | % scheme be included in \package{arabluatex}.} However, the following |
1989 | % scheme to encode the Arabic \arb[trans]{ta.twIl} must be replaced | 1999 | % limitation applies to the \enquote*{base} and \enquote*{\texttt{xml}} |
1990 | % with a double hyphen |--| (see above \vref{sec:tatwil}). | 2000 | % schemes: the braces |{| and |}|, which are used to encode |
1991 | % \end{compactenum} | 2001 | % \arb[novoc]{"a} and \arb[novoc]{y"'}, must be replaced with square |
2002 | % brackets viz. |[| and |]| respectively. | ||
2003 | % | ||
2004 | % It is therefore recommended to use the Buckwalter \enquote*{safe} | ||
2005 | % scheme. | ||
1992 | % | 2006 | % |
1993 | % \Cref{tab:buckwalter-scheme} gives the Buckwalter equivalents that | 2007 | % \Cref{tab:buckwalter-scheme} gives the Buckwalter equivalents that |
1994 | % are currently used by \package{arabluatex}. The additional | 2008 | % are currently used by \package{arabluatex}. The additional |
1995 | % characters that are defined in \vref{tab:additional-arabic-codings} | 2009 | % characters that are defined in \vref{tab:additional-arabic-codings} |
1996 | % are also available. | 2010 | % are also available. |
1997 | % \begin{longtable}{llll} | 2011 | % \begin{longtable}{lllll} |
1998 | % \captionlistentry{Buckwalter scheme}\\[-1em] | 2012 | % \captionlistentry{Buckwalter scheme}\\[-1em] |
1999 | % \toprule | 2013 | % \toprule |
2000 | % Letter & \multicolumn{2}{l}{Transliteration\footnotemark} | 2014 | % Letter & \multicolumn{2}{l}{Transliteration\footnotemark} |
2001 | % & Buckwalter notation \\ | 2015 | % & \multicolumn{2}{l}{Buckwalter notation} \\ |
2002 | % & \texttt{dmg} & \texttt{loc} & \\ \midrule | 2016 | % & \texttt{dmg} & \texttt{loc} & |base/xml| & |safe| \\ \midrule |
2003 | % \endfirsthead | 2017 | % \endfirsthead |
2004 | % \toprule | 2018 | % \toprule |
2005 | % Letter & \multicolumn{2}{l}{Transliteration} | 2019 | % Letter & \multicolumn{2}{l}{Transliteration} |
2006 | % & Buckwalter notation \\ | 2020 | % & \multicolumn{2}{l}{Buckwalter notation} \\ |
2007 | % & \texttt{dmg} & \texttt{loc} & \\ \midrule | 2021 | % & \texttt{dmg} & \texttt{loc} & |base/xml| & |safe| \\ \midrule |
2008 | % \endhead \footnotetext{See \vref{sec:transliteration}.} | 2022 | % \endhead \footnotetext{See \vref{sec:transliteration}.} |
2009 | % \label{tab:buckwalter-scheme} | 2023 | % \label{tab:buckwalter-scheme} |
2010 | % \arb[novoc]{a} & \dmg{a} & \loc{a} & \verb|A| \\ | 2024 | % \arb[novoc]{a} & \dmg{a} & \loc{a} & |A| & |A| \\ |
2011 | % \arb[novoc]{b} & \dmg{b} & \loc{b} & |b| \\ | 2025 | % \arb[novoc]{b} & \dmg{b} & \loc{b} & |b| & |b| \\ |
2012 | % \arb[novoc]{t} & \dmg{t} & \loc{t} & |t| \\ | 2026 | % \arb[novoc]{t} & \dmg{t} & \loc{t} & |t| & |t| \\ |
2013 | % \arb[novoc]{_t} & \dmg{_t} & \loc{_t} & |v|\\ | 2027 | % \arb[novoc]{_t} & \dmg{_t} & \loc{_t} & |v| & |v| \\ |
2014 | % \arb[novoc]{j} & \dmg{j} & \loc{j} & |j| \\ | 2028 | % \arb[novoc]{j} & \dmg{j} & \loc{j} & |j| & |j| \\ |
2015 | % \arb[novoc]{.h} & \dmg{.h} & \loc{.h} & |H| \\ | 2029 | % \arb[novoc]{.h} & \dmg{.h} & \loc{.h} & |H| & |H| \\ |
2016 | % \arb[novoc]{x} & \dmg{x} & \loc{x} & |x|\\ | 2030 | % \arb[novoc]{x} & \dmg{x} & \loc{x} & |x| & |x| \\ |
2017 | % \arb[novoc]{d} & \dmg{d} & \loc{d} & |d| \\ | 2031 | % \arb[novoc]{d} & \dmg{d} & \loc{d} & |d| & |d| \\ |
2018 | % \arb[novoc]{_d} & \dmg{_d} & \loc{_d} & |*| \\ | 2032 | % \arb[novoc]{_d} & \dmg{_d} & \loc{_d} & |*| & |V| \\ |
2019 | % \arb[novoc]{r} & \dmg{r} & \loc{r} & |r| \\ | 2033 | % \arb[novoc]{r} & \dmg{r} & \loc{r} & |r| & |r| \\ |
2020 | % \arb[novoc]{z} & \dmg{z} & \loc{z} & |z| \\ | 2034 | % \arb[novoc]{z} & \dmg{z} & \loc{z} & |z| & |z| \\ |
2021 | % \arb[novoc]{s} & \dmg{s} & \loc{s} & |s| \\ | 2035 | % \arb[novoc]{s} & \dmg{s} & \loc{s} & |s| & |s| \\ |
2022 | % \arb[novoc]{^s} & \dmg{^s} & \loc{^s} & |$| \\ | 2036 | % \arb[novoc]{^s} & \dmg{^s} & \loc{^s} & |$| & |c| \\ |
2023 | % \arb[novoc]{.s} & \dmg{.s} & \loc{.s} & |S| \\ | 2037 | % \arb[novoc]{.s} & \dmg{.s} & \loc{.s} & |S| & |S| \\ |
2024 | % \arb[novoc]{.d} & \dmg{.d} & \loc{.d} & |D| \\ | 2038 | % \arb[novoc]{.d} & \dmg{.d} & \loc{.d} & |D| & |D| \\ |
2025 | % \arb[novoc]{.t} & \dmg{.t} & \loc{.t} & |T| \\ | 2039 | % \arb[novoc]{.t} & \dmg{.t} & \loc{.t} & |T| & |T| \\ |
2026 | % \arb[novoc]{.z} & \dmg{.z} & \loc{.z} & |Z| \\ | 2040 | % \arb[novoc]{.z} & \dmg{.z} & \loc{.z} & |Z| & |Z| \\ |
2027 | % \arb[novoc]{`} & \dmg{`} & \loc{`} & |E| \\ | 2041 | % \arb[novoc]{`} & \dmg{`} & \loc{`} & |E| & |E| \\ |
2028 | % \arb[novoc]{.g} & \dmg{.g} & \loc{.g} & |g| \\ | 2042 | % \arb[novoc]{.g} & \dmg{.g} & \loc{.g} & |g| & |g| \\ |
2029 | % \arb[novoc]{f} & \dmg{f} & \loc{f} & |f| \\ | 2043 | % \arb[novoc]{f} & \dmg{f} & \loc{f} & |f| & |f| \\ |
2030 | % \arb[novoc]{q} & \dmg{q} & \loc{q} & |q| \\ | 2044 | % \arb[novoc]{q} & \dmg{q} & \loc{q} & |q| & |q| \\ |
2031 | % \arb[novoc]{k} & \dmg{k} & \loc{k} & |k| \\ | 2045 | % \arb[novoc]{k} & \dmg{k} & \loc{k} & |k| & |k| \\ |
2032 | % \arb[novoc]{l} & \dmg{l} & \loc{l} & |l| \\ | 2046 | % \arb[novoc]{l} & \dmg{l} & \loc{l} & |l| & |l| \\ |
2033 | % \arb[novoc]{m} & \dmg{m} & \loc{m} & |m| \\ | 2047 | % \arb[novoc]{m} & \dmg{m} & \loc{m} & |m| & |m| \\ |
2034 | % \arb[novoc]{n} & \dmg{n} & \loc{n} & |n| \\ | 2048 | % \arb[novoc]{n} & \dmg{n} & \loc{n} & |n| & |n| \\ |
2035 | % \arb[novoc]{h} & \dmg{h} & \loc{h} & |h| \\ | 2049 | % \arb[novoc]{h} & \dmg{h} & \loc{h} & |h| & |h| \\ |
2036 | % \arb[novoc]{w} & \dmg{w} & \loc{w} & |w| \\ | 2050 | % \arb[novoc]{w} & \dmg{w} & \loc{w} & |w| & |w| \\ |
2037 | % \arb[novoc]{y} & \dmg{y} & \loc{y} & |y| \\ | 2051 | % \arb[novoc]{y} & \dmg{y} & \loc{y} & |y| & |y| \\ |
2038 | % \arb[novoc]{T} & \dmg{aT} & \loc{aT} & |p| \\ | 2052 | % \arb[novoc]{Y} & \dmg{Y} & \loc{Y} & |Y| & |Y| \\ |
2053 | % \arb[novoc]{T} & \dmg{aT} & \loc{aT} & |p| & |p| \\ | ||
2039 | % \midrule | 2054 | % \midrule |
2040 | % \arb[novoc]{|"'} & \dmg{|"'} & \loc{|"'} & \verb|'| \\ | 2055 | % \arb[novoc]{|"'} & \dmg{|"'} & \loc{|"'} & \verb|'| & |C| \\ |
2041 | % \arb[novoc]{A"'} & \dmg{A"'} & \loc{A"'} & \verb+|+ \\ | 2056 | % \arb[novoc]{A"'} & \dmg{A"'} & \loc{A"'} & \verb+|+ & |M| \\ |
2042 | % \arb[novoc]{a"'} & \dmg{a"'} & \loc{a"'} & \verb|>| \\ | 2057 | % \arb[novoc]{a"'} & \dmg{a"'} & \loc{a"'} & \verb|>| & |O| \\ |
2043 | % \arb[novoc]{w"'} & \dmg{w"'} & \loc{w"'} & \verb|&| \\ | 2058 | % \arb[novoc]{w"'} & \dmg{w"'} & \loc{w"'} & \verb|&| & |W| \\ |
2044 | % \arb[novoc]{i"'} & \dmg{i"'} & \loc{i"'} & \verb|<| \\ | 2059 | % \arb[novoc]{i"'} & \dmg{i"'} & \loc{i"'} & \verb|<| & |I| \\ |
2045 | % \arb[novoc]{y"'} & \dmg{y"'} & \loc{y"'} & \verb|]| \\ | 2060 | % \arb[novoc]{y"'} & \dmg{y"'} & \loc{y"'} & \verb|]| & |Q| \\ |
2046 | % \midrule | 2061 | % \midrule |
2047 | % \arb[novoc]{BB} & --- & --- & \verb|~| \\ | 2062 | % \arb[novoc]{BB} & --- & --- & \verb|~| & |~| \\ |
2048 | % \arb[novoc]{"a} & ' & ' & |[| \\ | 2063 | % \arb[novoc]{"a} & ' & ' & |[| & |L| \\ |
2049 | % \midrule | 2064 | % \midrule |
2050 | % \arb[voc]{Ba} & \dmg{Ba} & \loc{Ba} & \verb|a| \\ | 2065 | % \arb[voc]{Ba} & \dmg{Ba} & \loc{Ba} & \verb|a| & |a| \\ |
2051 | % \arb[voc]{Bu} & \dmg{Bu} & \loc{Bu} & \verb|u| \\ | 2066 | % \arb[voc]{Bu} & \dmg{Bu} & \loc{Bu} & \verb|u| & |u| \\ |
2052 | % \arb[voc]{Bi} & \dmg{Bi} & \loc{Bi} & \verb|i| \\ | 2067 | % \arb[voc]{Bi} & \dmg{Bi} & \loc{Bi} & \verb|i| & |i| \\ |
2053 | % \arb[voc]{BaN} & \dmg{BaN} & \loc{BaN} & \verb|F| \\ | 2068 | % \arb[voc]{BaN} & \dmg{BaN} & \loc{BaN} & \verb|F| & |F| \\ |
2054 | % \arb[voc]{BuN} & \dmg{BuN} & \loc{BuN} & \verb|N| \\ | 2069 | % \arb[voc]{BuN} & \dmg{BuN} & \loc{BuN} & \verb|N| & |N| \\ |
2055 | % \arb[voc]{BiN} & \dmg{BiN} & \loc{BiN} & \verb|K| \\ | 2070 | % \arb[voc]{BiN} & \dmg{BiN} & \loc{BiN} & \verb|K| & |K| \\ |
2056 | % \arb[voc]{B"} & --- & --- & \verb|o| \\ | 2071 | % \arb[voc]{B"} & --- & --- & \verb|o| & |o| \\ |
2057 | % \midrule | 2072 | % \midrule |
2058 | % \arb[novoc]{B_a} & \dmg{B_a} & \loc{B_a} & |`| \\ | 2073 | % \arb[novoc]{B_a} & \dmg{B_a} & \loc{B_a} & |`| & |e| \\ |
2059 | % \midrule | 2074 | % \midrule |
2060 | % \arb[novoc]{--} & --- & --- & |--| (\arb[trans]{ta.twIl})\\ | 2075 | % \arb[novoc]{--} (\arb[trans]{ta.twIl}) & --- & --- & |_| & |_| \\ |
2061 | % \bottomrule | 2076 | % \bottomrule |
2062 | % \caption*{\Cref*{tab:buckwalter-scheme}: Buckwalter scheme} | 2077 | % \caption*{\Cref*{tab:buckwalter-scheme}: Buckwalter scheme} |
2063 | % \end{longtable} | 2078 | % \end{longtable} |
@@ -2073,24 +2088,26 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu | |||
2073 | % like so:--- \SetInputScheme{buckwalter} | 2088 | % like so:--- \SetInputScheme{buckwalter} |
2074 | % \begin{quote} | 2089 | % \begin{quote} |
2075 | % |Al-EaAlamu| \arb{Al-EaAlam-u} \arb[trans]{Al-EaAlam-u}, | 2090 | % |Al-EaAlamu| \arb{Al-EaAlam-u} \arb[trans]{Al-EaAlam-u}, |
2076 | % |Al-$~amsu| \arb{Al-$~ams-u} \arb[trans]{Al-$~ams-u}, | 2091 | % |Al-camsu| \arb{Al-cams-u} \arb[trans]{Al-cams-u}, |
2077 | % |bi-SinaAEapi| |Al-T~ib~i|, \arb{bi-SinaAEap-i Al-T~ib~-i} | 2092 | % |bi-SinaAEapi| |Al-T~ib~i|, \arb{bi-SinaAEap-i Al-T~ib~-i} |
2078 | % \arb[trans]{bi-SinaAEap-i Al-T~ib~-i}. | 2093 | % \arb[trans]{bi-SinaAEap-i Al-T~ib~-i}. |
2079 | % | 2094 | % |
2080 | % |wa-Al-l~`hi| \arb{wa-Al-l~`h-i} \arb[trans]{wa-Al-l~`h-i}, | 2095 | % |wa-Al-l~ehi| \arb{wa-Al-l~eh-i} \arb[trans]{wa-Al-l~eh-i}, |
2081 | % |Al-Hamdu| |li-l~`hi| \arb{Al-Hamd-u li-l~`h-i} | 2096 | % |Al-Hamdu| |li-l~ehi| \arb{Al-Hamd-u li-l~eh-i} |
2082 | % \arb[trans]{Al-Hamd-u li-l~`h-i}. | 2097 | % \arb[trans]{Al-Hamd-u li-l~eh-i}. |
2083 | % \end{quote} | 2098 | % \end{quote} |
2084 | % \SetInputScheme{arabtex} | 2099 | % \SetInputScheme{arabtex} |
2085 | % | 2100 | % |
2086 | % Similary, it is not advisable to use \verb+|+ and |[| to encode the | 2101 | % Similarly, it is not advisable to use \verb+|+ and |[| |
2087 | % \arb[trans]{'alif-u 'l-mamdUdaT-i} and the \arb[trans]{'alif-u | 2102 | % (\enquote*{base} and \enquote*{\texttt{xml}} schemes) or |M| and |L| |
2088 | % 'l-wa.sl-i} for such signs can be generated by \package{arabluatex}. | 2103 | % (\enquote*{safe} scheme) to encode the \arb[trans]{'alif-u |
2089 | % Besides, as they do not \emph{per se} convey any morphological | 2104 | % 'l-mamdUdaT-i} and the \arb[trans]{'alif-u 'l-wa.sl-i} for such |
2090 | % information on what they are derived from, they cannot be | 2105 | % signs are supposed to be generated by \package{arabluatex} internal |
2091 | % transliterated accurately. To take one example, % | 2106 | % functions. Besides, as they do not \emph{per se} convey any |
2107 | % morphological information on what they are derived from, they cannot | ||
2108 | % be transliterated accurately. To take one example, % | ||
2092 | % \SetInputScheme{buckwalter}% | 2109 | % \SetInputScheme{buckwalter}% |
2093 | % |<ilY Al-[ntiqaADi| gives \arb{>ilY Al-[ntiqaADi} as expected, but | 2110 | % |<ilY Al-LntiqaADi| gives \arb{<ilY Al-LntiqaADi} as expected, but |
2094 | % only |<ilY Al-intiqaADi| can be transliterated as \arb[trans]{<ilY | 2111 | % only |<ilY Al-intiqaADi| can be transliterated as \arb[trans]{<ilY |
2095 | % Al-intiqaADi} with the correct vowel \meta{i} in place of the % | 2112 | % Al-intiqaADi} with the correct vowel \meta{i} in place of the % |
2096 | % \SetInputScheme{arabtex}% | 2113 | % \SetInputScheme{arabtex}% |
diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 6df1370..20ed016 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua | |||
@@ -75,6 +75,7 @@ buckwalter = { | |||
75 | --- hard coded madda: hold it for now | 75 | --- hard coded madda: hold it for now |
76 | {a="%|", b="@"}, | 76 | {a="%|", b="@"}, |
77 | {a="M", b="@"}, -- BW safe | 77 | {a="M", b="@"}, -- BW safe |
78 | {a="%_", b="--"}, -- taṭwīl | ||
78 | -- prevent any unwanted šadda from being generated | 79 | -- prevent any unwanted šadda from being generated |
79 | {a="bb", b="b|b"}, | 80 | {a="bb", b="b|b"}, |
80 | {a="tt", b="t|t"}, | 81 | {a="tt", b="t|t"}, |