From efb07927d995efbc33822ba447b2beed2b31ef5a Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 24 Feb 2016 20:27:29 +0100 Subject: first implementation of fullvoc tables --- arabluatex.dtx | 14 ++- arabluatex.lua | 56 +++++++++-- arabluatex_fullvoc.lua | 267 +++++++++++++++++++++++++++++++++++++++++++++++++ arabluatex_novoc.lua | 6 +- arabluatex_voc.lua | 5 +- 5 files changed, 335 insertions(+), 13 deletions(-) create mode 100644 arabluatex_fullvoc.lua diff --git a/arabluatex.dtx b/arabluatex.dtx index 72792db..67fa916 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -175,9 +175,11 @@ % \begin{macrocode} \DeclareOption{voc}{\def\al@mode{voc}} \DeclareOption{novoc}{\def\al@mode{novoc}} +\DeclareOption{fullvoc}{\def\al@mode{fullvoc}} \ExecuteOptions{voc} \ProcessOptions\relax \def\al@mode@voc{voc} +\def\al@mode@fullvoc{fullvoc} \def\al@mode@novoc{novoc} % \newif\ifal@mode@defined \RequirePackage{amsmath} @@ -208,11 +210,15 @@ \bgroup\textdir TRT\arabicfont% \luadirect{tex.sprint(processvoc("\luaescapestring{#2}"))}\egroup% \else% + \ifx\@tempa\al@mode@fullvoc% + \bgroup\textdir TRT\arabicfont% + \luadirect{tex.sprint(processfullvoc("\luaescapestring{#2}"))}\egroup% + \else% \ifx\@tempa\al@mode@novoc% \bgroup\textdir TRT\arabicfont% \luadirect{tex.sprint(processnovoc("\luaescapestring{#2}"))}\egroup% \else% - \fi\fi} + \fi\fi\fi} \DeclareDocumentCommand{\aemph}{m}{$\overline{\text{#1}}$} \NewEnviron{arab}[1][\al@mode]% {\par\edef\@tempa{#1}% @@ -220,12 +226,16 @@ \bgroup\pardir TRT\textdir TRT\arabicfont% \luadirect{tex.sprint(processvoc("\luaescapestring{\BODY}"))}\egroup% \else% + \ifx\@tempa\al@mode@fullvoc% + \bgroup\pardir TRT\textdir TRT\arabicfont% + \luadirect{tex.sprint(processfullvoc("\luaescapestring{\BODY}"))}\egroup% + \else% \ifx\@tempa\al@mode@novoc% \bgroup\pardir TRT\textdir TRT\arabicfont% \luadirect{tex.sprint(processnovoc("\luaescapestring{\BODY}"))}\egroup% \else \BODY% - \fi\fi}[\par] + \fi\fi\fi}[\par] \robustify{\textbf} \robustify{\emph} \robustify{\footnote} diff --git a/arabluatex.lua b/arabluatex.lua index 087ee82..76d61b3 100644 --- a/arabluatex.lua +++ b/arabluatex.lua @@ -23,6 +23,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA --]] require("arabluatex_voc") +require("arabluatex_fullvoc") require("arabluatex_novoc") local function protectarb(str) @@ -133,14 +134,47 @@ local function voc(str) return str end +local function fullvoc(str) + str = string.gsub(str, "\\arb(%b{})", function(inside) + inside = string.sub(inside, 2, -2) + for i = 1,#hamzafv do + inside = string.gsub(inside, hamzafv[i].a, hamzafv[i].b) + end + for i = 1,#tanwin do + inside = string.gsub(inside, tanwin[i].a, tanwin[i].b) + end + for i = 1,#trigraphs do + inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b) + end + for i = 1,#digraphsfv do + inside = string.gsub(inside, digraphsfv[i].a, digraphsfv[i].b) + end + for i = 1,#single do + inside = string.gsub(inside, single[i].a, single[i].b) + end + for i = 1,#longv do + inside = string.gsub(inside, longv[i].a, longv[i].b) + end + for i = 1,#shortv do + inside = string.gsub(inside, shortv[i].a, shortv[i].b) + end + for i = 1,#null do + inside = string.gsub(inside, null[i].a, null[i].b) + end + inside = indnum(inside) + return string.format("\\txarb{%s}", inside) + end) +return str +end + local function novoc(str) str = string.gsub(str, "\\arb(%b{})", function(inside) inside = string.sub(inside, 2, -2) for i = 1,#hamza do inside = string.gsub(inside, hamza[i].a, hamza[i].b) end - for i = 1,#tanwinnovoc do - inside = string.gsub(inside, tanwinnovoc[i].a, tanwinnovoc[i].b) + for i = 1,#tanwinnv do + inside = string.gsub(inside, tanwinnv[i].a, tanwinnv[i].b) end for i = 1,#trigraphs do inside = string.gsub(inside, trigraphs[i].a, trigraphs[i].b) @@ -151,11 +185,11 @@ local function novoc(str) for i = 1,#single do inside = string.gsub(inside, single[i].a, single[i].b) end - for i = 1,#longvnovoc do - inside = string.gsub(inside, longvnovoc[i].a, longvnovoc[i].b) + for i = 1,#longvnv do + inside = string.gsub(inside, longvnv[i].a, longvnv[i].b) end - for i = 1,#shortvnovoc do - inside = string.gsub(inside, shortvnovoc[i].a, shortvnovoc[i].b) + for i = 1,#shortvnv do + inside = string.gsub(inside, shortvnv[i].a, shortvnv[i].b) end for i = 1,#null do inside = string.gsub(inside, null[i].a, null[i].b) @@ -176,6 +210,16 @@ function processvoc(str) return str end +function processfullvoc(str) + str = "\\arb{".. str.."}" + str = protectarb(str) + str = breakcmd(str) + str = holdcmd(str) + str = fullvoc(str) + str = unprotectarb(str) +return str +end + function processnovoc(str) str = "\\arb{".. str.."}" str = protectarb(str) diff --git a/arabluatex_fullvoc.lua b/arabluatex_fullvoc.lua new file mode 100644 index 0000000..9a6f35a --- /dev/null +++ b/arabluatex_fullvoc.lua @@ -0,0 +1,267 @@ +--[[ +This file is part of the `arabluatex' package + +copyright (C) 2016 Robert Alessi + +Please send error reports and suggestions for improvements to +Robert Alessi + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +--]] + +numbers = { + {a="0", b="٠"}, + {a="1", b="١"}, + {a="2", b="٢"}, + {a="3", b="٣"}, + {a="4", b="٤"}, + {a="5", b="٥"}, + {a="6", b="٦"}, + {a="7", b="٧"}, + {a="8", b="٨"}, + {a="9", b="٩"} +} + +raw = { + {a="A", b="َا"}, + {a="U", b="ُو"}, + {a="I", b="ِي"}, + {a="b", b="ب"}, + {a="t", b="ت"}, + {a="j", b="ج"}, + {a="x", b="خ"}, + {a="d", b="د"}, + {a="r", b="ر"}, + {a="z", b="ز"}, + {a="s", b="س"}, + {a="f", b="ف"}, + {a="`", b="ع"}, + {a="f", b="ف"}, + {a="q", b="ق"}, + {a="k", b="ك"}, + {a="l", b="ل"}, + {a="m", b="م"}, + {a="n", b="ن"}, + {a="h", b="ه"}, + {a="w", b="و"}, + {a="y", b="ي"}, + {a="T", b="ة"}, + {a="u", b="ُ"}, + {a="a", b="َ"}, + {a="i", b="ِ"} +} + +-- this will eventually be new +hamzafv = { + -- initial long u + {a="%'%_U", b="أU"}, + -- madda (historic writing below) + {a="'A", b="آ"}, + -- initial (needs both ^ and %W patterns) + {a="^(')([ua])", b="أ%2"}, + {a="^(')(i)", b="إ%2"}, + {a="(%W)(')([ua])", b="%1أ%3"}, + {a="(%W)(')(i)", b="%1إ%3"}, + -- final + {a="([^uai])(')([uai]N?)$", b="%1ء%3"}, + {a="([^uai])(')([uai]N?)(%W)", b="%1ء%3%4"}, + {a="([UI])(')([uai])$", b="%1ء%3"}, + {a="([UI])(')([uai])(%W)", b="%1ء%3%4"}, + -- middle + {a="(A)(')(i)", b="aآئ%3"}, -- historic madda + {a="(A)(')", b="aآء"}, -- historic madda + {a="([UI])(')", b="%1ء"}, + {a="([^uai])(')([uU])", b="%1ؤ%3"}, + {a="([^uai])(')([aA])", b="%1أ%3"}, + {a="([^uai])(')([iI])", b="%1ئ%3"}, + {a="(u)(')([uU])", b="%1ؤ%3"}, + {a="(u)(')([aA])", b="%1ؤ%3"}, + {a="(u)(')([iI])", b="%1ئ%3"}, + {a="(a)(')([aA])", b="%1أ%3"}, + {a="(a)(')([uU])", b="%1ؤ%3"}, + {a="(a)(')([iI])", b="%1ئ%3"}, + {a="(i)(')([aA])", b="%1ئ%3"}, + {a="(i)(')([uU])", b="%1ئ%3"}, + {a="(i)(')([iI])", b="%1ئ%3"}, + {a="(a)(')([^uaiUAI])", b="%1أ%3"}, + {a="(u)(')([^uaiUAI])", b="%1ؤ%3"}, + {a="(i)(')([^uaiUAI])", b="%1ئ%3"} +} + +trigraphs = { -- trigraphs or more + -- 'llatI / 'llad_I + {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"}, + {a="(%s)'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, + -- al- + lām + {a="^(a)l%-(l)", b="ا%1ل%2%2"}, + {a="(%s)(a)l%-(l)", b="%1ا%2ل%3%3"}, + -- al- + solar consonant + {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="ا%1ل%2%2"}, + {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1ا%2ل%3%3"}, + -- assim. art. + solar consonant + {a="^(a)([%_%^%.]?[tdrzsn])%-", b="ا%1ل%2"}, + {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1ا%2ل%3"}, + -- al- + initial unstable hamza + {a="^(a)l%-([uai])", b="ا%1لا%2"}, + {a="(%s)(a)l%-([uai])", b="%1ا%2لا%3"}, + -- li- + art. + initial unstable hamza is a special orthography + {a="l(i)%-l%-([uai])", b="ل%1لا%2"}, + -- al- + lunar consonant (i.e. what remains) + {a="^(a)l%-", b="ا%1ل"}, + {a="(%s)(a)l%-", b="%1ا%2ل"}, + -- art. with waṣla + lām + {a="'l%-(l)", b="ال%1%1"}, + -- art. with waṣla + solar consonant + {a="'l%-([%_%^%.]?[tdrzsn])", b="ال%1%1"}, + -- li- + art. + lām + {a="l(i)%-l%-(l)", b="ل%1ل%2%2"}, + -- assim. art. with waṣla + solar consonant + {a="'([%_%^%.]?[tdrzsn])%-", b="ال%1"}, + -- li- + art. + solar consonant is a special orthography + {a="l(i)%-l%-([%_%^%.]?[tdrzsn])", b="ل%1ل%2%2"}, + -- li- + assim. art. + solar consonant is a special orthography + {a="l(i)%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="ل%1ل%3%3"}, + -- art. with waṣla + initial unstable hamza + {a="'l%-([uai])", b="الا%1"}, + -- art. with waṣla + lunar consonant (i.e. what remains) + {a="'l%-", b="ال"} +} + +tanwin = { + {a="uN", b="ٌ"}, + {a="(aN)(_A)", b="ًى"}, + {a="(aN)(Y)", b="ًى"}, + {a="(T)(aN)", b="%1ً"}, + {a="([^TA])(aN)", b="%1ًا"}, + {a="iN", b="ٍ"} +} + +-- this is new +digraphsfv = { + {a="^([uai])", b="ا%1"}, -- initial alif without hamza + {a="(%s)([uai])", b="%1ا%2"}, -- initial alif without hamza + {a="%-%-", b="ـ"}, + {a="bb", b="بّ"}, + {a="BB", b="ـّ"}, + {a="tt", b="تّ"}, + {a="%_t%_t", b="ثّ"}, + {a="jj", b="جّ"}, + {a="%^g%^g", b="جّ"}, + {a="xx", b="خّ"}, + {a="%_h%_h", b="خّ"}, + {a="dd", b="دّ"}, + {a="%_d%_d", b="ذّ"}, + {a="rr", b="رّ"}, + {a="zz", b="زّ"}, + {a="ss", b="سّ"}, + {a="%^s%^s", b="شّ"}, + {a="%.s%.s", b="صّ"}, + {a="%.d%.d", b="ضّ"}, + {a="%.t%.t", b="طّ"}, + {a="%.z%.z", b="ظّ"}, + {a="%`%`", b="عّ"}, + {a="%.g%.g", b="غّ"}, + {a="ff", b="فّ"}, + {a="qq", b="قّ"}, + {a="kk", b="كّ"}, + {a="ll", b="لّ"}, + {a="mm", b="مّ"}, + {a="nn", b="نّ"}, + {a="hh", b="هّ"}, + {a="ww", b="وّ"}, + {a="yy", b="يّ"}, + -- sukūn end + {a="([%_%^%.]?[btjghxdzs%`fqklmnwy])([%s])", b="%1ْ%2"}, + {a="([%_%^%.]?[btjghxdzs%`fqklmnwy])([^%_uaiUAIًٌٍ])", b="%1ْ%2"}, + -- sukūn end + {a="_t", b="ث"}, + {a="%^g", b="ج"}, + {a="%.h", b="ح"}, + {a="_h", b="خ"}, + {a="_d", b="ذ"}, + {a="%^s", b="ش"}, + {a="%.s", b="ص"}, + {a="%.d", b="ض"}, + {a="%.t", b="ط"}, + {a="%.z", b="ظ"}, + {a="%.g", b="غ"}, + {a="(U)(A)", b="%1ا"}, + {a="WA", b="وا"}, + {a="(a)W", b="%1وا"}, + {a="_A", b="aى"}, + {a="_u", b="ٗ"}, + {a="_a", b="ٰ"}, + {a="_i", b="ٖ"}, + {a="%.b", b="ٮ"}, + {a="%.f", b="ڡ"}, + {a="%.k", b="ک"}, + {a="%.n", b="ں"} +} + +single = { + {a="b", b="ب"}, + {a="t", b="ت"}, + {a="j", b="ج"}, + {a="x", b="خ"}, + {a="d", b="د"}, + {a="r", b="ر"}, + {a="z", b="ز"}, + {a="s", b="س"}, + {a="f", b="ف"}, + {a="`", b="ع"}, + {a="f", b="ف"}, + {a="q", b="ق"}, + {a="k", b="ك"}, + {a="l", b="ل"}, + {a="m", b="م"}, + {a="n", b="ن"}, + {a="h", b="ه"}, + {a="w", b="و"}, + {a="y", b="ي"}, + {a="T", b="ة"}, + {a="Y", b="aى"}, + {a='"', b='ْ'}, + {a="([^0-9])%-([^0-9])", b="%1%2"}, + {a="B", b="ـ"} +} + +longv = { + {a="A", b="َا"}, + {a="U", b="ُو"}, + {a="I", b="ِي"} +} + +shortv = { + {a="u", b="ُ"}, + {a="a", b="َ"}, + {a="i", b="ِ"} +} + +null = { + {a="%|", b=""} +} + +-- return { +-- raw = raw, +-- hamza = hamza, +-- trigraphs = trigraphs, +-- tanwin = tanwin, +-- digraphs = digraphs, +-- single = single, +-- longv = longv, +-- shortv = shortv, +-- null =null +-- } diff --git a/arabluatex_novoc.lua b/arabluatex_novoc.lua index b62c1cc..ef8b0ff 100644 --- a/arabluatex_novoc.lua +++ b/arabluatex_novoc.lua @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. --]] -tanwinnovoc = { +tanwinnv = { {a="uN", b=""}, {a="(aN)(_A)", b="ى"}, {a="(T)(aN)", b="%1"}, @@ -30,13 +30,13 @@ tanwinnovoc = { {a="iN", b=""} } -longvnovoc = { +longvnv = { {a="A", b="ا"}, {a="U", b="و"}, {a="I", b="ي"} } -shortvnovoc = { +shortvnv = { {a="u", b=""}, {a="a", b=""}, {a="i", b=""} diff --git a/arabluatex_voc.lua b/arabluatex_voc.lua index 5ea68ee..5d35a82 100644 --- a/arabluatex_voc.lua +++ b/arabluatex_voc.lua @@ -67,9 +67,8 @@ raw = { hamza = { -- initial long u {a="%'%_U", b="أU"}, - -- madda and its historic writing + -- madda (historic writing below) {a="'A", b="آ"}, - {a="(A)(')", b="aآء"}, -- initial (needs both ^ and %W patterns) {a="^(')([ua])", b="أ%2"}, {a="^(')(i)", b="إ%2"}, @@ -81,6 +80,8 @@ hamza = { {a="([UI])(')([uai])$", b="%1ء%3"}, {a="([UI])(')([uai])(%W)", b="%1ء%3%4"}, -- middle + {a="(A)(')(i)", b="aآئ%3"}, -- historic madda + {a="(A)(')", b="aآء"}, -- historic madda {a="([UI])(')", b="%1ء"}, {a="([^uai])(')([uU])", b="%1ؤ%3"}, {a="([^uai])(')([aA])", b="%1أ%3"}, -- cgit v1.2.3