From c6543e722b554437618e19fcb6ccb5caf73cfc40 Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Wed, 15 Aug 2018 11:42:50 +0200 Subject: arabtex2utf: done programming functions exporting running paragraphs or Arabic verses --- arabluatex.dtx | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- arabluatex.lua | 84 ++++++++++++++++++++++++++-------------- 2 files changed, 165 insertions(+), 37 deletions(-) diff --git a/arabluatex.dtx b/arabluatex.dtx index 3dcfc1f..d54290a 100644 --- a/arabluatex.dtx +++ b/arabluatex.dtx @@ -3481,9 +3481,15 @@ muhaddamaTaN mi'_danatu-hu}: \arb[trans]{ra'aytu % 'l-\uc{`AdAt-i}}}.\footnote{This is odd in Arabic script, but % using such features as \cs{emph} or \cs{textbf} is a matter of % personal taste.} -% -% |\arb{\abjad{45} \framebox[1in][s]{kitAbu-hu fI 'l-`AdAti}}|\\ -% \arb{\abjad{45} \framebox[1in][s]{kitAbu-hu fI 'l-`AdAti}} +% \NewDocumentCommand{\rlframebox}{o o m}{ +% \IfNoValueTF{#2}{\IfNoValueTF{#1}{ +% \framebox{\setRL#3}}{\framebox[#1]{\setRL#3}} +% }{\framebox[#1][#2]{\setRL#3}}} +% +% |\arb{\abjad{45} \rlframebox[1in][s]{kitAbu-hu fI 'l-`AdAti}}|\\ +% \arb{\abjad{45} \rlframebox[1in][s]{kitAbu-hu fI +% 'l-`AdAti}}\,\footnote{\cs{rlframebox} has been adapted from +% \cs{framebox} for insertions of right-to-left text.} % \end{quote} % % The same applies to footnotes:--- @@ -4051,6 +4057,11 @@ wa-ya.sIru ta.hta 'l-jild-i \DeclareOptionX{fullvoc}{\def\al@mode{fullvoc}} \DeclareOptionX{novoc}{\def\al@mode{novoc}} \DeclareOptionX{trans}{\def\al@mode{trans}} +\define@boolkey{arabluatex.sty}[@pkg@]{export}[true]{% + \AtBeginDocument{\luadirect{al_openstream()}% + \MkArbBreak{@al@ob, @al@cb}} + \AtEndDocument{\luadirect{al_closestream()}} +} \ExecuteOptionsX{voc} \ProcessOptionsX\relax \def\al@mode@voc{voc} @@ -4315,10 +4326,10 @@ wa-ya.sIru ta.hta 'l-jild-i % Lua functions to insert transliterated Arabic text. Therefore, it is % not documented. 
% \begin{macrocode} -\NewDocumentCommand{\txarb}{+m}{\bgroup\textdir - TRT\arabicfont#1\egroup} -\NewDocumentCommand{\txtrans}{+m}{\bgroup\textdir - TLT\al@trans@font\al@trans@style#1\egroup} +\NewDocumentCommand{\txarb}{+m}{% + \bgroup\textdir TRT\arabicfont#1\egroup} +\NewDocumentCommand{\txtrans}{+m}{% + \bgroup\textdir TLT\al@trans@font\al@trans@style#1\egroup} % \end{macrocode} % \end{macro} % \end{macro} @@ -4451,6 +4462,7 @@ wa-ya.sIru ta.hta 'l-jild-i \define@key[al]{verse}{color}[]{\color{#1}} \define@boolkey[al]{verse}{utf}[true]{} \define@boolkey[al]{verse}{delim}[true]{} +\define@boolkey[al]{verse}{export}[true]{} \define@choicekey[al]{verse}{mode}{fullvoc, voc, novoc, trans}{\def\al@mode{#1}} \presetkeys[al]{verse}{metre={}, utf=false, @@ -4459,11 +4471,21 @@ wa-ya.sIru ta.hta 'l-jild-i % Then follows the environment itself: % \begin{macrocode} \NewDocumentEnvironment{arabverse}{O{}}% -{\par\centering\noindent\bgroup\setkeys[al]{verse}[metre]{#1}% +{\bgroup\setkeys[al]{verse}[width,gutter,color,utf,delim,mode,metre]{#1}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{\begin{arabverse}}% + \luadirect{tooutfile(\luastringN{[#1]})}% + \else\fi\else\fi\egroup% + \par\centering\noindent\bgroup\setkeys[al]{verse}[metre]{#1}% \ifx\al@mode\al@mode@trans% \ifal@verse@utf\setRL\else\setLR\fi% \else\setRL\fi}% -{\hfill\setkeys[al]{verse}[width,gutter,utf,mode]{#1}\egroup\par} +%{\hfill\setkeys[al]{verse}[width,gutter,utf,mode]{#1}\egroup\par} +{\hfill\setkeys[al]{verse}[width,gutter,color,utf,delim,mode,export]{#1}% + \egroup\par% + \bgroup\setkeys[al]{verse}[width,gutter,color,utf,delim,mode,metre]{#1}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{\end{arabverse}}\else\fi\else\fi\egroup} % \end{macrocode} % \begin{macro}{\bayt} % \changes{v1.6}{2016/12/17}{New macro \cs{bayt} for typesetting @@ -4490,11 +4512,17 @@ wa-ya.sIru ta.hta 'l-jild-i \NewDocumentCommand{\SetHemistichDelim}{m}{\def\al@hemistich@delim{#1}} 
\def\al@verse@stroke{\leavevmode\xleaders\hbox{\arb{--}}\hfill\kern0pt} \NewDocumentCommand{\bayt}{m o m}{% + \if@pkg@export\ifal@verse@export% + \luadirect{al_doexport("arabverse")}\else\fi\else\fi% {^^^^200b}% quick fix to a bug (or a missing feature) in either % luacolor or adjustbox \ifdefined\savenotes\savenotes\else\fi% \edef\al@tatweel{--}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{\bayt\@al@ob}\else\fi\else\fi% \adjustbox{width=\al@bayt@width, height=\Height}{\arb@utf{#1}}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{\@al@cb}\else\fi\else\fi% \IfNoValueTF{#2}{% \ifal@verse@delim\makebox[\al@gutter@width][c]{\al@hemistich@delim}% \else% @@ -4510,12 +4538,26 @@ wa-ya.sIru ta.hta 'l-jild-i \fi% \else% \ifx\al@mode\al@mode@trans% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{[}\else\fi\else\fi% \adjustbox{width=\al@gutter@width, height=\Height}{\arb@utf{#2}}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{]}\else\fi\else\fi% \else% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{[}\else\fi\else\fi% \makebox[\al@gutter@width][s]{\arb@utf{#2}}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{]}\else\fi\else\fi% \fi\fi}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{\@al@ob}\else\fi\else\fi% \adjustbox{width=\al@bayt@width, height=\Height}{\arb@utf{#3}}% + \if@pkg@export\ifal@verse@export% + \ArbOutFile{\@al@cb\\ }\else\fi\else\fi% \ifdefined\spewnotes\spewnotes\else\fi% + \if@pkg@export\ifal@verse@export% + \luadirect{al_doexport("no")}\else\fi\else\fi% } % \end{macrocode} % \end{macro} @@ -4599,6 +4641,64 @@ wa-ya.sIru ta.hta 'l-jild-i \AtBeginDocument{\deffootnote{2em}{1.6em}{\LR{\thefootnotemark}.\enskip}}}} % \end{macrocode} % \end{macro} +% +% \paragraph{Exporting Unicode Arabic to external file} +% \begin{macro}{\SetArbOutSuffix} +% \changes{v1.13}{2018/08/13}{Sets a suffix to be appended to the +% filename of the external Unicode file.}By default, |_out| is the +% suffix to be appended to the external file in which +% 
\package{arabluatex} exports Unicode in place of +% |arabtex| or |buckwalter| strings. Any other suffix may be +% set with \cs{SetArbOutSuffix}\marg{suffix}. +% \begin{macrocode} +\NewDocumentCommand{\SetArbOutSuffix}{m}{% + \luadirect{al_utffilesuffix(\luastringN{#1})}} +% \end{macrocode} +% \end{macro} +% \begin{macro}{\ArbOutFile} +% \changes{v1.13}{2018/08/13}{Silently exports its argument in the selected +% external file.} \cs{ArbOutFile}\oarg{newline}\marg{string} silently +% exports \meta{string} to the selected external file. It may take +% |newline| as an optional argument in which case a carriage return is +% appended to |string|. +% \begin{macrocode} +\NewDocumentCommand{\ArbOutFile}{O{no} +m}{% + \if@pkg@export% + \luadirect{tooutfile(\luastringN{#2}, "#1")}\else\fi} +% \end{macrocode} +% \end{macro} +% \begin{environment}{arabexport} +% \changes{v1.13}{2018/08/13}{Processes and prints its argument in +% the current file and exports it in full Unicode in the selected +% external \texttt{.tex} file.} The |arabexport| environment +% processes and prints its argument unchanged to the current |.pdf| +% file. Additionally, if \package{arabluatex} is loaded with the +% |export| option, this argument is exported to the selected +% external |.tex| file with Unicode in place of the original +% |arabtex| or |buckwalter| strings. +% \begin{macrocode} +\NewEnviron{arabexport}{% + \if@pkg@export% + \par + \BODY + \luadirect{al_doexport("yes")}% + \luadirect{tex.sprint(arbtoutf(\luastringO{\BODY}))}% + \else\par\BODY\fi + }[\if@pkg@export\par\luadirect{al_doexport("no")}\else\par\fi] +% \end{macrocode} +% \end{environment} +% \begin{macro}{\arbpardir} +% \changes{v1.13}{2018/08/13}{Sets the direction of Arabic paragraphs +% once they are converted to Unicode.} \cs{arbpardir} is automatically +% inserted by \package{arabluatex} at the beginning of Arabic +% paragraphs converted to Unicode so that they are printed in the +% right direction. 
+% \begin{macrocode} +\NewDocumentCommand{\arbpardir}{}{% + \ifx\al@mode\al@mode@trans\setLR\else\setRL\fi} +% \end{macrocode} +% \end{macro} +% % \subsection*{Errors and Warnings} % \begin{macrocode} \newcommand{\al@warning}[1]{\PackageWarning{arabluatex}{#1}} diff --git a/arabluatex.lua b/arabluatex.lua index c0d9022..ac6a251 100644 --- a/arabluatex.lua +++ b/arabluatex.lua @@ -49,7 +49,7 @@ local cmdstar = lpeg.Cs(spce * lpeg.P("*")) local bsqbracketsii = lpeg.Cs(bsqbrackets^-2) local bcbracesii = lpeg.Cs(bcbraces^-2) local cmd = lpeg.Cs(dblbkslash * ascii^1 * cmdstar^-1) -local cmdargs = lpeg.Cs(spce * bsqbracketsii * bcbracesii * bsqbrackets^-1) +local cmdargs = lpeg.Cs(spce^-1 * bsqbracketsii * bcbracesii * bsqbrackets^-1) local arbargs = lpeg.Cs(spce^-1 * bsqbrackets^-1 * bcbraces) local baytargs = lpeg.Cs(spce * bcbraces * bsqbrackets^-1 * bcbraces) @@ -642,7 +642,9 @@ local function processbuckw(str) return str end ---- *** +-- The following functions produce a copy of the original .tex source +-- file in which all arabtex strings are replaced with Unicode +-- equivalents local utffilesuffix = "_out" local export_utf = "no" @@ -671,14 +673,21 @@ function al_openstream() end local function processarbtoutf(str) - str = "\n\\begin{arbexport}\n"..str + str = "\\begin{arabexport}"..str str = string.gsub(str, "(\\prname)%s?(%b{})", function(tag, body) body = string.sub(body, 2, -2) return string.format("\\arb[trans]{\\uc{%s}}", body) end) - str = string.gsub(str, "(\\begin%s?{arab})(%b[])", "\\par\\bgroup\\arbpardir\\arb%2{") + str = string.gsub(str, "(\\begin%s?{arab})(%b[])", function(tag, body) + if string.find(body, "trans") then + return string.format("\\par\\bgroup\\setLR\\arb%s{", body) + else + return string.format("\\par\\bgroup\\setRL\\arb%s{", body) + end + end) str = string.gsub(str, "(\\begin%s?{arab})", "\\par\\bgroup\\arbpardir\\arb{") str = string.gsub(str, "\\end%s?{arab}", "}\\egroup\\par") + -- This does not work, while the following two do. 
Look into this later. -- str = gsub(str, lpeg.Cs("\\arb") * spcenc * bsqbrackets^-1 * bcbraces, function(tag, opt, body) -- body = string.sub(body, 2, -2) -- return string.format("%s%s\\@al@pr@ob%s\\@al@pr@cb", tag, opt, body) @@ -699,50 +708,50 @@ local function processarbtoutf(str) body = string.sub(body, 2, -2) return string.format("%s\\@al@pr@ob%s\\@al@pr@cb", tag, body) end) - str = string.gsub(str, "{", "\\@al@ob ") - str = string.gsub(str, "}", "\\@al@cb ") + str = string.gsub(str, "{", "\\@al@ob") + str = string.gsub(str, "}", "\\@al@cb") str = string.gsub(str, "\\@al@pr@ob", "{") str = string.gsub(str, "\\@al@pr@cb", "}") str = string.gsub(str, "(%b{})", function(body) body = string.sub(body, 2, -2) - body = string.gsub(body, "\\@al@ob%s?", "{") - body = string.gsub(body, "\\@al@cb%s?", "}") + body = string.gsub(body, "(%s?)(\\@al@ob)", "%1{") + body = string.gsub(body, "(\\@al@cb)(%s?)", "}%2") return string.format("{%s}", body) end) - str = str.."\n\\end{arbexport}" + str = str.."\\end{arabexport}" return str end function arbtoutf(str) str = processarbtoutf(str) - str = "\\ToOutFile{"..str.."}" - str = string.gsub(str, "(\\ToOutFile)%s?(%b{})", function(tag, body) + str = "\\ArbOutFile{"..str.."}" + str = string.gsub(str, "(\\ArbOutFile)%s?(%b{})", function(tag, body) body = string.sub(body, 2, -2) - body = gsub(body, lpeg.Cs("\\arb") * arbargs, "}%1%2\\ToOutFile{") + body = gsub(body, lpeg.Cs("\\arb") * arbargs, "}%1%2\\ArbOutFile{") return string.format("%s{%s}", tag, body) end) - str = string.gsub(str, "(\\ToOutFile)%s?(%b{})", function(tag, body) + str = string.gsub(str, "(\\ArbOutFile)%s?(%b{})", function(tag, body) body = string.sub(body, 2, -2) - body = string.gsub(body, "(\\abjad)%s?(%b{})", "}%1%2\\ToOutFile{") + body = string.gsub(body, "(\\abjad)%s?(%b{})", "}%1%2\\ArbOutFile{") return string.format("%s{%s}", tag, body) end) - str = string.gsub(str, "(\\ToOutFile)%s?(%b{})", function(tag, body) + str = string.gsub(str, 
"(\\ArbOutFile)%s?(%b{})", function(tag, body) body = string.sub(body, 2, -2) - body = string.gsub(body, "(\\arbmark)%s?(%b{})", "}%1%2\\ToOutFile{") + body = string.gsub(body, "(\\arbmark)%s?(%b{})", "}%1%2\\ArbOutFile{") return string.format("%s{%s}", tag, body) end) - -- str = string.gsub(str, "(\\arb%s?)(%b[])(%b{})", "}%1%2%3\\ToOutFile{") - -- str = string.gsub(str, "(\\arb)%s?(%b{})", "}%1%2\\ToOutFile{") - -- str = gsub(str, lpeg.Cs("\\bayt") * baytargs, "}\\ToOutFile{\\bayt}%1%2\\ToOutFile{") --- str = gsub(str, lpeg.Cs("\\arb") * arbargs, "}%1%2\\ToOutFile{") --- str = string.gsub(str, "(\\abjad)%s?(%b{})", "}%1%2\\ToOutFile{") --- str = string.gsub(str, "(\\abjad)(%s?)(%b{})", "}%1%3\\tooutfile{") + -- str = string.gsub(str, "(\\arb%s?)(%b[])(%b{})", "}%1%2%3\\ArbOutFile{") + -- str = string.gsub(str, "(\\arb)%s?(%b{})", "}%1%2\\ArbOutFile{") + -- str = gsub(str, lpeg.Cs("\\bayt") * baytargs, "}\\ArbOutFile{\\bayt}%1%2\\ArbOutFile{") +-- str = gsub(str, lpeg.Cs("\\arb") * arbargs, "}%1%2\\ArbOutFile{") +-- str = string.gsub(str, "(\\abjad)%s?(%b{})", "}%1%2\\ArbOutFile{") +-- str = string.gsub(str, "(\\abjad)(%s?)(%b{})", "}%1%3\\ArbOutFile{") -- str = string.gsub(str, "(\\begin%s?{arab})", "}%1") --- str = string.gsub(str, "(\\end%s?{arab})", "%1\\ToOutFile{") +-- str = string.gsub(str, "(\\end%s?{arab})", "%1\\ArbOutFile{") -- str = string.gsub(str, "(\\begin)", "\n%1") -- str = string.gsub(str, "(\\end)", "\n%1") -- str = string.gsub(str, "(\\bayt)", "\n%1") --- str = string.gsub(str, "(\\\\)", "%1\n") + -- str = string.gsub(str, "(\\\\)", "%1\n") return str end @@ -784,8 +793,11 @@ function al_closestream() --]] t = string.gsub(t, "\\arabicfont{}", "") t = string.gsub(t, "\\par ", "\n\n") - t = string.gsub(t, "\\@al@ob%s?", "{") - t = string.gsub(t, "\\@al@cb", "}") + t = string.gsub(t, "(%s?)(\\@al@ob%s?)", "{") + t = string.gsub(t, "(\\@al@cb)(%s?)", "}%2") + t = gsub(t, lpeg.Cs("\\begin") * spcenc^-1 * bcbraces * cmdargs, "\n%1%2%3\n") + t = 
string.gsub(t, "(\\\\)", "%1\n") + t = string.gsub(t, "(\\end%s?)(%b{})", "\n%1%2") t = string.gsub(t, "\n\n\n", "\n\n") if string.find(t, "\\begin%s?{document}.-\\arb%s?[%[%{]") then tex.print([[\unexpanded{\PackageWarningNoLine{arabluatex}{There are still 'arabtex' strings to be converted. Open ]]..tex.jobname..utffilesuffix..".tex"..[[ and compile it one more time}}]]) @@ -800,7 +812,7 @@ function al_closestream() return true end - +-- Process standard arabluatex modes: function processvoc(str, rules, scheme) str = takeoutarb(str) str = processarbnull(str, scheme) @@ -820,6 +832,10 @@ function processvoc(str, rules, scheme) if export_utf == "yes" then tofile = "\\txarb{"..str.."}" tooutfile(tofile) + elseif export_utf == "arabverse" then + tofile = "\\txarb{"..str.."}" + tooutfile(tofile) + return str else return str end @@ -847,6 +863,10 @@ function processfullvoc(str, rules, scheme) if export_utf == "yes" then tofile = "\\txarb{"..str.."}" tooutfile(tofile) + elseif export_utf == "arabverse" then + tofile = "\\txarb{"..str.."}" + tooutfile(tofile) + return str else return str end @@ -872,6 +892,10 @@ function processnovoc(str, rules, scheme) if export_utf == "yes" then tofile = "\\txarb{"..str.."}" tooutfile(tofile) + elseif export_utf == "arabverse" then + tofile = "\\txarb{"..str.."}" + tooutfile(tofile) + return str else return str end @@ -899,7 +923,11 @@ function processtrans(str, mode, rules, scheme) str = unprotectarb(str) if export_utf == "yes" then tofile = str - tooutfile(str) + tooutfile(str) + elseif export_utf == "arabverse" then + tofile = str + tooutfile(tofile) + return str else return str end -- cgit v1.2.3