From 8a3b5fe36fca8b7450a4b7a2545436201eba83be Mon Sep 17 00:00:00 2001 From: Robert Alessi Date: Fri, 28 May 2021 12:38:48 +0200 Subject: use lpeg to relocate anchors. adapted frontier pattern to handle dots, hyphens and underscores in xml:ids --- ekdosis.dtx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ekdosis.dtx b/ekdosis.dtx index b51cf53..7e24140 100644 --- a/ekdosis.dtx +++ b/ekdosis.dtx @@ -61,7 +61,7 @@ along with this program. If not, see %\NeedsTeXFormat{LaTeX2e}[1999/12/01] %\ProvidesPackage{ekdosis} %<*package> - [2021/05/27 v1.3-dev Typesetting TEI xml-compliant critical editions] + [2021/05/28 v1.3-dev Typesetting TEI xml-compliant critical editions] % %<*driver> \begin{filecontents}[noheader,overwrite]{bibdata.xml} @@ -7507,7 +7507,7 @@ Sample text with a \textcolor{red}{word} in red. % \end{macrocode} % \end{macro} % \begin{macro}{\teidirect} -% \changes{v1.3}{2021/05/27}{direct insertion of elements in the +% \changes{v1.3}{2021/05/28}{direct insertion of elements in the % \texttt{TEI xml} file} % \cs{teidirect}\oarg{xml attributes}\marg{xml element}\marg{code} % does nothing in \LaTeX. Its only use is to insert elements in the @@ -7866,7 +7866,7 @@ Sample text with a \textcolor{red}{word} in red. % \end{macrocode} % \end{macro} % \begin{macro}{\SetApparatusNoteLanguage} -% \changes{v1.3}{2021/05/27}{defines an alternate language to be +% \changes{v1.3}{2021/05/28}{defines an alternate language to be % applied in note apparatus entries} % \cs{SetApparatusNoteLang}\marg{languagename} can be used when % it is needed to apply in entries introduced by the \cs{note} command @@ -9397,6 +9397,8 @@ local app = lpeg.Cs("app") local lemrdg = lpeg.Cs(lpeg.Cs("lem") + lpeg.Cs("rdg")) local note = lpeg.Cs("note") local inlem = lpeg.Cs{ "")) + lpeg.V(1))^0 * "" } +local inanchor = lpeg.Cs{ "")) + lpeg.V(1))^0 * ">" } +local inopeningnote = lpeg.Cs{ "")) + lpeg.V(1))^0 * ">" } local lnbrk = lpeg.Cs("\\\\") local poemline = lpeg.Cs(lnbrk * spcenc^-1 * lpeg.S("*!")^-1 * bsqbrackets^-1 * spcenc^-1) local poemlinebreak = lpeg.Cs(lnbrk * spcenc^-1 * lpeg.P(">") * bsqbrackets^-1 * spcenc^-1) @@ -9708,7 +9710,7 @@ function ekdosis.getsiglum(str, opt) str = string.gsub(str, shorthands[i].a, shorthands[i].c) end for i = 1,#idsRend do - str = string.gsub(str, "(%f[%w])"..idsRend[i].xmlid.."(%,)", + str = string.gsub(str, "(%f[%w%.%-%_])"..idsRend[i].xmlid.."(%,)", "%1#"..idsRend[i].xmlid.."%2") ctrl = string.gsub(ctrl, idsRend[i].xmlid.."%,", "") end @@ -9747,7 +9749,7 @@ local cmdtotags = { {a="textsf", b="hi", c=" rend=\"sf\""}, {a="arbup", b="hi", c=" rend=\"sup\""}, {a="txarb", b="s", c=" xml:lang=\"arb\""}, - {a="arb", b="foreign", + {a="arb", b="span", c=" xml:lang=\"ar-Latn\" type=\"transliterated\" subtype=\"arabtex\""} } @@ -9920,7 +9922,7 @@ local function note_totei(str) end local function remove_extra_anchors(str) - str = string.gsub(str, "()()()", function(enote, anchor, bnote) + str = gsub(str, lpeg.Cs("") * inanchor * inopeningnote, function(enote, anchor, bnote) local id_one = string.gsub(anchor, "(%)", "%2") id_one = string.sub(get_attr_value(id_one, "xml:id"), 2, -2) local id_two = string.match(bnote, "target%=.-right%((.-)%)") -- cgit v1.2.3