*: Reorganize code

2026-05-02 21:32:34 +00:00 · 2024-09-26 15:24:39 +02:00
parent 2c71b50438
commit dc8842106b
74 changed files with 114 additions and 73 deletions
--- a/renderer/text.go
+++ b/renderer/text.go
@@ -0,0 +1,136 @@
+package renderer
+
+import (
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/renderer"
+	"github.com/yuin/goldmark/renderer/html"
+	"github.com/yuin/goldmark/util"
+)
+
+// ConfluenceTextRenderer slightly alters the default goldmark behavior for
+// inline text block. It allows for soft breaks
+// (c.f. https://spec.commonmark.org/0.30/#softbreak)
+// to be rendered into HTML as either '\n' (the goldmark default)
+// or as ' '.
+// This latter option is useful for Confluence,
+// which inserts <br> tags into uploaded HTML where it sees '\n'.
+// See also https://sembr.org/ for partial motivation.
+type ConfluenceTextRenderer struct {
+	html.Config
+	softBreak rune
+}
+
+// NewConfluenceTextRenderer creates a new instance of the ConfluenceTextRenderer
+func NewConfluenceTextRenderer(stripNL bool, opts ...html.Option) renderer.NodeRenderer {
+	sb := '\n'
+	if stripNL {
+		sb = ' '
+	}
+	return &ConfluenceTextRenderer{
+		Config:    html.NewConfig(),
+		softBreak: sb,
+	}
+}
+
+// RegisterFuncs implements NodeRenderer.RegisterFuncs .
+func (r *ConfluenceTextRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
+	reg.Register(ast.KindText, r.renderText)
+}
+
+// This is taken from https://github.com/yuin/goldmark/blob/v1.6.0/renderer/html/html.go#L719
+// with the hardcoded '\n' for soft breaks swapped for the configurable r.softBreak
+func (r *ConfluenceTextRenderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	n := node.(*ast.Text)
+	segment := n.Segment
+	if n.IsRaw() {
+		r.Writer.RawWrite(w, segment.Value(source))
+	} else {
+		value := segment.Value(source)
+		r.Writer.Write(w, value)
+		if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
+			if r.XHTML {
+				_, _ = w.WriteString("<br />\n")
+			} else {
+				_, _ = w.WriteString("<br>\n")
+			}
+		} else if n.SoftLineBreak() {
+			if r.EastAsianLineBreaks != html.EastAsianLineBreaksNone && len(value) != 0 {
+				sibling := node.NextSibling()
+				if sibling != nil && sibling.Kind() == ast.KindText {
+					if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 {
+						thisLastRune := util.ToRune(value, len(value)-1)
+						siblingFirstRune, _ := utf8.DecodeRune(siblingText)
+						// Inline the softLineBreak function as it's not public
+						writeLineBreak := false
+						switch r.EastAsianLineBreaks {
+						case html.EastAsianLineBreaksNone:
+							writeLineBreak = false
+						case html.EastAsianLineBreaksSimple:
+							writeLineBreak = !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
+						case html.EastAsianLineBreaksCSS3Draft:
+							writeLineBreak = eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
+						}
+
+						if writeLineBreak {
+							_ = w.WriteByte(byte(r.softBreak))
+						}
+					}
+				}
+			} else {
+				_ = w.WriteByte(byte(r.softBreak))
+			}
+		}
+	}
+	return ast.WalkContinue, nil
+}
+
+func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
+	// Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
+	// References:
+	//   - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
+	//   - https://github.com/w3c/csswg-drafts/issues/5086
+
+	// Rule1:
+	//   If the character immediately before or immediately after the segment break is
+	//   the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
+	if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
+		return false
+	}
+
+	// Rule2:
+	//   Otherwise, if the East Asian Width property of both the character before and after the segment break is
+	//   F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
+	thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
+	siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
+	if (thisLastRuneEastAsianWidth == "F" ||
+		thisLastRuneEastAsianWidth == "W" ||
+		thisLastRuneEastAsianWidth == "H") &&
+		(siblingFirstRuneEastAsianWidth == "F" ||
+			siblingFirstRuneEastAsianWidth == "W" ||
+			siblingFirstRuneEastAsianWidth == "H") {
+		return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
+	}
+
+	// Rule3:
+	//   Otherwise, if either the character before or after the segment break belongs to
+	//   the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
+	//   then the segment break is removed.
+	if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
+		unicode.IsPunct(thisLastRune) ||
+		thisLastRune == '\u3000' ||
+		util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
+		unicode.IsPunct(siblingFirstRune) ||
+		siblingFirstRune == '\u3000' {
+		return false
+	}
+
+	// Rule4:
+	//   Otherwise, the segment break is converted to a space (U+0020).
+	return true
+}