package mark
import (
"bytes"
"crypto/sha1"
"encoding/hex"
"errors"
"fmt"
stdhtml "html"
"io"
"os"
"path/filepath"
"regexp"
"slices"
"strings"
"time"
"unicode/utf8"
"github.com/bmatcuk/doublestar/v4"
"github.com/kovetskiy/mark/v16/attachment"
"github.com/kovetskiy/mark/v16/confluence"
"github.com/kovetskiy/mark/v16/includes"
"github.com/kovetskiy/mark/v16/macro"
markmd "github.com/kovetskiy/mark/v16/markdown"
"github.com/kovetskiy/mark/v16/metadata"
"github.com/kovetskiy/mark/v16/page"
"github.com/kovetskiy/mark/v16/stdlib"
"github.com/kovetskiy/mark/v16/types"
"github.com/kovetskiy/mark/v16/vfs"
"github.com/rs/zerolog/log"
)
// markerRegex matches an inline-comment marker element in Confluence storage
// HTML. Capture group 1 is the marker's ac:ref attribute value and capture
// group 2 is the text wrapped by the marker. The (?s) flag lets the wrapped
// text span multiple lines.
var markerRegex = regexp.MustCompile(
	`(?s)<ac:inline-comment-marker ac:ref="([^"]+)">(.*?)</ac:inline-comment-marker>`,
)
// Config bundles every option that controls a Mark run. It is passed by
// value to Run and ProcessFile.
type Config struct {
	// Connection settings used to build the Confluence API client.
	BaseURL               string
	Username              string
	Password              string
	PageID                string
	InsecureSkipTLSVerify bool

	// File selection: the pattern Run expands to find input files.
	Files string

	// Behaviour switches.
	CompileOnly     bool
	DryRun          bool
	ContinueOnError bool
	CI              bool

	// Page content defaults handed to metadata extraction.
	Space                    string
	Parents                  []string
	TitleFromH1              bool
	TitleFromFilename        bool
	TitleAppendGeneratedHash bool
	ContentAppearance        string

	// Page update options.
	MinorEdit        bool
	VersionMessage   string
	EditLock         bool
	ChangesOnly      bool
	PreserveComments bool

	// Rendering options for the markdown compiler.
	DropH1          bool
	StripLinebreaks bool
	MermaidScale    float64
	D2Scale         float64
	Features        []string
	ImageAlign      string
	IncludePath     string

	// Output receives result output such as published page URLs and
	// compiled HTML. A nil Output means the output is discarded; the CLI
	// sets this to os.Stdout.
	Output io.Writer
}

// output yields the writer results should go to. When Output is unset it
// returns io.Discard, so library callers never get implicit stdout writes.
func (c Config) output() io.Writer {
	if c.Output == nil {
		return io.Discard
	}
	return c.Output
}
// Run expands Config.Files, processes every matching file with ProcessFile,
// and writes the URL of each updated page to the configured output.
//
// When nothing matches, Run returns an error unless Config.CI is set, in
// which case it only logs a warning. With Config.ContinueOnError a failing
// file is logged and skipped, and a summary error is returned after the
// remaining files have been processed.
func Run(config Config) error {
	api := confluence.NewAPI(config.BaseURL, config.Username, config.Password, config.InsecureSkipTLSVerify)

	matched, err := doublestar.FilepathGlob(config.Files)
	if err != nil {
		return err
	}

	if len(matched) == 0 {
		const noMatch = "no files matched"
		if !config.CI {
			return errors.New(noMatch)
		}
		log.Warn().Msg(noMatch)
	}

	failed := false
	for _, path := range matched {
		log.Info().Msgf("processing %s", path)

		page, procErr := ProcessFile(path, api, config)
		if procErr != nil {
			if !config.ContinueOnError {
				return procErr
			}
			log.Error().Err(procErr).Msgf("processing %s", path)
			failed = true
			continue
		}

		// ProcessFile returns a nil page in compile-only and dry-run modes.
		if page == nil {
			continue
		}

		pageURL := api.BaseURL + page.Links.Full
		log.Info().Msgf("page successfully updated: %s", pageURL)
		if _, writeErr := fmt.Fprintln(config.output(), pageURL); writeErr != nil {
			return writeErr
		}
	}

	if failed {
		return errors.New("one or more files failed to process")
	}
	return nil
}
// ProcessFile processes a single markdown file and publishes it to Confluence.
// Returns nil for the page info when compile-only or dry-run mode is active.
func ProcessFile(file string, api *confluence.API, config Config) (*confluence.PageInfo, error) {
markdown, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("unable to read file %q: %w", file, err)
}
markdown = bytes.ReplaceAll(markdown, []byte("\r\n"), []byte("\n"))
meta, markdown, err := metadata.ExtractMeta(
markdown,
config.Space,
config.TitleFromH1,
config.TitleFromFilename,
file,
config.Parents,
config.TitleAppendGeneratedHash,
config.ContentAppearance,
)
if err != nil {
return nil, fmt.Errorf("unable to extract metadata from file %q: %w", file, err)
}
if config.PageID != "" && meta != nil {
log.Warn().Msg(
`specified file contains metadata, ` +
`but it will be ignored due specified command line URL`,
)
meta = nil
}
if config.PageID == "" && meta == nil {
return nil, fmt.Errorf(
"specified file doesn't contain metadata and URL is not specified " +
"via command line or doesn't contain pageId GET-parameter",
)
}
if meta != nil {
if meta.Space == "" {
return nil, fmt.Errorf(
"space is not set ('Space' header is not set and '--space' option is not set)",
)
}
if meta.Title == "" {
return nil, fmt.Errorf(
"page title is not set: use the 'Title' header, " +
"or the --title-from-h1 / --title-from-filename flags",
)
}
}
std, err := stdlib.New(api)
if err != nil {
return nil, fmt.Errorf("unable to retrieve standard library: %w", err)
}
templates := std.Templates
var recurse bool
for {
templates, markdown, recurse, err = includes.ProcessIncludes(
filepath.Dir(file),
config.IncludePath,
markdown,
templates,
)
if err != nil {
return nil, fmt.Errorf("unable to process includes: %w", err)
}
if !recurse {
break
}
}
macros, markdown, err := macro.ExtractMacros(
filepath.Dir(file),
config.IncludePath,
markdown,
templates,
)
if err != nil {
return nil, fmt.Errorf("unable to extract macros: %w", err)
}
for _, m := range macros {
markdown, err = m.Apply(markdown)
if err != nil {
return nil, fmt.Errorf("unable to apply macro: %w", err)
}
}
links, err := page.ResolveRelativeLinks(
api,
meta,
markdown,
filepath.Dir(file),
config.Space,
config.TitleFromH1,
config.TitleFromFilename,
config.Parents,
config.TitleAppendGeneratedHash,
)
if err != nil {
return nil, fmt.Errorf("unable to resolve relative links: %w", err)
}
markdown = page.SubstituteLinks(markdown, links)
if config.DryRun {
if meta != nil {
if _, _, err := page.ResolvePage(true, api, meta); err != nil {
return nil, fmt.Errorf("unable to resolve page location: %w", err)
}
} else if config.PageID != "" {
if _, err := api.GetPageByID(config.PageID); err != nil {
return nil, fmt.Errorf("unable to resolve page by ID: %w", err)
}
}
}
if config.CompileOnly || config.DryRun {
if config.DropH1 {
log.Info().Msg("the leading H1 heading will be excluded from the Confluence output")
}
imageAlign, err := getImageAlign(config.ImageAlign, meta)
if err != nil {
return nil, fmt.Errorf("unable to determine image-align: %w", err)
}
cfg := types.MarkConfig{
MermaidScale: config.MermaidScale,
D2Scale: config.D2Scale,
DropFirstH1: config.DropH1,
StripNewlines: config.StripLinebreaks,
Features: config.Features,
ImageAlign: imageAlign,
}
html, _, err := markmd.CompileMarkdown(markdown, std, file, cfg)
if err != nil {
return nil, fmt.Errorf("unable to compile markdown: %w", err)
}
if _, err := fmt.Fprintln(config.output(), html); err != nil {
return nil, err
}
return nil, nil
}
var target *confluence.PageInfo
var pageCreated bool
if meta != nil {
parent, pg, err := page.ResolvePage(false, api, meta)
if err != nil {
return nil, fmt.Errorf("error resolving page %q: %w", meta.Title, err)
}
if pg == nil {
pg, err = api.CreatePage(meta.Space, meta.Type, parent, meta.Title, ``)
if err != nil {
return nil, fmt.Errorf("can't create %s %q: %w", meta.Type, meta.Title, err)
}
// A delay between the create and update call helps mitigate a 409
// conflict that can occur when attempting to update a page just
// after it was created. See issues/139.
time.Sleep(1 * time.Second)
pageCreated = true
}
target = pg
} else {
pg, err := api.GetPageByID(config.PageID)
if err != nil {
return nil, fmt.Errorf("unable to retrieve page by id: %w", err)
}
if pg == nil {
return nil, fmt.Errorf("page with id %q not found", config.PageID)
}
target = pg
}
// Collect attachments declared via directives.
var declaredAttachments []string
if meta != nil {
declaredAttachments = meta.Attachments
}
localAttachments, err := attachment.ResolveLocalAttachments(
vfs.LocalOS,
filepath.Dir(file),
declaredAttachments,
)
if err != nil {
return nil, fmt.Errorf("unable to locate attachments: %w", err)
}
attaches, err := attachment.ResolveAttachments(api, target, localAttachments)
if err != nil {
return nil, fmt.Errorf("unable to create/update attachments: %w", err)
}
markdown = attachment.CompileAttachmentLinks(markdown, attaches)
if config.DropH1 {
log.Info().Msg("the leading H1 heading will be excluded from the Confluence output")
}
imageAlign, err := getImageAlign(config.ImageAlign, meta)
if err != nil {
return nil, fmt.Errorf("unable to determine image-align: %w", err)
}
cfg := types.MarkConfig{
MermaidScale: config.MermaidScale,
D2Scale: config.D2Scale,
DropFirstH1: config.DropH1,
StripNewlines: config.StripLinebreaks,
Features: config.Features,
ImageAlign: imageAlign,
}
html, inlineAttachments, err := markmd.CompileMarkdown(markdown, std, file, cfg)
if err != nil {
return nil, fmt.Errorf("unable to compile markdown: %w", err)
}
if _, err = attachment.ResolveAttachments(api, target, inlineAttachments); err != nil {
return nil, fmt.Errorf("unable to create/update attachments: %w", err)
}
var layout, sidebar string
var labels []string
var contentAppearance, emoji string
if meta != nil {
layout = meta.Layout
sidebar = meta.Sidebar
labels = meta.Labels
contentAppearance = meta.ContentAppearance
emoji = meta.Emoji
}
{
var buffer bytes.Buffer
err := std.Templates.ExecuteTemplate(
&buffer,
"ac:layout",
struct {
Layout string
Sidebar string
Body string
}{
Layout: layout,
Sidebar: sidebar,
Body: html,
},
)
if err != nil {
return nil, fmt.Errorf("unable to execute layout template: %w", err)
}
html = buffer.String()
}
var finalVersionMessage string
shouldUpdatePage := true
if config.ChangesOnly {
contentHash := sha1Hash(html)
log.Debug().Msgf("content hash: %s", contentHash)
re := regexp.MustCompile(`\[v([a-f0-9]{40})]$`)
if matches := re.FindStringSubmatch(target.Version.Message); len(matches) > 1 {
log.Debug().Msgf("previous content hash: %s", matches[1])
if matches[1] == contentHash {
log.Info().Msgf("page %q is already up to date", target.Title)
shouldUpdatePage = false
}
}
finalVersionMessage = fmt.Sprintf("%s [v%s]", config.VersionMessage, contentHash)
} else {
finalVersionMessage = config.VersionMessage
}
// Only fetch the old body and inline comments when we know the page will
// actually be updated. This avoids unnecessary API round-trips for no-op
// runs (e.g. when --changes-only determines the content is unchanged).
if shouldUpdatePage && config.PreserveComments && !pageCreated {
pg, err := api.GetPageByIDExpanded(target.ID, "ancestors,version,body.storage")
if err != nil {
return nil, fmt.Errorf("unable to retrieve page body for comments: %w", err)
}
target = pg
comments, err := api.GetInlineComments(target.ID)
if err != nil {
return nil, fmt.Errorf("unable to retrieve inline comments: %w", err)
}
html, err = mergeComments(html, target.Body.Storage.Value, comments)
if err != nil {
return nil, fmt.Errorf("unable to merge inline comments: %w", err)
}
}
if shouldUpdatePage {
err = api.UpdatePage(
target,
html,
config.MinorEdit,
finalVersionMessage,
contentAppearance,
emoji,
)
if err != nil {
return nil, fmt.Errorf("unable to update page: %w", err)
}
}
if meta != nil {
if err := updateLabels(api, target, labels); err != nil {
return nil, err
}
}
if config.EditLock {
log.Info().Msgf(
`edit locked on page %q by user %q to prevent manual edits`,
target.Title,
config.Username,
)
if err := api.RestrictPageUpdates(target, config.Username); err != nil {
return nil, fmt.Errorf("unable to restrict page updates: %w", err)
}
}
return target, nil
}
// updateLabels reconciles the page's "global" labels with metaLabels: labels
// missing from the page are added, then labels absent from metaLabels are
// deleted one by one. It returns the first API error encountered.
func updateLabels(api *confluence.API, target *confluence.PageInfo, metaLabels []string) error {
	current, err := api.GetPageLabels(target, "global")
	if err != nil {
		return err
	}

	// debugLabels emits a heading followed by the label set at debug level.
	debugLabels := func(heading string, labels any) {
		log.Debug().Msg(heading)
		log.Debug().Interface("labels", labels).Send()
	}

	debugLabels("Page Labels:", current.Labels)
	debugLabels("Meta Labels:", metaLabels)

	toDelete := determineLabelsToRemove(current, metaLabels)
	debugLabels("Del Labels:", toDelete)

	toAdd := determineLabelsToAdd(metaLabels, current)
	debugLabels("Add Labels:", toAdd)

	if len(toAdd) > 0 {
		if _, addErr := api.AddPageLabels(target, toAdd); addErr != nil {
			return fmt.Errorf("error adding labels: %w", addErr)
		}
	}

	for _, name := range toDelete {
		if _, delErr := api.DeletePageLabel(target, name); delErr != nil {
			return fmt.Errorf("error deleting label %q: %w", name, delErr)
		}
	}

	return nil
}
// determineLabelsToRemove returns the names of the labels in labelInfo that
// have no case-insensitive match in metaLabels, in their original order.
func determineLabelsToRemove(labelInfo *confluence.LabelInfo, metaLabels []string) []string {
	var stale []string
	for _, existing := range labelInfo.Labels {
		wanted := slices.IndexFunc(metaLabels, func(candidate string) bool {
			return strings.EqualFold(candidate, existing.Name)
		}) >= 0
		if wanted {
			continue
		}
		stale = append(stale, existing.Name)
	}
	return stale
}
// determineLabelsToAdd returns the entries of metaLabels that have no
// case-insensitive match among the labels in labelInfo, in their original
// order.
func determineLabelsToAdd(metaLabels []string, labelInfo *confluence.LabelInfo) []string {
	var missing []string
	for _, wanted := range metaLabels {
		present := slices.IndexFunc(labelInfo.Labels, func(existing confluence.Label) bool {
			return strings.EqualFold(existing.Name, wanted)
		}) >= 0
		if present {
			continue
		}
		missing = append(missing, wanted)
	}
	return missing
}
// getImageAlign resolves the effective image alignment. A non-empty
// ImageAlign in meta overrides align. The chosen value is trimmed and
// lower-cased and must be one of "left", "center" or "right"; anything else
// yields an error. An empty value is returned unchanged with no error.
func getImageAlign(align string, meta *metadata.Meta) (string, error) {
	if meta != nil && meta.ImageAlign != "" {
		align = meta.ImageAlign
	}

	if align == "" {
		return "", nil
	}

	normalized := strings.ToLower(strings.TrimSpace(align))
	switch normalized {
	case "left", "center", "right":
		return normalized, nil
	default:
		return "", fmt.Errorf(
			`unknown image-align %q, expected one of: left, center, right`,
			normalized,
		)
	}
}
// sha1Hash returns the SHA-1 digest of input as a lowercase hex string.
func sha1Hash(input string) string {
	digest := sha1.Sum([]byte(input))
	return hex.EncodeToString(digest[:])
}
// htmlTextReplacer rewrites the three characters that must always be encoded
// inside a storage-format text node to their entity form.
var htmlTextReplacer = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")

// htmlEscapeText escapes only the characters that Confluence storage HTML
// always encodes in text nodes (&, <, >). Unlike html.EscapeString it does NOT
// escape single-quotes or double-quotes, because those are frequently left
// unescaped inside text nodes by the Confluence editor and by mark's own
// renderer, so escaping them would prevent the selection-search from finding
// a valid match.
func htmlEscapeText(s string) string {
	return htmlTextReplacer.Replace(s)
}
// truncateSelection produces a short preview of s for log messages. Strings
// longer than maxRunes runes are cut to maxRunes runes and suffixed with an
// ellipsis; shorter strings are returned unchanged.
func truncateSelection(s string, maxRunes int) string {
	if runes := []rune(s); len(runes) > maxRunes {
		return string(runes[:maxRunes]) + "…"
	}
	return s
}
// contextBefore returns the slice of s that ends at byteEnd and is at most
// maxBytes long. If the window would begin inside a multi-byte UTF-8
// sequence, its start is moved forward to the next rune start so the result
// never opens with a partial rune.
func contextBefore(s string, byteEnd, maxBytes int) string {
	from := 0
	if byteEnd > maxBytes {
		from = byteEnd - maxBytes
	}
	for ; from < byteEnd; from++ {
		if utf8.RuneStart(s[from]) {
			break
		}
	}
	return s[from:byteEnd]
}
// contextAfter returns the slice of s that begins at byteStart and is at
// most maxBytes long. If the window would end inside a multi-byte UTF-8
// sequence, its end is moved back to the preceding rune start so the result
// never closes with a partial rune.
func contextAfter(s string, byteStart, maxBytes int) string {
	limit := byteStart + maxBytes
	if limit < len(s) {
		for ; limit > byteStart; limit-- {
			if utf8.RuneStart(s[limit]) {
				break
			}
		}
		return s[byteStart:limit]
	}
	// The window reaches or passes the end of s: take everything that is left.
	return s[byteStart:]
}
// levenshteinDistance returns the edit distance between s1 and s2, counted
// in runes, where insertions, deletions and substitutions each cost one.
func levenshteinDistance(s1, s2 string) int {
	long, short := []rune(s1), []rune(s2)
	// Keep the shorter input as the row dimension so the working row stays
	// as small as possible.
	if len(long) < len(short) {
		long, short = short, long
	}
	if len(short) == 0 {
		return len(long)
	}

	// A single rolling row is enough: row[j] holds the previous row's value
	// until it is overwritten, and diag remembers the previous row's value
	// one column to the left.
	row := make([]int, len(short)+1)
	for j := range row {
		row[j] = j
	}

	for i, lr := range long {
		diag := row[0]
		row[0] = i + 1
		for j, sr := range short {
			above := row[j+1]
			substitution := diag
			if lr != sr {
				substitution++
			}
			row[j+1] = min(
				above+1,    // deletion
				row[j]+1,   // insertion
				substitution, // substitution
			)
			diag = above
		}
	}

	return row[len(short)]
}
// commentContext holds the text found immediately before and immediately
// after an inline-comment marker in the old page body.
type commentContext struct {
	before, after string
}
// maxCandidates is the maximum number of occurrences of a selection that are
// evaluated with Levenshtein distance; further occurrences are ignored to
// bound CPU cost on pages where a selection is short or very common.
const maxCandidates = 100

// contextWindowBytes is the number of bytes of surrounding text captured as
// context around each inline-comment marker. It is used both when extracting
// context from oldBody and when scoring candidates in newBody.
const contextWindowBytes = 100

// mergeComments re-embeds inline comment markers from the Confluence API into
// newBody (the updated storage HTML about to be uploaded). It extracts context
// from each existing marker in oldBody and uses Levenshtein distance to
// relocate each marker to the best-matching position in newBody, so comment
// threads survive page edits even when the surrounding text has shifted.
//
// A nil comments value returns newBody unchanged. Comments whose selection is
// empty, cannot be found in newBody, or overlaps a marker that has already
// been placed are dropped with a warning. The returned error is always nil.
func mergeComments(newBody string, oldBody string, comments *confluence.InlineComments) (string, error) {
	if comments == nil {
		return newBody, nil
	}

	// 1. Extract context for each comment from oldBody.
	contexts := make(map[string]commentContext)
	matches := markerRegex.FindAllStringSubmatchIndex(oldBody, -1)
	for _, match := range matches {
		// match[2]:match[3] is the first capture group: the marker ref.
		ref := oldBody[match[2]:match[3]]

		// Context is taken from the text around the whole marker element.
		before := contextBefore(oldBody, match[0], contextWindowBytes)
		after := contextAfter(oldBody, match[1], contextWindowBytes)

		contexts[ref] = commentContext{
			before: before,
			after:  after,
		}
	}

	type replacement struct {
		start     int
		end       int
		ref       string
		selection string
	}

	// 2. Locate the best position in newBody for every inline comment.
	var replacements []replacement
	seenRefs := make(map[string]bool)

	for _, comment := range comments.Results {
		if comment.Extensions.Location != "inline" {
			log.Debug().
				Str("location", comment.Extensions.Location).
				Str("ref", comment.Extensions.InlineProperties.MarkerRef).
				Msg("comment ignored during inline marker merge: not an inline comment")
			continue
		}

		ref := comment.Extensions.InlineProperties.MarkerRef
		selection := comment.Extensions.InlineProperties.OriginalSelection

		if seenRefs[ref] {
			// Multiple results share the same MarkerRef (e.g. threaded replies).
			// The marker only needs to be inserted once; skip duplicates.
			continue
		}
		// Mark ref as seen immediately so subsequent results for the same ref
		// (threaded replies) are always deduplicated, even if this one is dropped.
		seenRefs[ref] = true

		if selection == "" {
			log.Warn().
				Str("ref", ref).
				Msg("inline comment skipped: original selection is empty; comment will be lost")
			continue
		}

		ctx, hasCtx := contexts[ref]

		// Build the list of forms to search for in newBody. The escaped form
		// is tried first (normal XML text nodes). The raw form is appended as a
		// fallback for text inside CDATA-backed macro bodies (e.g. ac:code),
		// where < and > are stored unescaped inside <![CDATA[...]]>.
		escapedSelection := htmlEscapeText(selection)
		searchForms := []string{escapedSelection}
		if selection != escapedSelection {
			searchForms = append(searchForms, selection)
		}

		var bestStart = -1
		var bestEnd = -1
		// Sentinel larger than any distance two context windows can produce.
		var minDistance = 1000000

		// Iterate over search forms; stop as soon as we have a definitive best.
		candidates := 0
		stopSearch := false
		for _, form := range searchForms {
			if stopSearch {
				break
			}

			currentPos := 0
			for {
				index := strings.Index(newBody[currentPos:], form)
				if index == -1 {
					break
				}

				start := currentPos + index
				end := start + len(form)

				// Skip candidates that start or end in the middle of a multi-byte
				// UTF-8 rune; such a match would produce invalid UTF-8 output.
				if !utf8.RuneStart(newBody[start]) || (end < len(newBody) && !utf8.RuneStart(newBody[end])) {
					currentPos = start + 1
					continue
				}

				candidates++
				if candidates > maxCandidates {
					stopSearch = true
					break
				}

				if !hasCtx {
					// No context available; use the first occurrence.
					bestStart = start
					bestEnd = end
					stopSearch = true
					break
				}

				newBefore := contextBefore(newBody, start, contextWindowBytes)
				newAfter := contextAfter(newBody, end, contextWindowBytes)

				// Fast path: exact context match is the best possible result.
				if newBefore == ctx.before && newAfter == ctx.after {
					bestStart = start
					bestEnd = end
					stopSearch = true
					break
				}

				// Lower-bound pruning: Levenshtein distance is at least the
				// absolute difference in rune counts. Use rune counts (not byte
				// lengths) to match the unit levenshteinDistance operates on,
				// avoiding false skips for multibyte UTF-8 content.
				lbBefore := utf8.RuneCountInString(ctx.before) - utf8.RuneCountInString(newBefore)
				if lbBefore < 0 {
					lbBefore = -lbBefore
				}
				lbAfter := utf8.RuneCountInString(ctx.after) - utf8.RuneCountInString(newAfter)
				if lbAfter < 0 {
					lbAfter = -lbAfter
				}
				if lbBefore+lbAfter >= minDistance {
					currentPos = start + 1
					continue
				}

				distance := levenshteinDistance(ctx.before, newBefore) + levenshteinDistance(ctx.after, newAfter)
				if distance < minDistance {
					minDistance = distance
					bestStart = start
					bestEnd = end
				}

				currentPos = start + 1
			}
		}

		if bestStart != -1 {
			replacements = append(replacements, replacement{
				start:     bestStart,
				end:       bestEnd,
				ref:       ref,
				selection: selection,
			})
		} else {
			log.Warn().
				Str("ref", ref).
				Str("selection_preview", truncateSelection(selection, 50)).
				Msg("inline comment dropped: selected text not found in new body; comment will be lost")
		}
	}

	// 3. Sort replacements from back to front to avoid offset issues.
	// Use a stable sort with ref as a tie-breaker so the ordering is
	// deterministic when two markers resolve to the same start offset.
	slices.SortStableFunc(replacements, func(a, b replacement) int {
		if a.start != b.start {
			return b.start - a.start
		}
		return strings.Compare(a.ref, b.ref)
	})

	// 4. Apply replacements back-to-front. Track the minimum start of any
	// applied replacement so that overlapping candidates (whose end exceeds
	// that boundary) are dropped rather than producing nested or malformed
	// tags.
	minAppliedStart := len(newBody)
	for _, r := range replacements {
		if r.end > minAppliedStart {
			// This replacement overlaps with an already-applied one.
			// Drop it and warn so the user knows the comment was skipped.
			log.Warn().
				Str("ref", r.ref).
				Str("selection_preview", truncateSelection(r.selection, 50)).
				Int("start", r.start).
				Int("end", r.end).
				Int("conflicting_start", minAppliedStart).
				Msg("inline comment marker dropped: selection overlaps an already-placed marker")
			continue
		}
		minAppliedStart = r.start

		// Wrap the matched text exactly as it appears in newBody (escaped or
		// raw) in a marker element carrying the comment's ref.
		selection := newBody[r.start:r.end]
		withComment := fmt.Sprintf(
			`<ac:inline-comment-marker ac:ref="%s">%s</ac:inline-comment-marker>`,
			stdhtml.EscapeString(r.ref),
			selection,
		)
		newBody = newBody[:r.start] + withComment + newBody[r.end:]
	}

	return newBody, nil
}