diff --git a/README.md b/README.md index 75c5773..9f66ac8 100644 --- a/README.md +++ b/README.md @@ -880,6 +880,7 @@ GLOBAL OPTIONS: --mermaid-scale float defines the scaling factor for mermaid renderings. (default: 1) [$MARK_MERMAID_SCALE] --include-path string Path for shared includes, used as a fallback if the include doesn't exist in the current directory. [$MARK_INCLUDE_PATH] --changes-only Avoids re-uploading pages that haven't changed since the last run. [$MARK_CHANGES_ONLY] + --preserve-comments Fetch and preserve inline comments on existing Confluence pages. [$MARK_PRESERVE_COMMENTS] --d2-scale float defines the scaling factor for d2 renderings. (default: 1) [$MARK_D2_SCALE] --features string [ --features string ] Enables optional features. Current features: d2, mermaid, mention, mkdocsadmonitions (default: "mermaid", "mention") [$MARK_FEATURES] --insecure-skip-tls-verify skip TLS certificate verification (useful for self-signed certificates) [$MARK_INSECURE_SKIP_TLS_VERIFY] @@ -903,6 +904,8 @@ image-align = "center" **NOTE**: Labels aren't supported when using `minor-edit`! +**NOTE**: See [Preserving Inline Comments](#preserving-inline-comments) for a detailed description of the `--preserve-comments` flag. + **NOTE**: The system specific locations are described in here: . Currently, these are: @@ -973,6 +976,34 @@ mark -f "**/docs/*.md" We recommend to lint your markdown files with [markdownlint-cli2](https://github.com/DavidAnson/markdownlint-cli2) before publishing them to confluence to catch any conversion errors early. +### Preserving Inline Comments + +When collaborators leave inline comments on a Confluence page, updating the page via `mark` will normally erase those comments because the stored body is fully replaced. The `--preserve-comments` flag re-attaches inline comment markers to the new page body before uploading, so existing review threads survive updates. 
+ +```bash +mark --preserve-comments -f docs/page.md +``` + +Or via environment variable: + +```bash +MARK_PRESERVE_COMMENTS=true mark -f docs/page.md +``` + +**How it works:** + +1. Before uploading, `mark` fetches the current page body and all inline comment markers from the Confluence API. +2. For each existing `<ac:inline-comment-marker>` tag it records the content wrapped by that marker plus a short context window immediately before the opening tag and immediately after the closing tag in the old body (not around the raw selection text, so the context is stable even when the marker wraps additional inline markup such as `<strong>`). +3. It searches the new body for the same selected text and picks the occurrence whose surrounding context best matches the original (using Levenshtein distance), so the marker lands in the right place even if nearby text has shifted. +4. The updated body—with all markers re-embedded—is then uploaded as normal. + +**Limitations:** + +* If the commented text was deleted from the document, the inline comment cannot be relocated and will be lost. `mark` logs a warning in this case. +* Overlapping selections (two comments anchored to the same stretch of text) are detected; the earlier overlapping match is dropped with a warning, and the later one (higher byte offset) is kept, rather than producing malformed markup. +* `--preserve-comments` is automatically skipped for newly created pages (there are no comments to preserve yet). +* When combined with `--changes-only`, the comment-preservation API calls are skipped entirely on runs where the page content has not changed, avoiding unnecessary round-trips. + ## Issues, Bugs & Contributions I've started the project to solve my own problem and open sourced the solution so anyone who has a problem like me can solve it too. 
diff --git a/confluence/api.go b/confluence/api.go index d3a4e43..4cd2078 100644 --- a/confluence/api.go +++ b/confluence/api.go @@ -58,6 +58,12 @@ type PageInfo struct { Title string `json:"title"` } `json:"ancestors"` + Body struct { + Storage struct { + Value string `json:"value"` + } `json:"storage"` + } `json:"body"` + Links struct { Full string `json:"webui"` Base string `json:"-"` // Not from JSON; populated from response _links.base @@ -85,6 +91,29 @@ type LabelInfo struct { Labels []Label `json:"results"` Size int `json:"number"` } + +type InlineCommentProperties struct { + OriginalSelection string `json:"originalSelection"` + MarkerRef string `json:"markerRef"` +} + +type InlineCommentExtensions struct { + Location string `json:"location"` + InlineProperties InlineCommentProperties `json:"inlineProperties"` +} + +type InlineCommentResult struct { + Extensions InlineCommentExtensions `json:"extensions"` +} + +type InlineComments struct { + Links struct { + Context string `json:"context"` + Next string `json:"next"` + } `json:"_links"` + Results []InlineCommentResult `json:"results"` +} + type form struct { buffer io.Reader writer *multipart.Writer @@ -464,9 +493,13 @@ func (api *API) GetAttachments(pageID string) ([]AttachmentInfo, error) { } func (api *API) GetPageByID(pageID string) (*PageInfo, error) { + return api.GetPageByIDExpanded(pageID, "ancestors,version") +} + +func (api *API) GetPageByIDExpanded(pageID string, expand string) (*PageInfo, error) { request, err := api.rest.Res( "content/"+pageID, &PageInfo{}, - ).Get(map[string]string{"expand": "ancestors,version"}) + ).Get(map[string]string{"expand": expand}) if err != nil { return nil, err } @@ -478,6 +511,44 @@ func (api *API) GetPageByID(pageID string) (*PageInfo, error) { return request.Response.(*PageInfo), nil } +func (api *API) GetInlineComments(pageID string) (*InlineComments, error) { + const pageSize = 100 + all := &InlineComments{} + start := 0 + + for { + result := &InlineComments{} + 
request, err := api.rest.Res( + "content/"+pageID+"/child/comment", result, + ).Get(map[string]string{ + "expand": "extensions.inlineProperties", + "limit": fmt.Sprintf("%d", pageSize), + "start": fmt.Sprintf("%d", start), + }) + if err != nil { + return nil, err + } + + if request.Raw.StatusCode != http.StatusOK { + return nil, newErrorStatusNotOK(request) + } + + if all.Links.Context == "" { + all.Links = result.Links + } + + all.Results = append(all.Results, result.Results...) + + if len(result.Results) < pageSize || result.Links.Next == "" { + break + } + + start += len(result.Results) + } + + return all, nil +} + func (api *API) CreatePage( space string, pageType string, diff --git a/mark.go b/mark.go index b5b5598..f8bed62 100644 --- a/mark.go +++ b/mark.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "errors" "fmt" + stdhtml "html" "io" "os" "path/filepath" @@ -13,6 +14,7 @@ import ( "slices" "strings" "time" + "unicode/utf8" "github.com/bmatcuk/doublestar/v4" "github.com/kovetskiy/mark/v16/attachment" @@ -28,6 +30,8 @@ import ( "github.com/rs/zerolog/log" ) +var markerRegex = regexp.MustCompile(`(?s)(.*?)`) + // Config holds all configuration options for running Mark. type Config struct { // Connection settings @@ -55,10 +59,11 @@ type Config struct { ContentAppearance string // Page updates - MinorEdit bool - VersionMessage string - EditLock bool - ChangesOnly bool + MinorEdit bool + VersionMessage string + EditLock bool + ChangesOnly bool + PreserveComments bool // Rendering DropH1 bool @@ -282,6 +287,7 @@ func ProcessFile(file string, api *confluence.API, config Config) (*confluence.P } var target *confluence.PageInfo + var pageCreated bool if meta != nil { parent, pg, err := page.ResolvePage(false, api, meta) @@ -298,6 +304,7 @@ func ProcessFile(file string, api *confluence.API, config Config) (*confluence.P // conflict that can occur when attempting to update a page just // after it was created. See issues/139. 
// htmlTextReplacer encodes the three characters that are always stored as
// entities inside text nodes: & becomes &amp;, < becomes &lt; and > becomes
// &gt;. strings.Replacer scans the input once and never re-examines its own
// output, so the ampersand of a freshly written entity is not escaped again.
var htmlTextReplacer = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")

// htmlEscapeText escapes &, < and > in s and leaves every other character
// untouched. Unlike html.EscapeString it deliberately does NOT escape
// single-quotes or double-quotes: those are commonly left unescaped inside
// text nodes, so encoding them would keep the selection search from finding
// an otherwise valid match.
func htmlEscapeText(s string) string {
	return htmlTextReplacer.Replace(s)
}
// truncateSelection returns a preview of s suitable for log messages. If s
// holds at most maxRunes runes it is returned unchanged; otherwise the first
// maxRunes runes are kept and an ellipsis ("…") is appended. A negative
// maxRunes is treated as zero rather than panicking.
func truncateSelection(s string, maxRunes int) string {
	if maxRunes < 0 {
		maxRunes = 0
	}
	// A string never has more runes than bytes, so short inputs can be
	// returned without allocating a rune slice.
	if len(s) <= maxRunes {
		return s
	}
	runes := []rune(s)
	if len(runes) <= maxRunes {
		return s
	}
	return string(runes[:maxRunes]) + "…"
}

// contextBefore returns up to maxBytes bytes of s that end at byteEnd. The
// start of the window is moved forward to the next UTF-8 rune start so the
// result never begins in the middle of a multi-byte sequence. byteEnd is
// clamped to [0, len(s)] and a negative maxBytes is treated as zero.
func contextBefore(s string, byteEnd, maxBytes int) string {
	byteEnd = min(max(byteEnd, 0), len(s))
	start := max(byteEnd-max(maxBytes, 0), 0)
	for start < byteEnd && !utf8.RuneStart(s[start]) {
		start++
	}
	return s[start:byteEnd]
}

// contextAfter returns up to maxBytes bytes of s that begin at byteStart. The
// end of the window is moved back to the previous UTF-8 rune start so the
// result never ends in the middle of a multi-byte sequence. byteStart is
// clamped to [0, len(s)] and a negative maxBytes is treated as zero.
func contextAfter(s string, byteStart, maxBytes int) string {
	byteStart = min(max(byteStart, 0), len(s))
	maxBytes = max(maxBytes, 0)
	// Compare against the remaining length instead of computing
	// byteStart+maxBytes first, which could overflow for huge maxBytes.
	if maxBytes >= len(s)-byteStart {
		return s[byteStart:]
	}
	end := byteStart + maxBytes
	for end > byteStart && !utf8.RuneStart(s[end]) {
		end--
	}
	return s[byteStart:end]
}

// levenshteinDistance returns the edit distance between s1 and s2, counted in
// runes: the minimum number of single-rune insertions, deletions and
// substitutions needed to turn one string into the other.
func levenshteinDistance(s1, s2 string) int {
	// Identical inputs need no edits; skip the quadratic table entirely.
	if s1 == s2 {
		return 0
	}

	r1 := []rune(s1)
	r2 := []rune(s2)

	if len(r1) == 0 {
		return len(r2)
	}
	if len(r2) == 0 {
		return len(r1)
	}

	// Only two rolling rows are kept instead of the full matrix. Make r2 the
	// shorter input so the row width (len(r2)+1) is as small as possible; the
	// distance is symmetric, so swapping does not change the result.
	if len(r1) < len(r2) {
		r1, r2 = r2, r1
	}

	prev := make([]int, len(r2)+1)
	curr := make([]int, len(r2)+1)

	// Distance from the empty prefix of r1 to each prefix of r2.
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= len(r1); i++ {
		curr[0] = i
		for j := 1; j <= len(r2); j++ {
			cost := 0
			if r1[i-1] != r2[j-1] {
				cost = 1
			}
			curr[j] = min(
				prev[j]+1,      // deletion
				curr[j-1]+1,    // insertion
				prev[j-1]+cost, // substitution
			)
		}
		prev, curr = curr, prev
	}
	// After the final swap the last computed row lives in prev.
	return prev[len(r2)]
}
+ if len(r1) < len(r2) { + r1, r2 = r2, r1 + } + + prev := make([]int, len(r2)+1) + curr := make([]int, len(r2)+1) + + for j := range prev { + prev[j] = j + } + + for i := 1; i <= len(r1); i++ { + curr[0] = i + for j := 1; j <= len(r2); j++ { + cost := 0 + if r1[i-1] != r2[j-1] { + cost = 1 + } + curr[j] = min( + prev[j]+1, // deletion + curr[j-1]+1, // insertion + prev[j-1]+cost, // substitution + ) + } + prev, curr = curr, prev + } + return prev[len(r2)] +} + +type commentContext struct { + before string + after string +} + +// mergeComments re-embeds inline comment markers from the Confluence API into +// newBody (the updated storage HTML about to be uploaded). It extracts context +// from each existing marker in oldBody and uses Levenshtein distance to +// relocate each marker to the best-matching position in newBody, so comment +// threads survive page edits even when the surrounding text has shifted. +// +// At most maxCandidates occurrences of each selection are evaluated with +// Levenshtein distance; further occurrences are ignored to bound CPU cost on +// pages where a selection is short or very common. +const maxCandidates = 100 + +// contextWindowBytes is the number of bytes of surrounding text captured as +// context around each inline-comment marker. It is used both when extracting +// context from oldBody and when scoring candidates in newBody. +const contextWindowBytes = 100 + +func mergeComments(newBody string, oldBody string, comments *confluence.InlineComments) (string, error) { + if comments == nil { + return newBody, nil + } + // 1. 
Extract context for each comment from oldBody + contexts := make(map[string]commentContext) + matches := markerRegex.FindAllStringSubmatchIndex(oldBody, -1) + for _, match := range matches { + ref := oldBody[match[2]:match[3]] + // context around the tag + before := contextBefore(oldBody, match[0], contextWindowBytes) + after := contextAfter(oldBody, match[1], contextWindowBytes) + contexts[ref] = commentContext{ + before: before, + after: after, + } + } + + type replacement struct { + start int + end int + ref string + selection string + } + var replacements []replacement + seenRefs := make(map[string]bool) + + for _, comment := range comments.Results { + if comment.Extensions.Location != "inline" { + log.Debug(). + Str("location", comment.Extensions.Location). + Str("ref", comment.Extensions.InlineProperties.MarkerRef). + Msg("comment ignored during inline marker merge: not an inline comment") + continue + } + + ref := comment.Extensions.InlineProperties.MarkerRef + selection := comment.Extensions.InlineProperties.OriginalSelection + + if seenRefs[ref] { + // Multiple results share the same MarkerRef (e.g. threaded replies). + // The marker only needs to be inserted once; skip duplicates. + continue + } + // Mark ref as seen immediately so subsequent results for the same ref + // (threaded replies) are always deduplicated, even if this one is dropped. + seenRefs[ref] = true + + if selection == "" { + log.Warn(). + Str("ref", ref). + Msg("inline comment skipped: original selection is empty; comment will be lost") + continue + } + + ctx, hasCtx := contexts[ref] + + // Build the list of forms to search for in newBody. The escaped form + // is tried first (normal XML text nodes). The raw form is appended as a + // fallback for text inside CDATA-backed macro bodies (e.g. ac:code), + // where < and > are stored unescaped inside . 
+ escapedSelection := htmlEscapeText(selection) + searchForms := []string{escapedSelection} + if selection != escapedSelection { + searchForms = append(searchForms, selection) + } + + var bestStart = -1 + var bestEnd = -1 + var minDistance = 1000000 + + // Iterate over search forms; stop as soon as we have a definitive best. + candidates := 0 + stopSearch := false + for _, form := range searchForms { + if stopSearch { + break + } + currentPos := 0 + for { + index := strings.Index(newBody[currentPos:], form) + if index == -1 { + break + } + start := currentPos + index + end := start + len(form) + + // Skip candidates that start or end in the middle of a multi-byte + // UTF-8 rune; such a match would produce invalid UTF-8 output. + if !utf8.RuneStart(newBody[start]) || (end < len(newBody) && !utf8.RuneStart(newBody[end])) { + currentPos = start + 1 + continue + } + + candidates++ + if candidates > maxCandidates { + stopSearch = true + break + } + + if !hasCtx { + // No context available; use the first occurrence. + bestStart = start + bestEnd = end + stopSearch = true + break + } + + newBefore := contextBefore(newBody, start, contextWindowBytes) + newAfter := contextAfter(newBody, end, contextWindowBytes) + + // Fast path: exact context match is the best possible result. + if newBefore == ctx.before && newAfter == ctx.after { + bestStart = start + bestEnd = end + stopSearch = true + break + } + + // Lower-bound pruning: Levenshtein distance is at least the + // absolute difference in rune counts. Use rune counts (not byte + // lengths) to match the unit levenshteinDistance operates on, + // avoiding false skips for multibyte UTF-8 content. 
+ lbBefore := utf8.RuneCountInString(ctx.before) - utf8.RuneCountInString(newBefore) + if lbBefore < 0 { + lbBefore = -lbBefore + } + lbAfter := utf8.RuneCountInString(ctx.after) - utf8.RuneCountInString(newAfter) + if lbAfter < 0 { + lbAfter = -lbAfter + } + if lbBefore+lbAfter >= minDistance { + currentPos = start + 1 + continue + } + + distance := levenshteinDistance(ctx.before, newBefore) + levenshteinDistance(ctx.after, newAfter) + + if distance < minDistance { + minDistance = distance + bestStart = start + bestEnd = end + } + + currentPos = start + 1 + } + } + + if bestStart != -1 { + replacements = append(replacements, replacement{ + start: bestStart, + end: bestEnd, + ref: ref, + selection: selection, + }) + } else { + log.Warn(). + Str("ref", ref). + Str("selection_preview", truncateSelection(selection, 50)). + Msg("inline comment dropped: selected text not found in new body; comment will be lost") + } + } + + // Sort replacements from back to front to avoid offset issues. + // Use a stable sort with ref as a tie-breaker so the ordering is + // deterministic when two markers resolve to the same start offset. + slices.SortStableFunc(replacements, func(a, b replacement) int { + if a.start != b.start { + return b.start - a.start + } + if a.ref < b.ref { + return -1 + } + if a.ref > b.ref { + return 1 + } + return 0 + }) + + // Apply replacements back-to-front. Track the minimum start of any + // applied replacement so that overlapping candidates (whose end exceeds + // that boundary) are dropped rather than producing nested or malformed + // tags. + minAppliedStart := len(newBody) + for _, r := range replacements { + if r.end > minAppliedStart { + // This replacement overlaps with an already-applied one. + // Drop it and warn so the user knows the comment was skipped. + log.Warn(). + Str("ref", r.ref). + Str("selection_preview", truncateSelection(r.selection, 50)). + Int("start", r.start). + Int("end", r.end). + Int("conflicting_start", minAppliedStart). 
+ Msg("inline comment marker dropped: selection overlaps an already-placed marker") + continue + } + minAppliedStart = r.start + selection := newBody[r.start:r.end] + withComment := fmt.Sprintf( + `%s`, + stdhtml.EscapeString(r.ref), + selection, + ) + newBody = newBody[:r.start] + withComment + newBody[r.end:] + } + + return newBody, nil +} diff --git a/mark_test.go b/mark_test.go new file mode 100644 index 0000000..dfb111a --- /dev/null +++ b/mark_test.go @@ -0,0 +1,369 @@ +package mark + +import ( + "testing" + + "github.com/kovetskiy/mark/v16/confluence" + "github.com/stretchr/testify/assert" +) + +// --------------------------------------------------------------------------- +// Helper function unit tests +// --------------------------------------------------------------------------- + +func TestTruncateSelection(t *testing.T) { + assert.Equal(t, "hello", truncateSelection("hello", 10)) + assert.Equal(t, "hello", truncateSelection("hello", 5)) + assert.Equal(t, "hell…", truncateSelection("hello", 4)) + assert.Equal(t, "", truncateSelection("", 5)) + // Multibyte runes count as single units. + assert.Equal(t, "世界…", truncateSelection("世界 is the world", 2)) +} + +func TestLevenshteinDistance(t *testing.T) { + tests := []struct { + s1, s2 string + want int + }{ + {"", "", 0}, + {"abc", "", 3}, + {"", "abc", 3}, + {"abc", "abc", 0}, + {"abc", "axc", 1}, // one substitution + {"abc", "ab", 1}, // one deletion + {"ab", "abc", 1}, // one insertion + {"kitten", "sitting", 3}, + // Multibyte: é is one rune, so distance from "héllo" to "hello" is 1. + {"héllo", "hello", 1}, + } + for _, tt := range tests { + t.Run(tt.s1+"/"+tt.s2, func(t *testing.T) { + assert.Equal(t, tt.want, levenshteinDistance(tt.s1, tt.s2)) + }) + } +} + +func TestContextBefore(t *testing.T) { + // Basic cases. 
+ assert.Equal(t, "", contextBefore("hello", 0, 10)) + assert.Equal(t, "hello", contextBefore("hello", 5, 10)) + assert.Equal(t, "llo", contextBefore("hello", 5, 3)) + + // "héllo" is 6 bytes (h=1, é=2, l=1, l=1, o=1). + // maxBytes=4 → raw start=2, which lands mid-rune (é's continuation byte). + // Should advance to byte 3 (first 'l'). + assert.Equal(t, "llo", contextBefore("héllo", 6, 4)) +} + +func TestContextAfter(t *testing.T) { + // Basic cases. + assert.Equal(t, "", contextAfter("hello", 5, 10)) + assert.Equal(t, "hello", contextAfter("hello", 0, 10)) + assert.Equal(t, "hel", contextAfter("hello", 0, 3)) + + // "héllo" is 6 bytes. contextAfter(s, 0, 2) → raw end=2 (é's continuation + // byte), which is not a rune start. Should back up to 1, returning just "h". + assert.Equal(t, "h", contextAfter("héllo", 0, 2)) +} + +// makeComments builds an InlineComments value from alternating +// (selection, markerRef) pairs, all with location "inline". +func makeComments(pairs ...string) *confluence.InlineComments { + c := &confluence.InlineComments{} + for i := 0; i+1 < len(pairs); i += 2 { + selection, ref := pairs[i], pairs[i+1] + c.Results = append(c.Results, confluence.InlineCommentResult{ + Extensions: confluence.InlineCommentExtensions{ + Location: "inline", + InlineProperties: confluence.InlineCommentProperties{ + OriginalSelection: selection, + MarkerRef: ref, + }, + }, + }) + } + return c +} + +func TestMergeComments(t *testing.T) { + body := "

Hello world

" + oldBody := `

Hello world

` + comments := makeComments("world", "uuid-123") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

Hello world

`, result) +} + +func TestMergeComments_Escaping(t *testing.T) { + body := "

Hello & world

" + oldBody := `

Hello & world

` + comments := makeComments("&", "uuid-456") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

Hello & world

`, result) +} + +func TestMergeComments_Disambiguation(t *testing.T) { + body := "

Item one. Item two. Item one.

" + // Comment is on the second "Item one." + oldBody := `

Item one. Item two. Item one.

` + comments := makeComments("Item one.", "uuid-1") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + // Context should correctly pick the second occurrence + assert.Equal(t, `

Item one. Item two. Item one.

`, result) +} + +// TestMergeComments_SelectionMissing verifies that a comment whose selection +// no longer appears in the new body is dropped without returning an error or panicking. +// A warning is logged so the user knows the comment was not relocated. +func TestMergeComments_SelectionMissing(t *testing.T) { + body := "

Completely different content

" + oldBody := `

old text

` + comments := makeComments("old text", "uuid-gone") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + // Comment is dropped; body is returned unchanged. + assert.Equal(t, body, result) +} + +// TestMergeComments_OverlappingSelections verifies that when two comments +// reference overlapping text regions the later one (by position) is kept and +// the earlier overlapping one is dropped rather than corrupting the body. +func TestMergeComments_OverlappingSelections(t *testing.T) { + body := "

foo bar baz

" + // Neither comment has a marker in oldBody, so no positional context is + // available; the algorithm falls back to a plain string search. + oldBody := "

foo bar baz

" + // "foo bar" starts at 3, ends at 10; "bar baz" starts at 7, ends at 14. + // They overlap on "bar". The later match (uuid-B at position 7) wins. + comments := makeComments("foo bar", "uuid-A", "bar baz", "uuid-B") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

foo bar baz

`, result) +} + +// TestMergeComments_NilComments verifies that a nil comments pointer is +// handled gracefully and the new body is returned unchanged. +func TestMergeComments_NilComments(t *testing.T) { + body := "

Hello world

" + result, err := mergeComments(body, "", nil) + assert.NoError(t, err) + assert.Equal(t, body, result) +} + +// TestMergeComments_HTMLEntities verifies that selections containing HTML +// entities (<, >) are matched correctly. The API returns raw (unescaped) +// text for OriginalSelection; htmlEscapeText encodes &, < and > to their +// entity forms before searching. +func TestMergeComments_HTMLEntities(t *testing.T) { + body := `

Hello <world> it's me

` + oldBody := `

Hello <world> it's me

` + // The API returns the raw (unescaped) selection text. + comments := makeComments("", "uuid-ent") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

Hello <world> it's me

`, result) +} + +// TestMergeComments_ApostropheEncoded verifies the known limitation: when a +// selection includes an apostrophe that Confluence stores as the numeric +// entity ' in the page body, mergeComments cannot locate the selection +// (htmlEscapeText does not encode ' to ') and the comment is dropped with +// a warning rather than panicking or producing invalid output. +func TestMergeComments_ApostropheEncoded(t *testing.T) { + // New body uses ' entity (as Confluence sometimes stores apostrophes). + body := `

Hello <world> it's me

` + // Old body has the comment marker around a selection that includes an apostrophe. + oldBody := `

Hello <world> it's me

` + // The API returns the raw unescaped selection including a literal apostrophe. + comments := makeComments(" it's", "uuid-apos-enc") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + // The comment is dropped (body unchanged) because htmlEscapeText("it's") + // produces "it's", which doesn't match "it's" in the new body. + assert.Equal(t, body, result) +} + +// TestMergeComments_ApostropheSelection verifies that a selection containing a +// literal apostrophe is found when the new body also contains a literal +// apostrophe (as mark's renderer typically emits). This exercises the +// htmlEscapeText path which intentionally does not encode ' or ". +func TestMergeComments_ApostropheSelection(t *testing.T) { + body := `

Hello it's a test

` + oldBody := `

Hello it's a test

` + // The API returns the raw (unescaped) selection text with a literal apostrophe. + comments := makeComments("it's", "uuid-apos") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

Hello it's a test

`, result) +} + + +// TestMergeComments_NestedTags verifies that a marker whose stored content +// contains nested inline tags (e.g. ) is still recognised by +// markerRegex and the comment is correctly relocated into the new body. +func TestMergeComments_NestedTags(t *testing.T) { + // The new body contains plain bold text (no marker yet). + body := "

Hello world

" + // The old body already has the marker wrapping the bold tag. + oldBody := `

Hello world

` + // The API returns the raw selected text without markup. + comments := makeComments("world", "uuid-nested") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

Hello world

`, result) +} + +// TestMergeComments_EmptySelection verifies that a comment with an empty +// OriginalSelection is skipped without panicking and the body is returned +// unchanged. +func TestMergeComments_EmptySelection(t *testing.T) { + body := "

Hello world

" + comments := makeComments("", "uuid-empty") + + result, err := mergeComments(body, body, comments) + assert.NoError(t, err) + assert.Equal(t, body, result) +} + +// TestMergeComments_DuplicateMarkerRef verifies that multiple comment results +// sharing the same MarkerRef (e.g. threaded replies) produce exactly one +// insertion rather than nested duplicates. +func TestMergeComments_DuplicateMarkerRef(t *testing.T) { + body := "

Hello world

" + oldBody := `

Hello world

` + // Two results with identical ref — simulates threaded replies. + comments := makeComments("world", "uuid-dup", "world", "uuid-dup") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

Hello world

`, result) +} + +// --------------------------------------------------------------------------- +// Additional mergeComments scenario tests +// --------------------------------------------------------------------------- + +// TestMergeComments_MultipleComments verifies that two non-overlapping comments +// are both correctly re-embedded via back-to-front replacement. +func TestMergeComments_MultipleComments(t *testing.T) { + body := "

Hello world and foo bar

" + oldBody := `

Hello world and foo bar

` + comments := makeComments("world", "uuid-1", "bar", "uuid-2") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

Hello world and foo bar

`, result) +} + +// TestMergeComments_EmptyResults verifies that an InlineComments value with a +// non-nil but empty Results slice is handled gracefully. +func TestMergeComments_EmptyResults(t *testing.T) { + body := "

Hello world

" + result, err := mergeComments(body, body, &confluence.InlineComments{}) + assert.NoError(t, err) + assert.Equal(t, body, result) +} + +// TestMergeComments_NonInlineLocation verifies that page-level comments +// (location != "inline") are silently skipped and the body is unchanged. +func TestMergeComments_NonInlineLocation(t *testing.T) { + body := "

Hello world

" + comments := &confluence.InlineComments{ + Results: []confluence.InlineCommentResult{ + { + Extensions: confluence.InlineCommentExtensions{ + Location: "page", + InlineProperties: confluence.InlineCommentProperties{ + OriginalSelection: "Hello", + MarkerRef: "uuid-page", + }, + }, + }, + }, + } + result, err := mergeComments(body, body, comments) + assert.NoError(t, err) + assert.Equal(t, body, result) +} + +// TestMergeComments_NoContext verifies that when a comment's MarkerRef has no +// corresponding marker in oldBody (no context available) the first occurrence +// of the selection in the new body is used. +func TestMergeComments_NoContext(t *testing.T) { + body := "

foo bar foo

" + oldBody := "

foo bar foo

" // no markers → no context + comments := makeComments("foo", "uuid-noctx") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + // First occurrence of "foo" is at position 3. + assert.Equal(t, `

foo bar foo

`, result) +} + +// TestMergeComments_UTF8 verifies that selections and bodies containing +// multibyte UTF-8 characters are handled correctly. +func TestMergeComments_UTF8(t *testing.T) { + body := "

こんにちは世界

" + oldBody := `

こんにちは世界

` + comments := makeComments("世界", "uuid-jp") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

こんにちは世界

`, result) +} + +// TestMergeComments_SelectionWithQuotes verifies that a selection containing +// apostrophes or double-quotes is found correctly in the new body even though +// html.EscapeString would encode those characters. Only &, <, > should be +// escaped when searching. +func TestMergeComments_SelectionWithQuotes(t *testing.T) { + body := `

It's a "test" page

` + oldBody := `

It's a "test" page

` + comments := makeComments(`"test"`, "uuid-q") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + assert.Equal(t, `

It's a "test" page

`, result) +} + +// TestMergeComments_DuplicateMarkerRefDropped verifies that when multiple +// comment results share the same MarkerRef and the selection cannot be found, +// only a single warning is emitted (not one per result). +func TestMergeComments_DuplicateMarkerRefDropped(t *testing.T) { + body := "

Hello world

" + // Duplicate refs, but selection "gone" is not present in body or oldBody. + comments := makeComments("gone", "uuid-dup2", "gone", "uuid-dup2") + + result, err := mergeComments(body, body, comments) + assert.NoError(t, err) + assert.Equal(t, body, result) // body unchanged, single warning logged +} + +// TestMergeComments_CDATASelection verifies that a selection inside a +// CDATA-backed macro body (e.g. ac:code) is matched even though < and > are +// stored as raw characters rather than HTML entities. The raw form is tried as +// a fallback when the escaped form is not found. +func TestMergeComments_CDATASelection(t *testing.T) { + // New body contains a code macro with CDATA — raw < and > in the content. + body := ` }]]>` + // Old body has the marker around the raw selection inside CDATA. + oldBody := `
}]]>` + // The API returns the raw (unescaped) selection. + comments := makeComments("", "uuid-cdata") + + result, err := mergeComments(body, oldBody, comments) + assert.NoError(t, err) + // The raw selection "" should be found and wrapped with a marker. + assert.Equal(t, `
}]]>`, result) +} diff --git a/util/cli.go b/util/cli.go index 8eb315d..c9008cf 100644 --- a/util/cli.go +++ b/util/cli.go @@ -7,9 +7,9 @@ import ( "path/filepath" "strings" + mark "github.com/kovetskiy/mark/v16" "github.com/rs/zerolog" "github.com/rs/zerolog/log" - mark "github.com/kovetskiy/mark/v16" "github.com/urfave/cli/v3" ) @@ -111,10 +111,11 @@ func RunMark(ctx context.Context, cmd *cli.Command) error { TitleAppendGeneratedHash: cmd.Bool("title-append-generated-hash"), ContentAppearance: cmd.String("content-appearance"), - MinorEdit: cmd.Bool("minor-edit"), - VersionMessage: cmd.String("version-message"), - EditLock: cmd.Bool("edit-lock"), - ChangesOnly: cmd.Bool("changes-only"), + MinorEdit: cmd.Bool("minor-edit"), + VersionMessage: cmd.String("version-message"), + EditLock: cmd.Bool("edit-lock"), + ChangesOnly: cmd.Bool("changes-only"), + PreserveComments: cmd.Bool("preserve-comments"), DropH1: cmd.Bool("drop-h1"), StripLinebreaks: cmd.Bool("strip-linebreaks"), diff --git a/util/flags.go b/util/flags.go index db32470..89bfed2 100644 --- a/util/flags.go +++ b/util/flags.go @@ -194,6 +194,12 @@ var Flags = []cli.Flag{ Usage: "Avoids re-uploading pages that haven't changed since the last run.", Sources: cli.NewValueSourceChain(cli.EnvVar("MARK_CHANGES_ONLY"), altsrctoml.TOML("changes-only", altsrc.NewStringPtrSourcer(&filename))), }, + &cli.BoolFlag{ + Name: "preserve-comments", + Value: false, + Usage: "Fetch and preserve inline comments on existing Confluence pages.", + Sources: cli.NewValueSourceChain(cli.EnvVar("MARK_PRESERVE_COMMENTS"), altsrctoml.TOML("preserve-comments", altsrc.NewStringPtrSourcer(&filename))), + }, &cli.FloatFlag{ Name: "d2-scale", Value: 1.0,