Parcourir la source

pkg/md/fmt.go: Improve escaping of link tails.

- Handle newlines in bare link destination correctly.

- Allow link destination with balanced parentheses to be written without angle
  brackets.

- Don't escape link destination or title more than necessary.
Qi Xiao il y a 1 an
Parent
commit
09ad1316b4
6 fichiers modifiés avec 64 ajouts et 27 suppressions
  1. 51 19
      pkg/md/fmt.go
  2. 5 0
      pkg/md/fmt_test.go
  3. 4 4
      pkg/md/inline.go
  4. 1 1
      website/learn/faq.md
  5. 2 2
      website/learn/fundamentals.md
  6. 1 1
      website/ref/language.md

+ 51 - 19
pkg/md/fmt.go

@@ -860,17 +860,24 @@ func matchLens(pieces []string, pattern *regexp.Regexp) map[int]bool {
 
 const asciiControl = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
 
-const forbiddenInRawLinkDest = asciiControl + " ()"
+const forbiddenInRawLinkDest = asciiControl + " "
 
 func formatLinkTail(dest, title string) string {
 	var sb strings.Builder
 	sb.WriteString("(")
-	if strings.ContainsAny(dest, forbiddenInRawLinkDest) {
-		sb.WriteString("<" + strings.ReplaceAll(escapeText(dest), ">", "&gt;") + ">")
+	if strings.ContainsAny(dest, forbiddenInRawLinkDest) || !balancedParens(dest) {
+		// Angle-bracketed destinations recognize a few characters plus
+		// character references as special and disallow newlines. The order of
+		// function calls is important here to avoid double-escaping.
+		sb.WriteString("<" + strings.ReplaceAll(
+			escapeAmpersandBackslash(dest, "<>"), "\n", "&NewLine;") + ">")
 	} else if dest == "" && title != "" {
 		sb.WriteString("<>")
 	} else {
-		sb.WriteString(escapeText(dest))
+		// Bare destinations only recognize backslash and character references
+		// as special. The order of function calls is important here to avoid
+		// double-escaping.
+		sb.WriteString(escapeAmpersandBackslash(dest, ""))
 	}
 	if title != "" {
 		sb.WriteString(" ")
@@ -880,29 +887,55 @@ func formatLinkTail(dest, title string) string {
 	return sb.String()
 }
 
-var escapeParens = strings.NewReplacer("(", `\(`, ")", `\)`).Replace
+func balancedParens(s string) bool {
+	balance := 0
+	for i := 0; i < len(s); i++ {
+		switch s[i] {
+		case '(':
+			balance++
+		case ')':
+			if balance == 0 {
+				return false
+			}
+			balance--
+		}
+	}
+	return balance == 0
+}
 
 func wrapAndEscapeLinkTitle(title string) string {
 	doubleQuotes := strings.Count(title, "\"")
 	if doubleQuotes == 0 {
-		return "\"" + escapeText(title) + "\""
+		return "\"" + escapeAmpersandBackslash(title, "") + "\""
 	}
 	singleQuotes := strings.Count(title, "'")
 	if singleQuotes == 0 {
-		return "'" + escapeText(title) + "'"
+		return "'" + escapeAmpersandBackslash(title, "") + "'"
 	}
 	parens := strings.Count(title, "(") + strings.Count(title, ")")
 	if parens == 0 {
-		return "(" + escapeText(title) + ")"
+		return "(" + escapeAmpersandBackslash(title, "") + ")"
 	}
 	switch {
 	case doubleQuotes <= singleQuotes && doubleQuotes <= parens:
-		return `"` + strings.ReplaceAll(escapeText(title), `"`, `\"`) + `"`
+		return `"` + escapeAmpersandBackslash(title, `"`) + `"`
 	case singleQuotes <= parens:
-		return "'" + strings.ReplaceAll(escapeText(title), "'", `\'`) + "'"
+		return "'" + escapeAmpersandBackslash(title, `'`) + "'"
 	default:
-		return "(" + escapeParens(escapeText(title)) + ")"
+		return "(" + escapeAmpersandBackslash(title, "()") + ")"
+	}
+}
+
+// Backslash-escape backslashes, bytes in the specified set, and ampersands that start a character reference.
+func escapeAmpersandBackslash(s, set string) string {
+	var sb strings.Builder
+	for i := 0; i < len(s); i++ {
+		if s[i] == '\\' || strings.IndexByte(set, s[i]) >= 0 || leadingCharRef(s[i:]) != "" {
+			sb.WriteByte('\\')
+		}
+		sb.WriteByte(s[i])
 	}
+	return sb.String()
 }
 
 func (c *FmtCodec) startLine() {
@@ -990,24 +1023,23 @@ func escapeText(s string) string {
 			if isWord(utf8.DecodeLastRuneInString(s[:i])) && isWord(utf8.DecodeRuneInString(s[i+1:])) {
 				sb.WriteByte('_')
 			} else {
-				sb.WriteString("\\_")
+				sb.WriteString(`\_`)
 			}
 		case '&':
-			// Look ahead to next ";" to decide whether the ampersand can start
-			// a character reference and thus needs to be escaped. Since
-			// any inline markup will introduce a metacharacter that is not
-			// allowed within character reference, it is sufficient to check
-			// within the text.
+			// Look ahead to decide whether the ampersand can start a character
+			// reference and thus needs to be escaped. Since any inline markup
+			// will introduce a metacharacter that is not allowed within
+			// character reference, it is sufficient to check within the text.
 			if leadingCharRef(s[i:]) == "" {
 				sb.WriteByte('&')
 			} else {
-				sb.WriteString("&amp;")
+				sb.WriteString(`\&`)
 			}
 		case '<':
 			if i < len(s)-1 && !canBeSpecialAfterLt(s[i+1]) {
 				sb.WriteByte('<')
 			} else {
-				sb.WriteString("&lt;")
+				sb.WriteString(`\<`)
 			}
 		case '\u00A0':
 			// This is by no means required, but it's nice to make non-breaking

+ 5 - 0
pkg/md/fmt_test.go

@@ -70,6 +70,11 @@ var supplementalFmtCases = []testCase{
 		Name:     "Link title with fewer parens than single and double quotes",
 		Markdown: `[a](b (\(''""))`,
 	},
+	{
+		Section:  "Links",
+		Name:     "Newline in link destination",
+		Markdown: `[a](<&NewLine;>)`,
+	},
 	{
 		Section:  "Soft line breaks",
 		Name:     "Space at start of line",

+ 4 - 4
pkg/md/inline.go

@@ -649,7 +649,7 @@ func (p *linkTailParser) parse() (n int, dest, title string) {
 			case '\\':
 				destBuilder.WriteByte(p.parseBackslash())
 			case '&':
-				destBuilder.WriteString(p.parseEntity())
+				destBuilder.WriteString(p.parseCharRef())
 			default:
 				destBuilder.WriteByte(p.text[p.pos])
 				p.pos++
@@ -680,7 +680,7 @@ func (p *linkTailParser) parse() (n int, dest, title string) {
 			case '\\':
 				destBuilder.WriteByte(p.parseBackslash())
 			case '&':
-				destBuilder.WriteString(p.parseEntity())
+				destBuilder.WriteString(p.parseCharRef())
 			default:
 				destBuilder.WriteByte(p.text[p.pos])
 				p.pos++
@@ -713,7 +713,7 @@ func (p *linkTailParser) parse() (n int, dest, title string) {
 			case '\\':
 				titleBuilder.WriteByte(p.parseBackslash())
 			case '&':
-				titleBuilder.WriteString(p.parseEntity())
+				titleBuilder.WriteString(p.parseCharRef())
 			default:
 				titleBuilder.WriteByte(p.text[p.pos])
 				p.pos++
@@ -747,7 +747,7 @@ func (p *linkTailParser) parseBackslash() byte {
 	return '\\'
 }
 
-func (p *linkTailParser) parseEntity() string {
+func (p *linkTailParser) parseCharRef() string {
 	if entity := leadingCharRef(p.text[p.pos:]); entity != "" {
 		p.pos += len(entity)
 		return UnescapeHTML(entity)

+ 1 - 1
website/learn/faq.md

@@ -56,7 +56,7 @@ Elvish is named after **elven** items in
 high quality. You can think of Elvish as an abbreviation of "elven shell".
 
 The name is not directly related to
-[Tolkien's Elvish languages](<https://en.wikipedia.org/wiki/Elvish_languages\_(Middle-earth)>),
+[Tolkien's Elvish languages](https://en.wikipedia.org/wiki/Elvish_languages_(Middle-earth)),
 but you're welcome to create something related to both Elvishes.
 
 Alternatively, Elvish is a backronym for "Expressive programming Language and

+ 2 - 2
website/learn/fundamentals.md

@@ -332,8 +332,8 @@ Linux
 ```
 
 (If you are running macOS, `uname` will print `Darwin`, the
-[open-source core](<https://en.wikipedia.org/wiki/Darwin\_(operating_system)>)
-of macOS.)
+[open-source core](https://en.wikipedia.org/wiki/Darwin_(operating_system)) of
+macOS.)
 
 Let's try to integrate this information into our "hello" message. The Elvish
 command-line allows us to run multiple commands in a batch, as long as they are

+ 1 - 1
website/ref/language.md

@@ -743,7 +743,7 @@ Compilation error: variable $nonexistent not found
 When a function literal refers to a variable in an outer scope, the function
 will keep that variable alive, even if that variable is the local variable of an
 outer function that function has returned. This is called
-[closure semantics](<https://en.wikipedia.org/wiki/Closure\_(computer_programming)>),
+[closure semantics](https://en.wikipedia.org/wiki/Closure_(computer_programming)),
 because the function literal "closes" over the environment it is defined in.
 
 In the following example, the `make-adder` function outputs two functions, both