ProjectNagae
/
elvish-clone


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
							package md

import (
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"
)

// InlineOp is one element of the flat operation sequence produced by inline
// rendering. Which fields carry data depends on Type.
type InlineOp struct {
	Type InlineOpType
	// For OpText, OpCodeSpan, OpRawHTML and OpAutolink: the text content.
	// For OpLinkStart, OpLinkEnd and OpImage: the title text.
	Text string
	// For OpLinkStart, OpLinkEnd, OpImage and OpAutolink: the destination.
	Dest string
	// For OpImage: the alt text.
	Alt string
}

// InlineOpType identifies the kind of an InlineOp.
type InlineOpType uint

const (
	// Text elements. Embedded newlines in OpText are turned into OpNewLine, but
	// OpRawHTML can contain embedded newlines. OpCodeSpan never contains
	// embedded newlines.
	OpText InlineOpType = iota
	OpCodeSpan
	OpRawHTML
	OpNewLine

	// Inline markup elements.
	OpEmphasisStart
	OpEmphasisEnd
	OpStrongEmphasisStart
	OpStrongEmphasisEnd
	OpLinkStart
	OpLinkEnd
	OpImage
	OpAutolink
	OpHardLineBreak
)

// String returns the plain-text content of op: Text for the text-like ops and
// autolinks, a newline for OpNewLine, the alt text for OpImage, and the empty
// string for every other type.
func (op InlineOp) String() string {
	switch t := op.Type; {
	case t == OpNewLine:
		return "\n"
	case t == OpImage:
		return op.Alt
	case t == OpText || t == OpCodeSpan || t == OpRawHTML || t == OpAutolink:
		return op.Text
	default:
		return ""
	}
}

// renderInline runs the inline parser over text and returns the resulting
// sequence of inline operations.
func renderInline(text string) []InlineOp {
	parser := inlineParser{
		text:   text,
		delims: makeDelimStack(),
	}
	parser.render()
	return parser.buf.ops()
}

// inlineParser holds the state of one inline parsing run: text is the input,
// pos is the byte offset of the next unread byte, delims is the delimiter
// stack used for emphasis, links and images, and buf accumulates the output.
type inlineParser struct {
	text   string
	pos    int
	delims delimStack
	buf    buffer
}

const (
	// The scheme of a URI autolink: an ASCII letter followed by 1 to 31 ASCII
	// letters, digits, "+", "." or "-".
	scheme = `[a-zA-Z][a-zA-Z0-9+.-]{1,31}`
	// Punctuation allowed in the local part of an email autolink. The "-" is
	// kept last so that it is a literal when spliced into a character class.
	emailLocalPuncts = ".!#$%&'*+/=?^_`{|}~-"
)

var (
	// https://spec.commonmark.org/0.30/#uri-autolink
	//
	// The part after the colon may not contain ASCII control characters
	// (U+0000 through U+001F, and U+007F), space, "<" or ">".
	uriAutolinkRegexp = regexp.MustCompile(
		`^<` + scheme + `:[^\x00-\x1f\x7f <>]*` + `>`)
	// https://spec.commonmark.org/0.30/#email-autolink
	emailAutolinkRegexp = regexp.MustCompile(
		`^<[a-zA-Z0-9` + emailLocalPuncts + `]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>`)

	// Both tag regexps are anchored, so they only match a tag at the very
	// start of the string they are applied to.
	openTagRegexp    = regexp.MustCompile(`^` + openTag)
	closingTagRegexp = regexp.MustCompile(`^` + closingTag)
)

const (
	// https://spec.commonmark.org/0.30/#open-tag
	openTag = `<` +
		`[a-zA-Z][a-zA-Z0-9-]*` + // tag name
		(`(?:` +
			`[ \t\n]+` + // whitespace
			`[a-zA-Z_:][a-zA-Z0-9_\.:-]*` + // attribute name
			`(?:[ \t\n]*=[ \t\n]*(?:[^ \t\n"'=<>` + "`" + `]+|'[^']*'|"[^"]*"))?` + // attribute value specification
			`)*`) + // zero or more attributes
		`[ \t\n]*` + // whitespace
		`/?>`
	// https://spec.commonmark.org/0.30/#closing-tag
	closingTag = `</[a-zA-Z][a-zA-Z0-9-]*[ \t\n]*>`
)

// render parses p.text from p.pos to its end, pushing the resulting pieces
// onto p.buf and maintaining the delimiter stack in p.delims. Once all input
// is consumed, it processes the remaining emphasis delimiters.
//
// Each iteration of the main loop dispatches on the byte at p.pos. Branches
// that fail to recognize a construct fall back to parseText, which treats the
// byte at begin as literal text.
func (p *inlineParser) render() {
	for p.pos < len(p.text) {
		b := p.text[p.pos]
		begin := p.pos
		p.pos++

		// parseText extends the current position up to the next meta byte and
		// pushes p.text[begin:p.pos] as a text piece. Callers may adjust begin
		// and p.pos beforehand to control what is included.
		parseText := func() {
			for p.pos < len(p.text) && !isMeta(p.text[p.pos]) {
				p.pos++
			}
			text := p.text[begin:p.pos]
			hardLineBreak := false
			if p.pos < len(p.text) && p.text[p.pos] == '\n' {
				// https://spec.commonmark.org/0.30/#hard-line-break
				//
				// The input to renderInline never ends in a newline, so all
				// newlines are internal ones, thus subject to the hard line
				// break rules
				hardLineBreak = strings.HasSuffix(text, "  ")
				text = strings.TrimRight(text, " ")
			}
			p.buf.push(textPiece(text))
			if hardLineBreak {
				p.buf.push(piece{main: InlineOp{Type: OpHardLineBreak}})
			}
		}

		switch b {
		// The 3 branches below implement the first part of
		// https://spec.commonmark.org/0.30/#an-algorithm-for-parsing-nested-emphasis-and-links.
		case '[':
			bufIdx := p.buf.push(textPiece("["))
			p.delims.push(&delim{typ: '[', bufIdx: bufIdx})
		case '!':
			if p.pos < len(p.text) && p.text[p.pos] == '[' {
				p.pos++
				bufIdx := p.buf.push(textPiece("!["))
				p.delims.push(&delim{typ: '!', bufIdx: bufIdx})
			} else {
				parseText()
			}
		case '*', '_':
			p.consumeRun(b)
			canOpen, canClose := canOpenCloseEmphasis(rune(b),
				emptyToNewline(utf8.DecodeLastRuneInString(p.text[:begin])),
				emptyToNewline(utf8.DecodeRuneInString(p.text[p.pos:])))
			bufIdx := p.buf.push(textPiece(p.text[begin:p.pos]))
			p.delims.push(
				&delim{typ: b, bufIdx: bufIdx,
					n: p.pos - begin, canOpen: canOpen, canClose: canClose})
		case ']':
			// https://spec.commonmark.org/0.30/#look-for-link-or-image.
			var opener *delim
			for d := p.delims.top.prev; d != p.delims.bottom; d = d.prev {
				if d.typ == '[' || d.typ == '!' {
					opener = d
					break
				}
			}
			if opener == nil || opener.inactive {
				if opener != nil {
					unlink(opener)
				}
				p.buf.push(textPiece("]"))
				continue
			}
			n, dest, title := parseLinkTail(p.text[p.pos:])
			if n == -1 {
				unlink(opener)
				p.buf.push(textPiece("]"))
				continue
			}
			p.pos += n
			p.processEmphasis(opener)
			if opener.typ == '[' {
				// Links may not contain other links, so deactivate all
				// earlier "[" delimiters.
				for d := opener.prev; d != p.delims.bottom; d = d.prev {
					if d.typ == '[' {
						d.inactive = true
					}
				}
			}
			unlink(opener)
			if opener.typ == '[' {
				p.buf.pieces[opener.bufIdx] = piece{
					before: []InlineOp{{Type: OpLinkStart, Dest: dest, Text: title}}}
				p.buf.push(piece{
					after: []InlineOp{{Type: OpLinkEnd, Dest: dest, Text: title}}})
			} else {
				// Use the pieces after "![" to build the image alt text.
				var altBuilder strings.Builder
				for _, pc := range p.buf.pieces[opener.bufIdx+1:] {
					altBuilder.WriteString(pc.main.String())
				}
				p.buf.pieces = p.buf.pieces[:opener.bufIdx]
				alt := altBuilder.String()
				p.buf.push(piece{
					main: InlineOp{Type: OpImage, Dest: dest, Alt: alt, Text: title}})
			}
		case '`':
			// https://spec.commonmark.org/0.30/#code-spans
			p.consumeRun('`')
			closer := findBacktickRun(p.text, p.text[begin:p.pos], p.pos)
			if closer == -1 {
				// No matching closer, don't parse as code span.
				parseText()
				continue
			}
			p.buf.push(piece{
				main: InlineOp{Type: OpCodeSpan,
					Text: normalizeCodeSpanContent(p.text[p.pos:closer])}})
			// Skip past the closing run, which is as long as the opening one.
			p.pos = closer + (p.pos - begin)
		case '<':
			// https://spec.commonmark.org/0.30/#raw-html
			if p.pos == len(p.text) {
				parseText()
				continue
			}
			// parseWithRegexp pushes the match of pattern at begin as raw
			// HTML and reports whether there was one.
			parseWithRegexp := func(pattern *regexp.Regexp) bool {
				html := pattern.FindString(p.text[begin:])
				if html == "" {
					return false
				}
				p.buf.push(htmlPiece(html))
				p.pos = begin + len(html)
				return true
			}
			// parseWithCloser pushes everything from begin through the next
			// occurrence of closer as raw HTML and reports whether closer was
			// found.
			parseWithCloser := func(closer string) bool {
				i := strings.Index(p.text[p.pos:], closer)
				if i == -1 {
					return false
				}
				p.pos += i + len(closer)
				p.buf.push(htmlPiece(p.text[begin:p.pos]))
				return true
			}
			switch p.text[p.pos] {
			case '!':
				switch {
				case strings.HasPrefix(p.text[p.pos:], "!--"):
					// Try parsing a comment.
					if parseWithCloser("-->") {
						continue
					}
				case strings.HasPrefix(p.text[p.pos:], "![CDATA["):
					// Try parsing a CDATA section
					if parseWithCloser("]]>") {
						continue
					}
				case p.pos+1 < len(p.text) && isASCIILetter(p.text[p.pos+1]):
					// Try parsing a declaration.
					if parseWithCloser(">") {
						continue
					}
				}
			case '?':
				// Try parsing a processing instruction.
				//
				// Search after the "?" of the opening "<?" so that it cannot
				// double as the "?" of the closing "?>": "<?>" is not a
				// processing instruction, but "<??>" is.
				if closer := strings.Index(p.text[p.pos+1:], "?>"); closer != -1 {
					p.pos += 1 + closer + 2
					p.buf.push(htmlPiece(p.text[begin:p.pos]))
					continue
				}
			case '/':
				// Try parsing a closing tag.
				if parseWithRegexp(closingTagRegexp) {
					continue
				}
			default:
				// Try parsing an open tag.
				if parseWithRegexp(openTagRegexp) {
					continue
				}
				// Try parsing an autolink.
				autolink := uriAutolinkRegexp.FindString(p.text[begin:])
				email := false
				if autolink == "" {
					autolink = emailAutolinkRegexp.FindString(p.text[begin:])
					email = true
				}
				if autolink != "" {
					p.pos = begin + len(autolink)
					// Autolinks support character references but not
					// backslashes, so UnescapeHTML gives us the desired
					// behavior.
					text := UnescapeHTML(autolink[1 : len(autolink)-1])
					dest := text
					if email {
						dest = "mailto:" + dest
					}
					p.buf.push(piece{
						main: InlineOp{Type: OpAutolink, Text: text, Dest: dest},
					})
					continue
				}
			}
			// Nothing matched; treat the "<" as literal text.
			parseText()
		case '&':
			// https://spec.commonmark.org/0.30/#entity-and-numeric-character-references
			if entity := leadingCharRef(p.text[begin:]); entity != "" {
				p.buf.push(textPiece(UnescapeHTML(entity)))
				p.pos = begin + len(entity)
			} else {
				parseText()
			}
		case '\\':
			// https://spec.commonmark.org/0.30/#backslash-escapes
			if p.pos < len(p.text) {
				if p.text[p.pos] == '\n' {
					// https://spec.commonmark.org/0.30/#hard-line-break
					//
					// Do *not* consume the newline; "\\\n" is a hard line break
					// plus a (soft) line break.
					p.buf.push(piece{main: InlineOp{Type: OpHardLineBreak}})
					continue
				} else if isASCIIPunct(p.text[p.pos]) {
					// Valid backslash escape: handle this by just discarding
					// the backslash. The parseText call below will consider the
					// next byte to be already included in the text content.
					begin++
					p.pos++
				}
			}
			parseText()
		case '\n':
			// Hard line breaks are already inserted using lookahead in
			// parseText and the case '\\' branch.

			p.buf.push(piece{main: InlineOp{Type: OpNewLine}})
			// Remove spaces at the beginning of the next line per
			// https://spec.commonmark.org/0.30/#soft-line-breaks.
			for p.pos < len(p.text) && p.text[p.pos] == ' ' {
				p.pos++
			}
		default:
			parseText()
		}
	}
	p.processEmphasis(p.delims.bottom)
}

// consumeRun advances p.pos past every consecutive occurrence of b starting at
// the current position.
func (p *inlineParser) consumeRun(b byte) {
	end := p.pos
	for ; end < len(p.text); end++ {
		if p.text[end] != b {
			break
		}
	}
	p.pos = end
}

// emptyToNewline adapts the (rune, size) result of a utf8.Decode* call: a size
// of zero, which means there was nothing to decode, is mapped to '\n'; any
// other result yields the decoded rune unchanged.
func emptyToNewline(r rune, l int) rune {
	switch l {
	case 0:
		return '\n'
	default:
		return r
	}
}

// canOpenCloseEmphasis reports whether a run of the emphasis punctuation b
// ('*' or '_') can open and can close emphasis, given the rune before it
// (prev) and the rune after it (next). Start and end of file should be
// represented by '\n'.
//
// The criteria are described in:
// https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis
//
// The spec's wording is a bit involved. An equivalent formulation:
//
//   - Every rune falls into one of three categories: space, punctuation and
//     other. "Other" is the category of word runes in "intraword emphasis".
//
//   - The following tables describe whether a punctuation can open or close
//     emphasis:
//
//     Can open emphasis:
//
//     |            | next space | next punct | next other |
//     | ---------- | ---------- | ---------- | ---------- |
//     | prev space |            |   _ or *   |   _ or *   |
//     | prev punct |            |   _ or *   |   _ or *   |
//     | prev other |            |            |   only *   |
//
//     Can close emphasis:
//
//     |            | next space | next punct | next other |
//     | ---------- | ---------- | ---------- | ---------- |
//     | prev space |            |            |            |
//     | prev punct |   _ or *   |   _ or *   |            |
//     | prev other |   _ or *   |   _ or *   |   only *   |
func canOpenCloseEmphasis(b, prev, next rune) (bool, bool) {
	prevSpace, prevPunct := unicode.IsSpace(prev), isUnicodePunct(prev)
	nextSpace, nextPunct := unicode.IsSpace(next), isUnicodePunct(next)

	leftFlanking := !nextSpace && (!nextPunct || prevSpace || prevPunct)
	rightFlanking := !prevSpace && (!prevPunct || nextSpace || nextPunct)

	if b != '*' {
		// '_' is stricter than '*': a run that is both left- and
		// right-flanking only counts when punctuation sits on the outer side.
		canOpen := leftFlanking && (!rightFlanking || prevPunct)
		canClose := rightFlanking && (!leftFlanking || nextPunct)
		return canOpen, canClose
	}
	return leftFlanking, rightFlanking
}

// findBacktickRun searches s, starting at index i, for a backtick run that is
// exactly as long as run, and returns the index where it starts. Longer runs
// are skipped as a whole. It returns -1 if there is no such run.
func findBacktickRun(s, run string, i int) int {
	for start := i; start < len(s); {
		off := strings.Index(s[start:], run)
		if off < 0 {
			break
		}
		match := start + off
		end := match + len(run)
		// Find where the backtick run containing the match actually ends.
		runEnd := end
		for runEnd < len(s) && s[runEnd] == '`' {
			runEnd++
		}
		if runEnd == end {
			return match
		}
		// Too many backticks; resume the search after the entire run.
		start = runEnd
	}
	return -1
}

// normalizeCodeSpanContent turns the raw text between the backtick runs of a
// code span into its content: newlines become spaces, and if the result both
// starts and ends with a space but does not consist solely of spaces, one
// space is stripped from each end.
func normalizeCodeSpanContent(s string) string {
	content := strings.ReplaceAll(s, "\n", " ")
	padded := strings.HasPrefix(content, " ") && strings.HasSuffix(content, " ")
	allSpaces := strings.TrimLeft(content, " ") == ""
	if !padded || allSpaces {
		return content
	}
	return content[1 : len(content)-1]
}

// processEmphasis pairs up the emphasis delimiters above bottom in the
// delimiter stack, rewriting the corresponding pieces in p.buf so that matched
// delimiter characters are replaced by emphasis start/end ops. On return, all
// delimiters above bottom have been removed from the stack.
//
// https://spec.commonmark.org/0.30/#process-emphasis
func (p *inlineParser) processEmphasis(bottom *delim) {
	// Lower bounds for the opener search, indexed by delimiter type (0 for
	// anything but '_', 1 for '_'), closer.n%3, and whether the closer can
	// also open. Entries are lazily initialized to bottom.
	var openersBottom [2][3][2]*delim
	for closer := bottom.next; closer != nil; {
		if !closer.canClose {
			closer = closer.next
			continue
		}
		openerBottom := &openersBottom[b2i(closer.typ == '_')][closer.n%3][b2i(closer.canOpen)]
		if *openerBottom == nil {
			*openerBottom = bottom
		}
		// Walk down the stack looking for a matching opener. Note that the
		// loop variable p shadows the receiver inside the loop only.
		var opener *delim
		for p := closer.prev; p != *openerBottom && p != bottom; p = p.prev {
			// The last two conditions, together with the first, implement the
			// "multiple of 3" rule using the original run lengths.
			if p.canOpen && p.typ == closer.typ &&
				((!p.canClose && !closer.canOpen) ||
					(p.n+closer.n)%3 != 0 || (p.n%3 == 0 && closer.n%3 == 0)) {
				opener = p
				break
			}
		}
		if opener == nil {
			// No opener: remember that searching below this point is futile
			// for this kind of closer, and drop the closer from the stack
			// unless it may still serve as an opener later.
			*openerBottom = closer.prev
			if !closer.canOpen {
				closer.prev.next = closer.next
				closer.next.prev = closer.prev
			}
			closer = closer.next
			continue
		}
		openerPiece := &p.buf.pieces[opener.bufIdx]
		closerPiece := &p.buf.pieces[closer.bufIdx]
		// The remaining text of each piece is the delimiter characters not yet
		// consumed; strong emphasis needs at least two on both sides.
		strong := len(openerPiece.main.Text) >= 2 && len(closerPiece.main.Text) >= 2
		if strong {
			openerPiece.main.Text = openerPiece.main.Text[2:]
			openerPiece.append(InlineOp{Type: OpStrongEmphasisStart})
			closerPiece.main.Text = closerPiece.main.Text[2:]
			closerPiece.prepend(InlineOp{Type: OpStrongEmphasisEnd})
		} else {
			openerPiece.main.Text = openerPiece.main.Text[1:]
			openerPiece.append(InlineOp{Type: OpEmphasisStart})
			closerPiece.main.Text = closerPiece.main.Text[1:]
			closerPiece.prepend(InlineOp{Type: OpEmphasisEnd})
		}
		// Remove all delimiters between the opener and the closer.
		opener.next = closer
		closer.prev = opener
		// Remove the opener and/or closer once all their characters have been
		// consumed. A closer with characters left is examined again in the
		// next iteration.
		if openerPiece.main.Text == "" {
			opener.prev.next = opener.next
			opener.next.prev = opener.prev
		}
		if closerPiece.main.Text == "" {
			closer.prev.next = closer.next
			closer.next.prev = closer.prev
			closer = closer.next
		}
	}
	// Remove everything above bottom from the stack.
	bottom.next = p.delims.top
	p.delims.top.prev = bottom
}

// b2i converts a bool to an int: 1 for true, 0 for false.
func b2i(b bool) int {
	var n int
	if b {
		n = 1
	}
	return n
}

// buffer stores the output of inline rendering as a sequence of pieces. The
// index of a piece within pieces identifies it, so that it can be modified
// after being pushed.
type buffer struct {
	pieces []piece
}

// push adds p to the end of the buffer and returns the index it was stored at.
func (b *buffer) push(p piece) int {
	idx := len(b.pieces)
	b.pieces = append(b.pieces, p)
	return idx
}

// ops flattens the pieces in the buffer into a single sequence of InlineOp
// values.
//
// OpText ops are normalized along the way: empty ones are dropped, embedded
// newlines are turned into OpNewLine ops, and text that directly follows
// another OpText is merged into it. All other ops are passed through as is.
func (b *buffer) ops() []InlineOp {
	var result []InlineOp
	emit := func(op InlineOp) {
		if op.Type != OpText {
			result = append(result, op)
			return
		}
		if op.Text == "" {
			return
		}
		lines := strings.Split(op.Text, "\n")
		head, tail := lines[0], lines[1:]
		last := len(result) - 1
		switch {
		case last >= 0 && result[last].Type == OpText:
			// Merge with the directly preceding text.
			result[last].Text += head
		case head != "":
			result = append(result, InlineOp{Type: OpText, Text: head})
		}
		// Every remaining line was preceded by a newline in the text.
		for _, line := range tail {
			result = append(result, InlineOp{Type: OpNewLine})
			if line == "" {
				continue
			}
			result = append(result, InlineOp{Type: OpText, Text: line})
		}
	}
	for i := range b.pieces {
		b.pieces[i].iterate(emit)
	}
	return result
}

// The algorithm described in
// https://spec.commonmark.org/0.30/#phase-2-inline-structure involves inserting
// nodes before and after existing nodes in the output. The most natural choice
// is a doubly linked list; but for simplicity, we use a slice for output nodes,
// and keep track of nodes that need to be prepended or appended to each node.
//
// A piece is one such output node: main is the node itself, before holds the
// ops to emit ahead of it, and after holds the ops to emit following it. The
// iterate method emits before in slice order and after in reverse slice order.
//
// TODO: Compare the performance of this data structure with doubly linked
// lists.
type piece struct {
	before []InlineOp
	main   InlineOp
	after  []InlineOp
}

// textPiece returns a piece whose main op is an OpText carrying text.
func textPiece(text string) piece {
	op := InlineOp{Type: OpText, Text: text}
	return piece{main: op}
}

// htmlPiece returns a piece whose main op is an OpRawHTML carrying html.
func htmlPiece(html string) piece {
	op := InlineOp{Type: OpRawHTML, Text: html}
	return piece{main: op}
}

// prepend records op as one to be emitted ahead of the main op. Ops recorded
// by successive calls are stored in call order.
func (p *piece) prepend(op InlineOp) {
	p.before = append(p.before, op)
}

// append records op as one to be emitted following the main op. Ops recorded
// by successive calls are stored in call order.
func (p *piece) append(op InlineOp) {
	p.after = append(p.after, op)
}

// iterate calls f on every op of the piece in output order: the before ops
// first to last, then the main op, then the after ops last to first.
func (p *piece) iterate(f func(InlineOp)) {
	for idx := range p.before {
		f(p.before[idx])
	}
	f(p.main)
	for remaining := len(p.after); remaining > 0; remaining-- {
		f(p.after[remaining-1])
	}
}

// delimStack is the delimiter "stack". It is really a doubly linked list
// bracketed by two sentinel nodes: bottom is the head of the list and top is
// its tail, with the actual delimiters linked in between.
//
// https://spec.commonmark.org/0.30/#delimiter-stack
type delimStack struct {
	bottom, top *delim
}

// makeDelimStack returns an empty delimiter stack, consisting of just the two
// sentinels linked to each other.
func makeDelimStack() delimStack {
	var s delimStack
	s.bottom, s.top = new(delim), new(delim)
	s.bottom.next = s.top
	s.top.prev = s.bottom
	return s
}

// push inserts n directly below the top sentinel, making it the topmost
// delimiter.
func (s *delimStack) push(n *delim) {
	last := s.top.prev
	n.prev, n.next = last, s.top
	last.next, s.top.prev = n, n
}

// delim is a node in the delimiter "stack". bufIdx is the index of the
// buffer piece holding the delimiter's text.
type delim struct {
	typ    byte
	bufIdx int
	prev   *delim
	next   *delim
	// Only used when typ is '['
	inactive bool
	// Only used when typ is '_' or '*'.
	n        int
	canOpen  bool
	canClose bool
}

// unlink removes n from the list it is part of by connecting its neighbors to
// each other. The prev and next fields of n itself are left untouched.
func unlink(n *delim) {
	before, after := n.prev, n.next
	before.next = after
	after.prev = before
}

// linkTailParser holds the state for parsing a link tail: text is the input
// and pos is the byte offset of the next unread byte.
type linkTailParser struct {
	text string
	pos  int
}

// parseLinkTail parses the link "tail" at the start of text, the part after
// the ] that closes the link text.
//
// On success, n is the number of bytes the tail occupies, and dest and title
// are the link destination and title. If text does not start with a valid
// tail, n is -1.
func parseLinkTail(text string) (n int, dest, title string) {
	parser := linkTailParser{text: text}
	return parser.parse()
}

// parse parses an inline link tail from the start of p.text: "(", an optional
// destination, an optional title and ")", where the components may be
// separated by whitespace.
//
// On success, it returns the number of bytes consumed, along with the
// destination and title after processing by parseBackslash and parseCharRef.
// If p.text does not start with a valid link tail, it returns -1 and two empty
// strings.
//
// https://spec.commonmark.org/0.30/#links
func (p *linkTailParser) parse() (n int, dest, title string) {
	if len(p.text) < 2 || p.text[0] != '(' {
		return -1, "", ""
	}

	p.pos = 1
	p.skipWhitespaces()
	if p.pos == len(p.text) {
		return -1, "", ""
	}
	// Parse an optional link destination.
	var destBuilder strings.Builder
	if p.text[p.pos] == '<' {
		// Destination enclosed in angle brackets: runs up to the closing ">"
		// and may not contain newlines or unescaped "<".
		p.pos++
		closed := false
	angleDest:
		for p.pos < len(p.text) {
			switch p.text[p.pos] {
			case '>':
				p.pos++
				closed = true
				break angleDest
			case '\n', '<':
				return -1, "", ""
			case '\\':
				destBuilder.WriteByte(p.parseBackslash())
			case '&':
				destBuilder.WriteString(p.parseCharRef())
			default:
				destBuilder.WriteByte(p.text[p.pos])
				p.pos++
			}
		}
		if !closed {
			return -1, "", ""
		}
	} else {
		// Bare destination: runs up to a space, a control character or an
		// unbalanced ")". Unescaped parentheses must be balanced.
		parenBalance := 0
	bareDest:
		for p.pos < len(p.text) {
			if isASCIIControl(p.text[p.pos]) || p.text[p.pos] == ' ' {
				break
			}
			switch p.text[p.pos] {
			case '(':
				parenBalance++
				destBuilder.WriteByte('(')
				p.pos++
			case ')':
				if parenBalance == 0 {
					break bareDest
				}
				parenBalance--
				destBuilder.WriteByte(')')
				p.pos++
			case '\\':
				destBuilder.WriteByte(p.parseBackslash())
			case '&':
				destBuilder.WriteString(p.parseCharRef())
			default:
				destBuilder.WriteByte(p.text[p.pos])
				p.pos++
			}
		}
		if parenBalance != 0 {
			return -1, "", ""
		}
	}
	destEnd := p.pos
	p.skipWhitespaces()
	// If both a destination and a title are present, they must be separated
	// by whitespace. Without this check, a title directly following an
	// angle-bracketed destination, as in (<dest>"title"), would be accepted.
	separated := p.pos > destEnd

	var titleBuilder strings.Builder
	if separated && p.pos < len(p.text) && strings.ContainsRune("'\"(", rune(p.text[p.pos])) {
		opener := p.text[p.pos]
		closer := p.text[p.pos]
		if closer == '(' {
			closer = ')'
		}
		p.pos++
	title:
		for p.pos < len(p.text) {
			switch p.text[p.pos] {
			case closer:
				p.pos++
				break title
			case opener:
				// Titles started with "(" does not allow unescaped "(":
				// https://spec.commonmark.org/0.30/#link-title
				//
				// For quoted titles, opener equals closer, so the case above
				// always takes precedence and this one is never reached.
				return -1, "", ""
			case '\\':
				titleBuilder.WriteByte(p.parseBackslash())
			case '&':
				titleBuilder.WriteString(p.parseCharRef())
			default:
				titleBuilder.WriteByte(p.text[p.pos])
				p.pos++
			}
		}
	}

	p.skipWhitespaces()

	// An unterminated title runs to the end of the text, and is rejected here
	// along with anything else that is not followed by ")".
	if p.pos == len(p.text) || p.text[p.pos] != ')' {
		return -1, "", ""
	}
	return p.pos + 1, destBuilder.String(), titleBuilder.String()
}

// skipWhitespaces advances p.pos to the next byte that is not whitespace, or
// to the end of the text if there is none.
func (p *linkTailParser) skipWhitespaces() {
	for ; p.pos < len(p.text); p.pos++ {
		if !isWhitespace(p.text[p.pos]) {
			return
		}
	}
}

// isWhitespace reports whether b is a space, a tab or a newline.
func isWhitespace(b byte) bool {
	switch b {
	case ' ', '\t', '\n':
		return true
	}
	return false
}

// parseBackslash handles the backslash at p.pos. If it is followed by an ASCII
// punctuation byte, both bytes are consumed and the punctuation byte is
// returned; otherwise only the backslash is consumed and returned as is.
func (p *linkTailParser) parseBackslash() byte {
	next := p.pos + 1
	if next >= len(p.text) || !isASCIIPunct(p.text[next]) {
		p.pos = next
		return '\\'
	}
	p.pos = next + 1
	return p.text[next]
}

// parseCharRef handles the byte at p.pos, which callers only invoke on "&". If
// leadingCharRef finds a character reference there, it is consumed and its
// unescaped form returned; otherwise a single byte is consumed and returned as
// is.
func (p *linkTailParser) parseCharRef() string {
	start := p.pos
	entity := leadingCharRef(p.text[start:])
	if entity == "" {
		p.pos = start + 1
		return p.text[start : start+1]
	}
	p.pos = start + len(entity)
	return UnescapeHTML(entity)
}

// isASCIILetter reports whether b is an ASCII letter, a-z or A-Z.
func isASCIILetter(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	default:
		return false
	}
}

// isASCIIControl reports whether b is an ASCII control character as defined by
// the CommonMark spec: U+0000 through U+001F, or U+007F (DEL).
//
// https://spec.commonmark.org/0.30/#ascii-control-character
func isASCIIControl(b byte) bool { return b < 0x20 || b == 0x7f }

// All the ASCII punctuation characters, in code point order.
const asciiPuncts = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

// isASCIIPunct reports whether b is an ASCII punctuation character.
func isASCIIPunct(b byte) bool {
	return strings.ContainsRune(asciiPuncts, rune(b))
}

// isUnicodePunct reports whether r is a Unicode punctuation character in the
// CommonMark sense:
// https://spec.commonmark.org/0.30/#unicode-punctuation-character
//
// That definition covers every ASCII punctuation character, including the ones
// that unicode.IsPunct does not regard as punctuation ("$+<=>^`|~" to be
// exact), so those are checked for separately.
func isUnicodePunct(r rune) bool {
	if unicode.IsPunct(r) {
		return true
	}
	return r <= 0x7f && isASCIIPunct(byte(r))
}

// The bytes that end a run of plain text during inline parsing.
const metas = "![]*_`\\&<\n"

// isMeta reports whether b is one of the bytes in metas.
func isMeta(b byte) bool {
	for i := 0; i < len(metas); i++ {
		if metas[i] == b {
			return true
		}
	}
	return false
}