123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270 |
- package md_test
- import (
- "fmt"
- "html"
- "regexp"
- "strings"
- "testing"
- "unicode/utf8"
- "github.com/google/go-cmp/cmp"
- . "src.elv.sh/pkg/md"
- "src.elv.sh/pkg/testutil"
- "src.elv.sh/pkg/wcwidth"
- )
// supplementalFmtCases lists extra Markdown inputs for the formatter tests in
// this file. Each case only carries the section it relates to, a name and the
// Markdown input; no expected output is stored, because the tests compare
// renderings of the original text against renderings of the formatted text.
var supplementalFmtCases = []testCase{
	// Fenced code blocks.
	{
		Section:  "Fenced code blocks",
		Name:     "Tilde fence with info starting with tilde",
		Markdown: "~~~ ~`\n" + "~~~",
	},
	// Emphasis delimiters next to spaces and punctuation.
	{
		Section:  "Emphasis and strong emphasis",
		Name:     "Space at start of content",
		Markdown: "* x*",
	},
	{
		Section:  "Emphasis and strong emphasis",
		Name:     "Space at end of content",
		Markdown: "*x *",
	},
	{
		Section:  "Emphasis and strong emphasis",
		Name:     "Emphasis opener after word before punctuation",
		Markdown: "A*!*",
	},
	{
		Section:  "Emphasis and strong emphasis",
		Name:     "Emphasis closer after punctuation before word",
		Markdown: "*!*A",
	},
	{
		Section:  "Emphasis and strong emphasis",
		Name:     "Space-only content",
		Markdown: "* *",
	},
	// Links: escaping before the link, title delimiters and destinations.
	{
		Section:  "Links",
		Name:     "Exclamation mark before link",
		Markdown: `\![a](b)`,
	},
	{
		Section:  "Links",
		Name:     "Link title with both single and double quotes",
		Markdown: `[a](b ('"))`,
	},
	{
		Section:  "Links",
		Name:     "Link title with fewer double quotes than single quotes and parens",
		Markdown: `[a](b "\"''()")`,
	},
	{
		Section:  "Links",
		Name:     "Link title with fewer single quotes than double quotes and parens",
		Markdown: `[a](b '\'""()')`,
	},
	{
		Section:  "Links",
		Name:     "Link title with fewer parens than single and double quotes",
		Markdown: `[a](b (\(''""))`,
	},
	{
		// The raw string deliberately contains a literal newline between "<"
		// and ">"; the second line must stay at column 0.
		Section: "Links",
		Name:    "Newline in link destination",
		Markdown: `[a](<
>)`,
	},
	// Leading and trailing spaces on a line.
	{
		Section:  "Soft line breaks",
		Name:     "Space at start of line",
		Markdown: " foo",
	},
	{
		Section:  "Soft line breaks",
		Name:     "Space at end of line",
		Markdown: "foo ",
	},
}

// fmtTestCases is the case list used by every test and fuzz seed in this file:
// the result of concat applied to htmlTestCases (defined elsewhere) and
// supplementalFmtCases.
var fmtTestCases = concat(htmlTestCases, supplementalFmtCases)
- func TestFmtPreservesHTMLRender(t *testing.T) {
- testutil.Set(t, &UnescapeHTML, html.UnescapeString)
- for _, tc := range fmtTestCases {
- t.Run(tc.testName(), func(t *testing.T) {
- testFmtPreservesHTMLRender(t, tc.Markdown)
- })
- }
- }
- func FuzzFmtPreservesHTMLRender(f *testing.F) {
- for _, tc := range fmtTestCases {
- f.Add(tc.Markdown)
- }
- f.Fuzz(testFmtPreservesHTMLRender)
- }
- func testFmtPreservesHTMLRender(t *testing.T, original string) {
- testFmtPreservesHTMLRenderModulo(t, original, 0, nil)
- }
- func TestReflowFmtPreservesHTMLRenderModuleWhitespaces(t *testing.T) {
- testReflowFmt(t, testReflowFmtPreservesHTMLRenderModuloWhitespaces)
- }
- func FuzzReflowFmtPreservesHTMLRenderModuleWhitespaces(f *testing.F) {
- fuzzReflowFmt(f, testReflowFmtPreservesHTMLRenderModuloWhitespaces)
- }
// Patterns used to normalize whitespace inside rendered HTML paragraphs.
var (
	// paragraph matches one <p>...</p> element. The match is non-greedy, and
	// the (?s) flag lets "." span newlines.
	paragraph = regexp.MustCompile(`(?s)<p>.*?</p>`)
	// whitespaceRun matches one or more consecutive spaces, tabs or newlines.
	whitespaceRun = regexp.MustCompile(`[ \t\n]+`)
	// brWithWhitespaces matches a "<br />" tag together with any spaces, tabs
	// or newlines directly before and after it.
	brWithWhitespaces = regexp.MustCompile(`[ \t\n]*<br />[ \t\n]*`)
)
- func testReflowFmtPreservesHTMLRenderModuloWhitespaces(t *testing.T, original string, w int) {
- if strings.Contains(original, "<p>") {
- t.Skip("markdown contains <p>")
- }
- if strings.Contains(original, "</p>") {
- t.Skip("markdown contains </p>")
- }
- testFmtPreservesHTMLRenderModulo(t, original, w, func(html string) string {
- // Coalesce whitespaces in each paragraph.
- return paragraph.ReplaceAllStringFunc(html, func(p string) string {
- body := strings.Trim(p[3:len(p)-4], " \t\n")
- // Convert each whitespace run to a single space.
- body = whitespaceRun.ReplaceAllLiteralString(body, " ")
- // Remove whitespaces around <br />.
- body = brWithWhitespaces.ReplaceAllLiteralString(body, "<br />")
- return "<p>" + body + "</p>"
- })
- })
- }
- func TestReflowFmtResultIsUnchangedUnderFmt(t *testing.T) {
- testReflowFmt(t, testReflowFmtResultIsUnchangedUnderFmt)
- }
- func FuzzReflowFmtResultIsUnchangedUnderFmt(f *testing.F) {
- fuzzReflowFmt(f, testReflowFmtResultIsUnchangedUnderFmt)
- }
- func testReflowFmtResultIsUnchangedUnderFmt(t *testing.T, original string, w int) {
- reflowed := formatAndSkipIfUnsupported(t, original, w)
- formatted := RenderString(reflowed, &FmtCodec{})
- if reflowed != formatted {
- t.Errorf("original:\n%s\nreflowed:\n%s\nformatted:\n%s"+
- "markdown diff (-reflowed +formatted):\n%s",
- hr+"\n"+original+hr, hr+"\n"+reflowed+hr, hr+"\n"+formatted+hr,
- cmp.Diff(reflowed, formatted))
- }
- }
- func TestReflowFmtResultFitsInWidth(t *testing.T) {
- testReflowFmt(t, testReflowFmtResultFitsInWidth)
- }
- func FuzzReflowFmtResultFitsInWidth(f *testing.F) {
- fuzzReflowFmt(f, testReflowFmtResultFitsInWidth)
- }
// Patterns used by testReflowFmtResultFitsInWidth to analyze over-long lines.
var (
	// Match all markers that can be written by FmtCodec.
	//
	// Concretely: optional leading spaces, then any number of "-", "*", ">"
	// or ordered-list markers (1 to 9 digits followed by "." or ")"), each
	// optionally followed by spaces. The pattern is anchored at the start of
	// the line.
	markersRegexp = regexp.MustCompile(`^ *(?:(?:[-*>]|[0-9]{1,9}[.)]) *)*`)
	// linkRegexp matches a "[...](...)" sequence anywhere in a line. Both
	// parts are matched greedily, so this is a coarse check rather than a
	// Markdown link parser.
	linkRegexp = regexp.MustCompile(`\[.*\]\(.*\)`)
	// codeSpanRegexp matches anything enclosed between two backticks on a
	// line.
	codeSpanRegexp = regexp.MustCompile("`.*`")
)
- func testReflowFmtResultFitsInWidth(t *testing.T, original string, w int) {
- if w <= 0 {
- t.Skip("width <= 0")
- }
- var trace TraceCodec
- Render(original, &trace)
- for _, op := range trace.Ops() {
- switch op.Type {
- case OpHeading, OpCodeBlock, OpHTMLBlock:
- t.Skipf("input contains unsupported block type %s", op.Type)
- }
- }
- reflowed := formatAndSkipIfUnsupported(t, original, w)
- for _, line := range strings.Split(reflowed, "\n") {
- lineWidth := wcwidth.Of(line)
- if lineWidth <= w {
- continue
- }
- // Strip all markers
- content := line[len(markersRegexp.FindString(line)):]
- // Analyze whether the content is allowed to exceed width
- switch {
- case !strings.Contains(content, " "):
- case strings.Contains(content, "<"):
- case linkRegexp.MatchString(content):
- case codeSpanRegexp.MatchString(content):
- default:
- t.Errorf("line length > %d: %q\nfull reflowed:\n%s",
- w, line, hr+"\n"+reflowed+hr)
- }
- }
- }
// widths are the widths that testReflowFmt and fuzzReflowFmt pair with every
// entry of fmtTestCases.
var widths = []int{20, 51, 80}
- func testReflowFmt(t *testing.T, test func(*testing.T, string, int)) {
- for _, tc := range fmtTestCases {
- for _, w := range widths {
- t.Run(fmt.Sprintf("%s/Width %d", tc.testName(), w), func(t *testing.T) {
- test(t, tc.Markdown, w)
- })
- }
- }
- }
- func fuzzReflowFmt(f *testing.F, test func(*testing.T, string, int)) {
- for _, tc := range fmtTestCases {
- for _, w := range widths {
- f.Add(tc.Markdown, w)
- }
- }
- f.Fuzz(test)
- }
- func testFmtPreservesHTMLRenderModulo(t *testing.T, original string, w int, processHTML func(string) string) {
- formatted := formatAndSkipIfUnsupported(t, original, w)
- originalRender := RenderString(original, &HTMLCodec{})
- formattedRender := RenderString(formatted, &HTMLCodec{})
- if processHTML != nil {
- originalRender = processHTML(originalRender)
- formattedRender = processHTML(formattedRender)
- }
- if formattedRender != originalRender {
- t.Errorf("original:\n%s\nformatted:\n%s\n"+
- "markdown diff (-original +formatted):\n%s"+
- "HTML diff (-original +formatted):\n%s"+
- "ops diff (-original +formatted):\n%s",
- hr+"\n"+original+hr, hr+"\n"+formatted+hr,
- cmp.Diff(original, formatted),
- cmp.Diff(originalRender, formattedRender),
- cmp.Diff(RenderString(original, &TraceCodec{}), RenderString(formatted, &TraceCodec{})))
- }
- }
- func formatAndSkipIfUnsupported(t *testing.T, original string, w int) string {
- if !utf8.ValidString(original) {
- t.Skipf("input is not valid UTF-8")
- }
- if strings.Contains(original, "\t") {
- t.Skipf("input contains tab")
- }
- codec := &FmtCodec{Width: w}
- formatted := RenderString(original, codec)
- if u := codec.Unsupported(); u != nil {
- t.Skipf("input uses unsupported feature: %v", u)
- }
- return formatted
- }
|