Jelajahi Sumber

website: Use a pkg/md-based HTML renderer.

Pandoc is no longer required to build the website.
Qi Xiao 1 tahun lalu
induk
melakukan
ffd2152a19

+ 20 - 16
pkg/md/html.go

@@ -48,9 +48,7 @@ func (c *HTMLCodec) Do(op Op) {
 			}
 		}
 		fmt.Fprintf(c, "<h%d%s>", op.Number, &attrs)
-		for _, inlineOp := range op.Content {
-			c.doInline(inlineOp)
-		}
+		RenderInlineContentToHTML(&c.Builder, op.Content)
 		fmt.Fprintf(c, "</h%d>\n", op.Number)
 	case OpCodeBlock:
 		var attrs attrBuilder
@@ -71,9 +69,7 @@ func (c *HTMLCodec) Do(op Op) {
 		}
 	case OpParagraph:
 		c.WriteString("<p>")
-		for _, inlineOp := range op.Content {
-			c.doInline(inlineOp)
-		}
+		RenderInlineContentToHTML(&c.Builder, op.Content)
 		c.WriteString("</p>\n")
 	case OpOrderedListStart:
 		var attrs attrBuilder
@@ -94,23 +90,31 @@ var inlineTags = []string{
 	OpHardLineBreak: "<br />",
 }
 
-func (c *HTMLCodec) doInline(op InlineOp) {
+// RenderInlineContentToHTML writes the HTML rendering of every inline op in
+// ops, in order, to sb.
+func RenderInlineContentToHTML(sb *strings.Builder, ops []InlineOp) {
+	for i := range ops {
+		doInline(sb, ops[i])
+	}
+}
+
+func doInline(sb *strings.Builder, op InlineOp) {
 	switch op.Type {
 	case OpText:
-		c.WriteString(escapeHTML(op.Text))
+		sb.WriteString(escapeHTML(op.Text))
 	case OpCodeSpan:
-		c.WriteString("<code>")
-		c.WriteString(escapeHTML(op.Text))
-		c.WriteString("</code>")
+		sb.WriteString("<code>")
+		sb.WriteString(escapeHTML(op.Text))
+		sb.WriteString("</code>")
 	case OpRawHTML:
-		c.WriteString(op.Text)
+		sb.WriteString(op.Text)
 	case OpLinkStart:
 		var attrs attrBuilder
 		attrs.set("href", escapeURL(op.Dest))
 		if op.Text != "" {
 			attrs.set("title", op.Text)
 		}
-		fmt.Fprintf(c, "<a%s>", &attrs)
+		fmt.Fprintf(sb, "<a%s>", &attrs)
 	case OpImage:
 		var attrs attrBuilder
 		attrs.set("src", escapeURL(op.Dest))
@@ -118,13 +122,13 @@ func (c *HTMLCodec) doInline(op InlineOp) {
 		if op.Text != "" {
 			attrs.set("title", op.Text)
 		}
-		fmt.Fprintf(c, "<img%s />", &attrs)
+		fmt.Fprintf(sb, "<img%s />", &attrs)
 	case OpAutolink:
 		var attrs attrBuilder
 		attrs.set("href", escapeURL(op.Dest))
-		fmt.Fprintf(c, "<a%s>%s</a>", &attrs, escapeHTML(op.Text))
+		fmt.Fprintf(sb, "<a%s>%s</a>", &attrs, escapeHTML(op.Text))
 	default:
-		c.WriteString(inlineTags[op.Type])
+		sb.WriteString(inlineTags[op.Type])
 	}
 }
 

+ 14 - 13
pkg/md/inline.go

@@ -43,6 +43,19 @@ const (
 	OpHardLineBreak
 )
 
+// String returns the plain-text content of op: the Text field for text, code
+// span, raw HTML and autolink ops, a newline for OpNewLine, the Alt field for
+// images, and the empty string for every other op type.
+func (op InlineOp) String() string {
+	switch op.Type {
+	case OpNewLine:
+		return "\n"
+	case OpImage:
+		return op.Alt
+	case OpText, OpCodeSpan, OpRawHTML, OpAutolink:
+		return op.Text
+	default:
+		return ""
+	}
+}
+
 func renderInline(text string) []InlineOp {
 	p := inlineParser{text, 0, makeDelimStack(), buffer{}}
 	p.render()
@@ -178,7 +191,7 @@ func (p *inlineParser) render() {
 				// Use the pieces after "![" to build the image alt text.
 				var altBuilder strings.Builder
 				for _, piece := range p.buf.pieces[opener.bufIdx+1:] {
-					altBuilder.WriteString(plainText(piece))
+					altBuilder.WriteString(piece.main.String())
 				}
 				p.buf.pieces = p.buf.pieces[:opener.bufIdx]
 				alt := altBuilder.String()
@@ -544,18 +557,6 @@ func htmlPiece(html string) piece {
 	return piece{main: InlineOp{Type: OpRawHTML, Text: html}}
 }
 
-func plainText(p piece) string {
-	switch p.main.Type {
-	case OpText, OpCodeSpan, OpRawHTML, OpAutolink:
-		return p.main.Text
-	case OpNewLine:
-		return "\n"
-	case OpImage:
-		return p.main.Alt
-	}
-	return ""
-}
-
 func (p *piece) prepend(op InlineOp) { p.before = append(p.before, op) }
 func (p *piece) append(op InlineOp)  { p.after = append(p.after, op) }
 

+ 2 - 4
website/Makefile

@@ -6,8 +6,6 @@ DOCSET_DST_DIR := ./Elvish.docset
 MDS := home.md $(filter-out %/README.md,$(wildcard [^_]*/*.md))
 HTMLS := $(MDS:.md=.html)
 
-MD_TO_HTML_TOOLS = $(addprefix tools/,$(shell grep -o '\w*\.bin' tools/md-to-html))
-
 # Generates the website into $(DST_DIR).
 gen: tools/gensite.bin $(HTMLS)
 	tools/gensite.bin . $(DST_DIR)
@@ -47,5 +45,5 @@ endif
 tools/%.bin: cmd/% $$(wildcard cmd/%/*)
 	go build -o $@ ./$<
 
-%.html: %.md tools/md-to-html $(MD_TO_HTML_TOOLS) $$(shell tools/extra-deps $$@)
-	tools/md-to-html $< $@
+%.html: %.md tools/macros.bin tools/elvdoc.bin tools/highlight.bin tools/md2html.bin $$(shell tools/extra-deps $$@)
+	tools/macros.bin -repo .. -elvdoc tools/elvdoc.bin < $< | tools/highlight.bin | tools/md2html.bin > $@

+ 3 - 5
website/README.md

@@ -2,9 +2,9 @@
 
 This directory contains source for Elvish's official website.
 
-The documents are written in GitHub-flavored markdown sprinkled with some HTML
-and custom macros. Most of them can be viewed directly in GitHub; notable
-exceptions are the homepage (`home.md`) and the download page
+The documents are written in [CommonMark](https://commonmark.org) sprinkled with
+some HTML and custom macros. Most of them can be viewed directly in GitHub;
+notable exceptions are the homepage (`home.md`) and the download page
 (`get/prelude.md`).
 
 ## Building
@@ -14,8 +14,6 @@ a custom toolchain. You need the following software to build it:
 
 -   Go, with the same version requirement as Elvish itself.
 
--   Pandoc 2.2.1 (other versions in the 2.x series might also work).
-
 -   GNU Make (any "reasonably modern" version should do).
 
 To build the website, just run `make`. The built website is in the `_dst`

+ 238 - 0
website/cmd/md2html/main.go

@@ -0,0 +1,238 @@
+// Command md2html converts Markdown to HTML.
+//
+// It is tailored for converting the Markdown sources of the Elvish website
+// (https://elv.sh) to HTML. Based on [md.HTMLCodec], it adds the following
+// features:
+//
+//   - Autogenerated ID for each heading
+//
+//   - Self link for each heading
+//
+// The following features can be turned on per Markdown file by specifying them
+// in an HTML comment before any text (for example <!-- toc number-sections -->
+// turns on both):
+//
+//   - toc: Generate a table of contents
+//
+//   - number-sections: Generate section numbers for headings
+package main
+
+import (
+	"fmt"
+	"html"
+	"io"
+	"log"
+	"os"
+	"regexp"
+	"strings"
+
+	"src.elv.sh/pkg/md"
+)
+
+// main reads Markdown from standard input, renders it with the custom codec
+// (wrapped in md.SmartPunctsCodec) and writes the resulting HTML to standard
+// output. Any read or write failure terminates the program with a non-zero
+// exit status, so that a build driving this command does not mistake truncated
+// output for a successfully generated file.
+func main() {
+	markdown, err := io.ReadAll(os.Stdin)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Named c rather than codec to avoid shadowing the codec type.
+	c := &codec{}
+	md.Render(string(markdown), md.SmartPunctsCodec{Inner: c})
+	if _, err := os.Stdout.WriteString(c.String()); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// codec extends md.HTMLCodec with heading IDs, heading self links and the
+// optional number-sections and toc extensions.
+type codec struct {
+	md.HTMLCodec
+	// numberSections turns on section-number prefixes in heading text.
+	numberSections bool
+	// toc turns on generation of the table of content.
+	toc bool
+	// Components of the current section number, one per heading level.
+	// Maintained if numberSections or toc is true; in the latter case the
+	// numbers also serve as the path into the section tree.
+	sectionNumbers []int
+	// Root of the tree of sections used for the table of content. Populated
+	// if toc is true. The root itself is a dummy node; only its children
+	// correspond to headings.
+	sectionRoot section
+}
+
+// section is a node in the tree of headings used to build the table of
+// content.
+type section struct {
+	title    string // heading content as HTML, including the number prefix when number-sections is on
+	id       string // HTML-escaped heading ID, used as the link target
+	children []section
+}
+
+// Patterns for the per-file options. They are matched against each line of an
+// HTML comment block that comes before any other output.
+var (
+	numberSectionsRegexp = regexp.MustCompile(`\bnumber-sections\b`)
+	tocRegexp            = regexp.MustCompile(`\btoc\b`)
+)
+
+// Do processes one block-level op. Headings and HTML blocks get special
+// treatment; every other op is passed to the embedded md.HTMLCodec unchanged.
+//
+// A heading is written with an ID (taken from a "#id" info string, or derived
+// from the lower-cased text content otherwise), an empty onclick handler and a
+// trailing self link. When numberSections or toc is on, the section numbers
+// are updated as well; the heading is then prefixed with its number and/or
+// recorded in the section tree.
+//
+// An HTML block that starts with "<!--" and comes before any output is scanned
+// for the number-sections and toc options, and is then written like any other
+// HTML block.
+func (c *codec) Do(op md.Op) {
+	switch op.Type {
+	case md.OpHeading:
+		var id string
+		// Only support #id since that's the only thing used in Elvish's
+		// Markdown right now. More can be added if needed.
+		if info := op.Info; strings.HasPrefix(info, "#") {
+			id = info[1:]
+		} else {
+			// Generate an ID using the inline text content converted to lower
+			// case.
+			id = strings.ToLower(plainTextOfInlineContent(op.Content))
+		}
+		idHTML := html.EscapeString(processHTMLID(id))
+
+		level := op.Number
+		// An empty onclick handler is needed for :hover to work on mobile:
+		// https://stackoverflow.com/a/25673064/566659
+		fmt.Fprintf(c, `<h%d onclick="" id="%s">`, level, idHTML)
+
+		// Render the content separately first; this may be used in the ToC too.
+		var sb strings.Builder
+		md.RenderInlineContentToHTML(&sb, op.Content)
+		titleHTML := sb.String()
+
+		// Number the section.
+		if c.numberSections || c.toc {
+			if level < len(c.sectionNumbers) {
+				// When going from a deeper heading to a shallower one, discard
+				// the numbers of the deeper levels. For example, when going
+				// from a #### to a #, only keep the first section number.
+				c.sectionNumbers = c.sectionNumbers[:level]
+			}
+			if level == len(c.sectionNumbers) {
+				c.sectionNumbers[level-1]++
+			} else {
+				// We are going from a shallower heading to a deeper one (e.g.
+				// # to ##), possibly with missing levels (e.g. # to ###).
+				// Populate all the new levels with 1.
+				for level > len(c.sectionNumbers) {
+					c.sectionNumbers = append(c.sectionNumbers, 1)
+				}
+			}
+
+			if c.numberSections {
+				titleHTML = sectionNumberPrefix(c.sectionNumbers) + titleHTML
+			}
+			if c.toc {
+				// The section numbers identify a path in the section tree.
+				p := &c.sectionRoot
+				for _, num := range c.sectionNumbers {
+					idx := num - 1
+					if idx == len(p.children) {
+						p.children = append(p.children, section{})
+					}
+					p = &p.children[idx]
+				}
+				p.id = idHTML
+				p.title = titleHTML
+			}
+		}
+
+		c.WriteString(titleHTML)
+
+		// Add self link
+		fmt.Fprintf(c,
+			`<a href="#%s" class="anchor icon-link" aria-hidden="true"></a>`, idHTML)
+
+		fmt.Fprintf(c, "</h%d>\n", level)
+	case md.OpHTMLBlock:
+		// Options are only recognized in a comment that precedes all output.
+		// The length check keeps an op without lines from causing an
+		// out-of-range panic.
+		if c.Len() == 0 && len(op.Lines) > 0 && strings.HasPrefix(op.Lines[0], "<!--") {
+			// Look for options.
+			for _, line := range op.Lines {
+				if numberSectionsRegexp.MatchString(line) {
+					c.numberSections = true
+				}
+				if tocRegexp.MatchString(line) {
+					c.toc = true
+				}
+			}
+		}
+		c.HTMLCodec.Do(op)
+	default:
+		c.HTMLCodec.Do(op)
+	}
+}
+
+// sectionNumberPrefix formats section number components as a heading prefix:
+// every component is followed by a dot and the result ends with one space, so
+// [1 2 3] becomes "1.2.3. ".
+func sectionNumberPrefix(nums []int) string {
+	var prefix strings.Builder
+	for i := range nums {
+		prefix.WriteString(fmt.Sprint(nums[i]))
+		prefix.WriteByte('.')
+	}
+	prefix.WriteString(" ")
+	return prefix.String()
+}
+
+// plainTextOfInlineContent concatenates the String form of every op in ops.
+func plainTextOfInlineContent(ops []md.InlineOp) string {
+	var text strings.Builder
+	for i := range ops {
+		text.WriteString(ops[i].String())
+	}
+	return text.String()
+}
+
+// whitespaceRun matches one or more consecutive whitespace characters.
+var whitespaceRun = regexp.MustCompile(`\s+`)
+
+// processHTMLID makes s usable as an HTML ID by replacing each run of
+// whitespace with a single "-". Whitespace is the only thing not allowed in
+// an ID; see
+// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/id
+func processHTMLID(s string) string {
+	id := whitespaceRun.ReplaceAllLiteralString(s, "-")
+	return id
+}
+
+// tocBefore is the HTML written before the table-of-content list. It opens
+// the wrapper and the list container, both of which are closed by tocAfter.
+const tocBefore = `
+<div id="pandoc-toc-wrapper">
+  <p>Table of Content: <span id="pandoc-toc-toggle-wrapper"></span></p>
+  <div id="pandoc-toc">
+`
+
+// tocAfter is the HTML written after the table-of-content list. It closes the
+// list container, adds a script that inserts a [Hide]/[Show] toggle link
+// (switching the container's class between empty and "no-display"), and
+// closes the wrapper.
+const tocAfter = `
+  </div>
+  <script>
+  (function() {
+    var shown = true,
+        tocToggleWrapper = document.getElementById('pandoc-toc-toggle-wrapper'),
+        tocList = document.getElementById('pandoc-toc');
+    var tocToggle = document.createElement('a');
+    tocToggle.innerText = "[Hide]";
+    tocToggle.href = "";
+    tocToggleWrapper.appendChild(tocToggle);
+    tocToggle.onclick = function(ev) {
+      shown = !shown;
+      if (shown) {
+        tocToggle.innerText = "[Hide]";
+        tocList.className = "";
+      } else {
+        tocToggle.innerText = "[Show]";
+        tocList.className = "no-display";
+      }
+      ev.preventDefault();
+    };
+  })();
+  </script>
+</div>
+`
+
+// String returns the final HTML. With the toc extension off this is just the
+// HTML accumulated by the embedded md.HTMLCodec; with it on, the table of
+// content (wrapped in tocBefore and tocAfter) is placed in front of it.
+func (c *codec) String() string {
+	body := c.HTMLCodec.String()
+	if !c.toc {
+		return body
+	}
+	var out strings.Builder
+	out.WriteString(tocBefore + "<ul>\n")
+	// The root is a dummy node, so the list starts from its children.
+	for _, top := range c.sectionRoot.children {
+		writeSection(&out, top)
+	}
+	out.WriteString("</ul>\n" + tocAfter)
+	out.WriteString(body)
+	return out.String()
+}
+
+// writeSection writes s to sb as an <li> element containing a link to the
+// section, followed by a nested <ul> of its children if it has any. The id and
+// title fields are written as is, without further escaping.
+func writeSection(sb *strings.Builder, s section) {
+	sb.WriteString(fmt.Sprintf(`<li><a href="#%s">%s</a>`, s.id, s.title))
+	if n := len(s.children); n != 0 {
+		sb.WriteString("\n<ul>\n")
+		for i := 0; i < n; i++ {
+			writeSection(sb, s.children[i])
+		}
+		sb.WriteString("</ul>\n")
+	}
+	sb.WriteString("</li>\n")
+}

+ 0 - 21
website/tools/md-to-html

@@ -1,21 +0,0 @@
-#!/bin/sh
-in=$1
-out=$2
-opts=
-
-has() {
-    head -n1 $in | grep "$@" >/dev/null
-}
-
-has toc && {
-    opts="$opts --toc --template=toc-and-body"
-}
-has number-sections && {
-    opts="$opts --number-sections"
-}
-
-mydir=$(dirname "$0")
-
-$mydir/macros.bin -repo $mydir/../.. -elvdoc $mydir/elvdoc.bin < $1 |
-  $mydir/highlight.bin |
-  pandoc -f gfm+smart+attributes --data-dir=$mydir/pandoc --lua-filter=$mydir/pandoc/header-anchors.lua --metadata title=${1%.md} -o $2 $opts

+ 0 - 9
website/tools/pandoc/header-anchors.lua

@@ -1,9 +0,0 @@
-function Header(el)
-  local id = el.identifier
-  if id == '' then return el end
-  local link = pandoc.Link('', '#'..id, '',
-                           {['class'] = 'anchor icon-link', ['aria-hidden'] = 'true'})
-  el.content:insert(link)
-  el.attributes['onclick'] = ''
-  return el
-end

+ 0 - 33
website/tools/pandoc/templates/toc-and-body.html

@@ -1,33 +0,0 @@
-<div id="pandoc-toc-wrapper">
-  <p>Table of Content: <span id="pandoc-toc-toggle-wrapper"></span></p>
-  <div id="pandoc-toc">
-    <!--
-    Note: links in the generated ToC have IDs starting with "toc-", so
-    avoid that prefix elsewhere.
-    -->
-    $toc$
-  </div>
-  <script>
-  (function() {
-    var shown = true,
-        tocToggleWrapper = document.getElementById('pandoc-toc-toggle-wrapper'),
-        tocList = document.getElementById('pandoc-toc');
-    var tocToggle = document.createElement('a');
-    tocToggle.innerText = "[Hide]";
-    tocToggle.href = "";
-    tocToggleWrapper.appendChild(tocToggle);
-    tocToggle.onclick = function(ev) {
-      shown = !shown;
-      if (shown) {
-        tocToggle.innerText = "[Hide]";
-        tocList.className = "";
-      } else {
-        tocToggle.innerText = "[Show]";
-        tocList.className = "no-display";
-      }
-      ev.preventDefault();
-    };
-  })();
-  </script>
-</div>
-$body$