mirror of https://github.com/gohugoio/hugo
487 lines
14 KiB
Go
487 lines
14 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package template
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
htmltemplate "html/template"
|
|
"reflect"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// jsWhitespace is the cutset of every JS whitespace character, i.e. the
// characters matched by the \s character class.
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
const jsWhitespace = "\f\n\r\t\v" + // ASCII control whitespace
	"\u0020\u00a0\u1680" + // space, no-break space, ogham space mark
	"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" + // U+2000 through U+200A
	"\u2028\u2029" + // line separator, paragraph separator
	"\u202f\u205f\u3000\ufeff"
|
|
|
|
// nextJSCtx returns the context that determines whether a slash after the
|
|
// given run of tokens starts a regular expression instead of a division
|
|
// operator: / or /=.
|
|
//
|
|
// This assumes that the token run does not include any string tokens, comment
|
|
// tokens, regular expression literal tokens, or division operators.
|
|
//
|
|
// This fails on some valid but nonsensical JavaScript programs like
|
|
// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
|
|
// fail on any known useful programs. It is based on the draft
|
|
// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
|
|
// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
|
|
func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
|
|
// Trim all JS whitespace characters
|
|
s = bytes.TrimRight(s, jsWhitespace)
|
|
if len(s) == 0 {
|
|
return preceding
|
|
}
|
|
|
|
// All cases below are in the single-byte UTF-8 group.
|
|
switch c, n := s[len(s)-1], len(s); c {
|
|
case '+', '-':
|
|
// ++ and -- are not regexp preceders, but + and - are whether
|
|
// they are used as infix or prefix operators.
|
|
start := n - 1
|
|
// Count the number of adjacent dashes or pluses.
|
|
for start > 0 && s[start-1] == c {
|
|
start--
|
|
}
|
|
if (n-start)&1 == 1 {
|
|
// Reached for trailing minus signs since "---" is the
|
|
// same as "-- -".
|
|
return jsCtxRegexp
|
|
}
|
|
return jsCtxDivOp
|
|
case '.':
|
|
// Handle "42."
|
|
if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
|
|
return jsCtxDivOp
|
|
}
|
|
return jsCtxRegexp
|
|
// Suffixes for all punctuators from section 7.7 of the language spec
|
|
// that only end binary operators not handled above.
|
|
case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
|
|
return jsCtxRegexp
|
|
// Suffixes for all punctuators from section 7.7 of the language spec
|
|
// that are prefix operators not handled above.
|
|
case '!', '~':
|
|
return jsCtxRegexp
|
|
// Matches all the punctuators from section 7.7 of the language spec
|
|
// that are open brackets not handled above.
|
|
case '(', '[':
|
|
return jsCtxRegexp
|
|
// Matches all the punctuators from section 7.7 of the language spec
|
|
// that precede expression starts.
|
|
case ':', ';', '{':
|
|
return jsCtxRegexp
|
|
// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
|
|
// are handled in the default except for '}' which can precede a
|
|
// division op as in
|
|
// ({ valueOf: function () { return 42 } } / 2
|
|
// which is valid, but, in practice, developers don't divide object
|
|
// literals, so our heuristic works well for code like
|
|
// function () { ... } /foo/.test(x) && sideEffect();
|
|
// The ')' punctuator can precede a regular expression as in
|
|
// if (b) /foo/.test(x) && ...
|
|
// but this is much less likely than
|
|
// (a + b) / c
|
|
case '}':
|
|
return jsCtxRegexp
|
|
default:
|
|
// Look for an IdentifierName and see if it is a keyword that
|
|
// can precede a regular expression.
|
|
j := n
|
|
for j > 0 && isJSIdentPart(rune(s[j-1])) {
|
|
j--
|
|
}
|
|
if regexpPrecederKeywords[string(s[j:])] {
|
|
return jsCtxRegexp
|
|
}
|
|
}
|
|
// Otherwise is a punctuator not listed above, or
|
|
// a string which precedes a div op, or an identifier
|
|
// which precedes a div op.
|
|
return jsCtxDivOp
|
|
}
|
|
|
|
// regexpPrecederKeywords is the set of reserved JS keywords after which a
// slash starts a regular expression literal. Membership is tested by indexing
// the map: absent keys yield false.
var regexpPrecederKeywords = map[string]bool{
	"break": true, "case": true, "continue": true, "delete": true,
	"do": true, "else": true, "finally": true, "in": true,
	"instanceof": true, "return": true, "throw": true, "try": true,
	"typeof": true, "void": true,
}
|
|
|
|
// jsonMarshalType is the reflect.Type of the json.Marshaler interface. It is
// computed once here so indirectToJSONMarshaler can test values against it.
var jsonMarshalType = reflect.TypeFor[json.Marshaler]()
|
|
|
|
// indirectToJSONMarshaler returns the value, after dereferencing as many times
|
|
// as necessary to reach the base type (or nil) or an implementation of json.Marshal.
|
|
func indirectToJSONMarshaler(a any) any {
|
|
// text/template now supports passing untyped nil as a func call
|
|
// argument, so we must support it. Otherwise we'd panic below, as one
|
|
// cannot call the Type or Interface methods on an invalid
|
|
// reflect.Value. See golang.org/issue/18716.
|
|
if a == nil {
|
|
return nil
|
|
}
|
|
|
|
v := reflect.ValueOf(a)
|
|
for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
|
|
v = v.Elem()
|
|
}
|
|
return v.Interface()
|
|
}
|
|
|
|
// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
|
|
// neither side-effects nor free variables outside (NaN, Infinity).
|
|
func jsValEscaper(args ...any) string {
|
|
var a any
|
|
if len(args) == 1 {
|
|
a = indirectToJSONMarshaler(args[0])
|
|
switch t := a.(type) {
|
|
case htmltemplate.JS:
|
|
return string(t)
|
|
case htmltemplate.JSStr:
|
|
// TODO: normalize quotes.
|
|
return `"` + string(t) + `"`
|
|
case json.Marshaler:
|
|
// Do not treat as a Stringer.
|
|
case fmt.Stringer:
|
|
a = t.String()
|
|
}
|
|
} else {
|
|
for i, arg := range args {
|
|
args[i] = indirectToJSONMarshaler(arg)
|
|
}
|
|
a = fmt.Sprint(args...)
|
|
}
|
|
// TODO: detect cycles before calling Marshal which loops infinitely on
|
|
// cyclic data. This may be an unacceptable DoS risk.
|
|
b, err := json.Marshal(a)
|
|
if err != nil {
|
|
// While the standard JSON marshaler does not include user controlled
|
|
// information in the error message, if a type has a MarshalJSON method,
|
|
// the content of the error message is not guaranteed. Since we insert
|
|
// the error into the template, as part of a comment, we attempt to
|
|
// prevent the error from either terminating the comment, or the script
|
|
// block itself.
|
|
//
|
|
// In particular we:
|
|
// * replace "*/" comment end tokens with "* /", which does not
|
|
// terminate the comment
|
|
// * replace "</script" with "\x3C/script", and "<!--" with
|
|
// "\x3C!--", which prevents confusing script block termination
|
|
// semantics
|
|
//
|
|
// We also put a space before the comment so that if it is flush against
|
|
// a division operator it is not turned into a line comment:
|
|
// x/{{y}}
|
|
// turning into
|
|
// x//* error marshaling y:
|
|
// second line of error message */null
|
|
errStr := err.Error()
|
|
errStr = strings.ReplaceAll(errStr, "*/", "* /")
|
|
errStr = strings.ReplaceAll(errStr, "</script", `\x3C/script`)
|
|
errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`)
|
|
return fmt.Sprintf(" /* %s */null ", errStr)
|
|
}
|
|
|
|
// TODO: maybe post-process output to prevent it from containing
|
|
// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
|
|
// in case custom marshalers produce output containing those.
|
|
// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
|
|
// supports ld+json content-type.
|
|
if len(b) == 0 {
|
|
// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
|
|
// not cause the output `x=y/*z`.
|
|
return " null "
|
|
}
|
|
first, _ := utf8.DecodeRune(b)
|
|
last, _ := utf8.DecodeLastRune(b)
|
|
var buf strings.Builder
|
|
// Prevent IdentifierNames and NumericLiterals from running into
|
|
// keywords: in, instanceof, typeof, void
|
|
pad := isJSIdentPart(first) || isJSIdentPart(last)
|
|
if pad {
|
|
buf.WriteByte(' ')
|
|
}
|
|
written := 0
|
|
// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
|
|
// so it falls within the subset of JSON which is valid JS.
|
|
for i := 0; i < len(b); {
|
|
rune, n := utf8.DecodeRune(b[i:])
|
|
repl := ""
|
|
if rune == 0x2028 {
|
|
repl = `\u2028`
|
|
} else if rune == 0x2029 {
|
|
repl = `\u2029`
|
|
}
|
|
if repl != "" {
|
|
buf.Write(b[written:i])
|
|
buf.WriteString(repl)
|
|
written = i + n
|
|
}
|
|
i += n
|
|
}
|
|
if buf.Len() != 0 {
|
|
buf.Write(b[written:])
|
|
if pad {
|
|
buf.WriteByte(' ')
|
|
}
|
|
return buf.String()
|
|
}
|
|
return string(b)
|
|
}
|
|
|
|
// jsStrEscaper produces a string that can be included between quotes in
|
|
// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
|
|
// or in an HTML5 event handler attribute such as onclick.
|
|
func jsStrEscaper(args ...any) string {
|
|
s, t := stringify(args...)
|
|
if t == contentTypeJSStr {
|
|
return replace(s, jsStrNormReplacementTable)
|
|
}
|
|
return replace(s, jsStrReplacementTable)
|
|
}
|
|
|
|
func jsTmplLitEscaper(args ...any) string {
|
|
s, _ := stringify(args...)
|
|
return replace(s, jsBqStrReplacementTable)
|
|
}
|
|
|
|
// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
|
|
// specials so the result is treated literally when included in a regular
|
|
// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
|
|
// the literal text of {{.X}} followed by the string "bar".
|
|
func jsRegexpEscaper(args ...any) string {
|
|
s, _ := stringify(args...)
|
|
s = replace(s, jsRegexpReplacementTable)
|
|
if s == "" {
|
|
// /{{.X}}/ should not produce a line comment when .X == "".
|
|
return "(?:)"
|
|
}
|
|
return s
|
|
}
|
|
|
|
// replace replaces each rune r of s with replacementTable[r], provided that
|
|
// r < len(replacementTable). If replacementTable[r] is the empty string then
|
|
// no replacement is made.
|
|
// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
|
|
// `\u2029`.
|
|
func replace(s string, replacementTable []string) string {
|
|
var b strings.Builder
|
|
r, w, written := rune(0), 0, 0
|
|
for i := 0; i < len(s); i += w {
|
|
// See comment in htmlEscaper.
|
|
r, w = utf8.DecodeRuneInString(s[i:])
|
|
var repl string
|
|
switch {
|
|
case int(r) < len(lowUnicodeReplacementTable):
|
|
repl = lowUnicodeReplacementTable[r]
|
|
case int(r) < len(replacementTable) && replacementTable[r] != "":
|
|
repl = replacementTable[r]
|
|
case r == '\u2028':
|
|
repl = `\u2028`
|
|
case r == '\u2029':
|
|
repl = `\u2029`
|
|
default:
|
|
continue
|
|
}
|
|
if written == 0 {
|
|
b.Grow(len(s))
|
|
}
|
|
b.WriteString(s[written:i])
|
|
b.WriteString(repl)
|
|
written = i + w
|
|
}
|
|
if written == 0 {
|
|
return s
|
|
}
|
|
b.WriteString(s[written:])
|
|
return b.String()
|
|
}
|
|
|
|
// lowUnicodeReplacementTable holds the escape for every code point from
// U+0000 through U+001F, indexed by code point. replace consults it before
// the table it is given.
var lowUnicodeReplacementTable = []string{
	0x00: `\u0000`, 0x01: `\u0001`, 0x02: `\u0002`, 0x03: `\u0003`,
	0x04: `\u0004`, 0x05: `\u0005`, 0x06: `\u0006`, 0x07: `\u0007`,
	0x08: `\u0008`,
	0x09: `\t`,
	0x0a: `\n`,
	0x0b: `\u000b`, // "\v" == "v" on IE 6.
	0x0c: `\f`,
	0x0d: `\r`,
	0x0e: `\u000e`, 0x0f: `\u000f`,
	0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
	0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`,
	0x18: `\u0018`, 0x19: `\u0019`, 0x1a: `\u001a`, 0x1b: `\u001b`,
	0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
}
|
|
|
|
// jsStrReplacementTable is indexed by rune and gives the escape used inside a
// quoted JS string. An empty entry requests no replacement from this table.
var jsStrReplacementTable = []string{
	// Control characters.
	0: `\u0000`, '\t': `\t`, '\n': `\n`, '\f': `\f`, '\r': `\r`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.

	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"': `\u0022`, '&': `\u0026`, '\'': `\u0027`, '+': `\u002b`,
	'<': `\u003c`, '>': `\u003e`, '`': `\u0060`,

	// Slash and backslash get a backslash prefix.
	'/': `\/`, '\\': `\\`,
}
|
|
|
|
// jsBqStrReplacementTable is like jsStrReplacementTable except it also contains
// the special characters for JS template literals: $, {, and }.
var jsBqStrReplacementTable = []string{
	// Control characters.
	0: `\u0000`, '\t': `\t`, '\n': `\n`, '\f': `\f`, '\r': `\r`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.

	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"': `\u0022`, '&': `\u0026`, '\'': `\u0027`, '+': `\u002b`,
	'<': `\u003c`, '>': `\u003e`, '`': `\u0060`,

	// Slash and backslash get a backslash prefix.
	'/': `\/`, '\\': `\\`,

	// Template literal specials.
	'$': `\u0024`, '{': `\u007b`, '}': `\u007d`,
}
|
|
|
|
// jsStrNormReplacementTable is like jsStrReplacementTable but does not
// overencode existing escapes since this table has no entry for `\`.
var jsStrNormReplacementTable = []string{
	// Control characters.
	0: `\u0000`, '\t': `\t`, '\n': `\n`, '\f': `\f`, '\r': `\r`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.

	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"': `\u0022`, '&': `\u0026`, '\'': `\u0027`, '+': `\u002b`,
	'<': `\u003c`, '>': `\u003e`, '`': `\u0060`,

	// Slash gets a backslash prefix.
	'/': `\/`,
}
|
|
// jsRegexpReplacementTable is indexed by rune and gives the escape used by
// jsRegexpEscaper. Besides the control characters and HTML specials it
// backslash-escapes the regular expression specials.
var jsRegexpReplacementTable = []string{
	// Control characters.
	0: `\u0000`, '\t': `\t`, '\n': `\n`, '\f': `\f`, '\r': `\r`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.

	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"': `\u0022`, '&': `\u0026`, '\'': `\u0027`, '+': `\u002b`,
	'<': `\u003c`, '>': `\u003e`,

	// Backslash-escaped characters.
	'$': `\$`, '(': `\(`, ')': `\)`, '*': `\*`, '-': `\-`, '.': `\.`,
	'/': `\/`, '?': `\?`, '[': `\[`, '\\': `\\`, ']': `\]`, '^': `\^`,
	'{': `\{`, '|': `\|`, '}': `\}`,
}
|
|
|
|
// isJSIdentPart reports whether the given rune is a JS identifier part.
// Only '$', '_', the ASCII digits and the ASCII letters are recognized: it
// does not handle all the non-Latin letters, joiners, and combining marks,
// but it does handle every codepoint that can occur in a numeric literal or
// a keyword.
func isJSIdentPart(r rune) bool {
	switch {
	case r == '$', r == '_':
		return true
	case '0' <= r && r <= '9',
		'A' <= r && r <= 'Z',
		'a' <= r && r <= 'z':
		return true
	default:
		return false
	}
}
|
|
|
|
// isJSType reports whether the given MIME type should be considered JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a javascript container.
//
// Anything from the first ';' on is dropped, and the remainder is compared
// case-insensitively with surrounding white space ignored.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt

	// Discard parameters, then normalize case and surrounding space.
	essence, _, _ := strings.Cut(mimeType, ";")
	switch strings.TrimSpace(strings.ToLower(essence)) {
	case "application/ecmascript", "application/javascript",
		"application/json", "application/ld+json",
		"application/x-ecmascript", "application/x-javascript",
		"module",
		"text/ecmascript", "text/javascript",
		"text/javascript1.0", "text/javascript1.1", "text/javascript1.2",
		"text/javascript1.3", "text/javascript1.4", "text/javascript1.5",
		"text/jscript", "text/livescript",
		"text/x-ecmascript", "text/x-javascript":
		return true
	}
	return false
}
|