mirror of https://github.com/gohugoio/hugo
217 lines
6.6 KiB
Go
217 lines
6.6 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package template
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// urlFilter returns its input unless it contains an unsafe scheme in which
|
|
// case it defangs the entire URL.
|
|
//
|
|
// Schemes that cause unintended side effects that are irreversible without user
|
|
// interaction are considered unsafe. For example, clicking on a "javascript:"
|
|
// link can immediately trigger JavaScript code execution.
|
|
//
|
|
// This filter conservatively assumes that all schemes other than the following
|
|
// are unsafe:
|
|
// - http: Navigates to a new website, and may open a new window or tab.
|
|
// These side effects can be reversed by navigating back to the
|
|
// previous website, or closing the window or tab. No irreversible
|
|
// changes will take place without further user interaction with
|
|
// the new website.
|
|
// - https: Same as http.
|
|
// - mailto: Opens an email program and starts a new draft. This side effect
|
|
// is not irreversible until the user explicitly clicks send; it
|
|
// can be undone by closing the email program.
|
|
//
|
|
// To allow URLs containing other schemes to bypass this filter, developers must
|
|
// explicitly indicate that such a URL is expected and safe by encapsulating it
|
|
// in a template.URL value.
|
|
func urlFilter(args ...any) string {
|
|
s, t := stringify(args...)
|
|
if t == contentTypeURL {
|
|
return s
|
|
}
|
|
if !isSafeURL(s) {
|
|
return "#" + filterFailsafe
|
|
}
|
|
return s
|
|
}
|
|
|
|
// isSafeURL reports whether s is either a URL without a scheme (it has no
// ':' at all, or a '/' occurs before the first ':', as in a relative URL) or
// a URL whose scheme is http, https or mailto, compared case-insensitively.
func isSafeURL(s string) bool {
	scheme, _, hasColon := strings.Cut(s, ":")
	if !hasColon || strings.Contains(scheme, "/") {
		// Whatever precedes the first colon (if there is one) is part of
		// a path, not a scheme.
		return true
	}
	for _, allowed := range [...]string{"http", "https", "mailto"} {
		if strings.EqualFold(scheme, allowed) {
			return true
		}
	}
	return false
}
|
|
|
|
// urlEscaper produces an output that can be embedded in a URL query.
|
|
// The output can be embedded in an HTML attribute without further escaping.
|
|
func urlEscaper(args ...any) string {
|
|
return urlProcessor(false, args...)
|
|
}
|
|
|
|
// urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
|
|
// string or parenthesis delimited url(...).
|
|
// The normalizer does not encode all HTML specials. Specifically, it does not
|
|
// encode '&' so correct embedding in an HTML attribute requires escaping of
|
|
// '&' to '&'.
|
|
func urlNormalizer(args ...any) string {
|
|
return urlProcessor(true, args...)
|
|
}
|
|
|
|
// urlProcessor normalizes (when norm is true) or escapes its input to produce
|
|
// a valid hierarchical or opaque URL part.
|
|
func urlProcessor(norm bool, args ...any) string {
|
|
s, t := stringify(args...)
|
|
if t == contentTypeURL {
|
|
norm = true
|
|
}
|
|
var b strings.Builder
|
|
if processURLOnto(s, norm, &b) {
|
|
return b.String()
|
|
}
|
|
return s
|
|
}
|
|
|
|
// processURLOnto appends a normalized URL corresponding to its input to b
|
|
// and reports whether the appended content differs from s.
|
|
func processURLOnto(s string, norm bool, b *strings.Builder) bool {
|
|
b.Grow(len(s) + 16)
|
|
written := 0
|
|
// The byte loop below assumes that all URLs use UTF-8 as the
|
|
// content-encoding. This is similar to the URI to IRI encoding scheme
|
|
// defined in section 3.1 of RFC 3987, and behaves the same as the
|
|
// EcmaScript builtin encodeURIComponent.
|
|
// It should not cause any misencoding of URLs in pages with
|
|
// Content-type: text/html;charset=UTF-8.
|
|
for i, n := 0, len(s); i < n; i++ {
|
|
c := s[i]
|
|
switch c {
|
|
// Single quote and parens are sub-delims in RFC 3986, but we
|
|
// escape them so the output can be embedded in single
|
|
// quoted attributes and unquoted CSS url(...) constructs.
|
|
// Single quotes are reserved in URLs, but are only used in
|
|
// the obsolete "mark" rule in an appendix in RFC 3986
|
|
// so can be safely encoded.
|
|
case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
|
|
if norm {
|
|
continue
|
|
}
|
|
// Unreserved according to RFC 3986 sec 2.3
|
|
// "For consistency, percent-encoded octets in the ranges of
|
|
// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
|
|
// period (%2E), underscore (%5F), or tilde (%7E) should not be
|
|
// created by URI producers
|
|
case '-', '.', '_', '~':
|
|
continue
|
|
case '%':
|
|
// When normalizing do not re-encode valid escapes.
|
|
if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
|
|
continue
|
|
}
|
|
default:
|
|
// Unreserved according to RFC 3986 sec 2.3
|
|
if 'a' <= c && c <= 'z' {
|
|
continue
|
|
}
|
|
if 'A' <= c && c <= 'Z' {
|
|
continue
|
|
}
|
|
if '0' <= c && c <= '9' {
|
|
continue
|
|
}
|
|
}
|
|
b.WriteString(s[written:i])
|
|
fmt.Fprintf(b, "%%%02x", c)
|
|
written = i + 1
|
|
}
|
|
b.WriteString(s[written:])
|
|
return written != 0
|
|
}
|
|
|
|
// Filters and normalizes srcset values which are comma separated
|
|
// URLs followed by metadata.
|
|
func srcsetFilterAndEscaper(args ...any) string {
|
|
s, t := stringify(args...)
|
|
switch t {
|
|
case contentTypeSrcset:
|
|
return s
|
|
case contentTypeURL:
|
|
// Normalizing gets rid of all HTML whitespace
|
|
// which separate the image URL from its metadata.
|
|
var b strings.Builder
|
|
if processURLOnto(s, true, &b) {
|
|
s = b.String()
|
|
}
|
|
// Additionally, commas separate one source from another.
|
|
return strings.ReplaceAll(s, ",", "%2c")
|
|
}
|
|
|
|
var b strings.Builder
|
|
written := 0
|
|
for i := 0; i < len(s); i++ {
|
|
if s[i] == ',' {
|
|
filterSrcsetElement(s, written, i, &b)
|
|
b.WriteString(",")
|
|
written = i + 1
|
|
}
|
|
}
|
|
filterSrcsetElement(s, written, len(s), &b)
|
|
return b.String()
|
|
}
|
|
|
|
// Derived from https://play.golang.org/p/Dhmj7FORT5
|
|
// htmlSpaceAndASCIIAlnumBytes is a 128-entry bitmap indexed by ASCII byte
// value: for a byte c, bit (c & 7) of the byte at index (c >> 3) is set when
// c is tab, line feed, form feed, carriage return, space, an ASCII digit or
// an ASCII letter.
const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"
|
|
|
|
// isHTMLSpace is true iff c is a whitespace character per
|
|
// https://infra.spec.whatwg.org/#ascii-whitespace
|
|
func isHTMLSpace(c byte) bool {
|
|
return (c <= 0x20) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
|
|
}
|
|
|
|
func isHTMLSpaceOrASCIIAlnum(c byte) bool {
|
|
return (c < 0x80) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
|
|
}
|
|
|
|
func filterSrcsetElement(s string, left int, right int, b *strings.Builder) {
|
|
start := left
|
|
for start < right && isHTMLSpace(s[start]) {
|
|
start++
|
|
}
|
|
end := right
|
|
for i := start; i < right; i++ {
|
|
if isHTMLSpace(s[i]) {
|
|
end = i
|
|
break
|
|
}
|
|
}
|
|
if url := s[start:end]; isSafeURL(url) {
|
|
// If image metadata is only spaces or alnums then
|
|
// we don't need to URL normalize it.
|
|
metadataOk := true
|
|
for i := end; i < right; i++ {
|
|
if !isHTMLSpaceOrASCIIAlnum(s[i]) {
|
|
metadataOk = false
|
|
break
|
|
}
|
|
}
|
|
if metadataOk {
|
|
b.WriteString(s[left:start])
|
|
processURLOnto(url, true, b)
|
|
b.WriteString(s[end:right])
|
|
return
|
|
}
|
|
}
|
|
b.WriteString("#")
|
|
b.WriteString(filterFailsafe)
|
|
}
|