caddy/modules/caddyhttp/encode/encode.go

575 lines
18 KiB
Go

// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package encode implements an encoder middleware for Caddy. The initial
// enhancements related to Accept-Encoding, minimum content length, and
// buffer/writer pools were adapted from https://github.com/xi2/httpgzip
// then modified heavily to accommodate modular encoders and fix bugs.
// Code borrowed from that repository is Copyright (c) 2015 The Httpgzip Authors.
package encode
import (
"fmt"
"io"
"math"
"net/http"
"slices"
"sort"
"strconv"
"strings"
"sync"
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
)
func init() {
caddy.RegisterModule(Encode{})
}
// Encode is a middleware which can encode responses.
type Encode struct {
// Selection of compression algorithms to choose from. The best one
// will be chosen based on the client's Accept-Encoding header.
EncodingsRaw caddy.ModuleMap `json:"encodings,omitempty" caddy:"namespace=http.encoders"`
// If the client has no strong preference, choose these encodings in order.
Prefer []string `json:"prefer,omitempty"`
// Only encode responses that are at least this many bytes long.
MinLength int `json:"minimum_length,omitempty"`
// Only encode responses that match against this ResponseMmatcher.
// The default is a collection of text-based Content-Type headers.
Matcher *caddyhttp.ResponseMatcher `json:"match,omitempty"`
writerPools map[string]*sync.Pool // TODO: these pools do not get reused through config reloads...
}
// CaddyModule returns the Caddy module information.
func (Encode) CaddyModule() caddy.ModuleInfo {
return caddy.ModuleInfo{
ID: "http.handlers.encode",
New: func() caddy.Module { return new(Encode) },
}
}
// Provision provisions enc.
func (enc *Encode) Provision(ctx caddy.Context) error {
mods, err := ctx.LoadModule(enc, "EncodingsRaw")
if err != nil {
return fmt.Errorf("loading encoder modules: %v", err)
}
for modName, modIface := range mods.(map[string]any) {
err = enc.addEncoding(modIface.(Encoding))
if err != nil {
return fmt.Errorf("adding encoding %s: %v", modName, err)
}
}
if enc.MinLength == 0 {
enc.MinLength = defaultMinLength
}
if enc.Matcher == nil {
// common text-based content types
// list based on https://developers.cloudflare.com/speed/optimization/content/brotli/content-compression/#compression-between-cloudflare-and-website-visitors
enc.Matcher = &caddyhttp.ResponseMatcher{
Headers: http.Header{
"Content-Type": []string{
"application/atom+xml*",
"application/eot*",
"application/font*",
"application/geo+json*",
"application/graphql+json*",
"application/javascript*",
"application/json*",
"application/ld+json*",
"application/manifest+json*",
"application/opentype*",
"application/otf*",
"application/rss+xml*",
"application/truetype*",
"application/ttf*",
"application/vnd.api+json*",
"application/vnd.ms-fontobject*",
"application/wasm*",
"application/x-httpd-cgi*",
"application/x-javascript*",
"application/x-opentype*",
"application/x-otf*",
"application/x-perl*",
"application/x-protobuf*",
"application/x-ttf*",
"application/xhtml+xml*",
"application/xml*",
"font/ttf*",
"font/otf*",
"image/svg+xml*",
"image/vnd.microsoft.icon*",
"image/x-icon*",
"multipart/bag*",
"multipart/mixed*",
"text/*",
},
},
}
}
return nil
}
// Validate ensures that enc's configuration is valid.
func (enc *Encode) Validate() error {
check := make(map[string]bool)
for _, encName := range enc.Prefer {
if _, ok := enc.writerPools[encName]; !ok {
return fmt.Errorf("encoding %s not enabled", encName)
}
if _, ok := check[encName]; ok {
return fmt.Errorf("encoding %s is duplicated in prefer", encName)
}
check[encName] = true
}
return nil
}
func isEncodeAllowed(h http.Header) bool {
return !strings.Contains(h.Get("Cache-Control"), "no-transform")
}
func (enc *Encode) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
if isEncodeAllowed(r.Header) {
for _, encName := range AcceptedEncodings(r, enc.Prefer) {
if _, ok := enc.writerPools[encName]; !ok {
continue // encoding not offered
}
w = enc.openResponseWriter(encName, w, r.Method == http.MethodConnect)
defer w.(*responseWriter).Close()
// to comply with RFC 9110 section 8.8.3(.3), we modify the Etag when encoding
// by appending a hyphen and the encoder name; the problem is, the client will
// send back that Etag in a If-None-Match header, but upstream handlers that set
// the Etag in the first place don't know that we appended to their Etag! so here
// we have to strip our addition so the upstream handlers can still honor client
// caches without knowing about our changes...
if etag := r.Header.Get("If-None-Match"); etag != "" && !strings.HasPrefix(etag, "W/") {
ourSuffix := "-" + encName + `"`
if strings.HasSuffix(etag, ourSuffix) {
etag = strings.TrimSuffix(etag, ourSuffix) + `"`
r.Header.Set("If-None-Match", etag)
}
}
break
}
}
return next.ServeHTTP(w, r)
}
func (enc *Encode) addEncoding(e Encoding) error {
ae := e.AcceptEncoding()
if ae == "" {
return fmt.Errorf("encoder does not specify an Accept-Encoding value")
}
if _, ok := enc.writerPools[ae]; ok {
return fmt.Errorf("encoder already added: %s", ae)
}
if enc.writerPools == nil {
enc.writerPools = make(map[string]*sync.Pool)
}
enc.writerPools[ae] = &sync.Pool{
New: func() any {
return e.NewEncoder()
},
}
return nil
}
// openResponseWriter creates a new response writer that may (or may not)
// encode the response with encodingName. The returned response writer MUST
// be closed after the handler completes.
func (enc *Encode) openResponseWriter(encodingName string, w http.ResponseWriter, isConnect bool) *responseWriter {
var rw responseWriter
return enc.initResponseWriter(&rw, encodingName, w, isConnect)
}
// initResponseWriter initializes the responseWriter instance
// allocated in openResponseWriter, enabling mid-stack inlining.
func (enc *Encode) initResponseWriter(rw *responseWriter, encodingName string, wrappedRW http.ResponseWriter, isConnect bool) *responseWriter {
if rww, ok := wrappedRW.(*caddyhttp.ResponseWriterWrapper); ok {
rw.ResponseWriter = rww
} else {
rw.ResponseWriter = &caddyhttp.ResponseWriterWrapper{ResponseWriter: wrappedRW}
}
rw.encodingName = encodingName
rw.config = enc
rw.isConnect = isConnect
return rw
}
// responseWriter writes to an underlying response writer
// using the encoding represented by encodingName and
// configured by config.
type responseWriter struct {
http.ResponseWriter
encodingName string
w Encoder
config *Encode
statusCode int
wroteHeader bool
isConnect bool
}
// WriteHeader stores the status to write when the time comes
// to actually write the header.
func (rw *responseWriter) WriteHeader(status int) {
rw.statusCode = status
// See #5849 and RFC 9110 section 15.4.5 (https://www.rfc-editor.org/rfc/rfc9110.html#section-15.4.5) - 304
// Not Modified must have certain headers set as if it was a 200 response, and according to the issue
// we would miss the Vary header in this case when compression was also enabled; note that we set this
// header in the responseWriter.init() method but that is only called if we are writing a response body
if status == http.StatusNotModified && !hasVaryValue(rw.Header(), "Accept-Encoding") {
rw.Header().Add("Vary", "Accept-Encoding")
}
// write status immediately if status is 2xx and the request is CONNECT
// since it means the response is successful.
// see: https://github.com/caddyserver/caddy/issues/6733#issuecomment-2525058845
if rw.isConnect && 200 <= status && status <= 299 {
rw.ResponseWriter.WriteHeader(status)
rw.wroteHeader = true
}
// write status immediately when status code is informational
// see: https://caddy.community/t/disappear-103-early-hints-response-with-encode-enable-caddy-v2-7-6/23081/5
if 100 <= status && status <= 199 {
rw.ResponseWriter.WriteHeader(status)
}
}
// Match determines, if encoding should be done based on the ResponseMatcher.
func (enc *Encode) Match(rw *responseWriter) bool {
return enc.Matcher.Match(rw.statusCode, rw.Header())
}
// FlushError is an alternative Flush returning an error. It delays the actual Flush of the underlying
// ResponseWriterWrapper until headers were written.
func (rw *responseWriter) FlushError() error {
// WriteHeader wasn't called and is a CONNECT request, treat it as a success.
// otherwise, wait until header is written.
if rw.isConnect && !rw.wroteHeader && rw.statusCode == 0 {
rw.WriteHeader(http.StatusOK)
}
if !rw.wroteHeader {
// flushing the underlying ResponseWriter will write header and status code,
// but we need to delay that until we can determine if we must encode and
// therefore add the Content-Encoding header; this happens in the first call
// to rw.Write (see bug in #4314)
return nil
}
// also flushes the encoder, if any
// see: https://github.com/jjiang-stripe/caddy-slow-gzip
if rw.w != nil {
err := rw.w.Flush()
if err != nil {
return err
}
}
//nolint:bodyclose
return http.NewResponseController(rw.ResponseWriter).Flush()
}
// Write writes to the response. If the response qualifies,
// it is encoded using the encoder, which is initialized
// if not done so already.
func (rw *responseWriter) Write(p []byte) (int, error) {
// ignore zero data writes, probably head request
if len(p) == 0 {
return 0, nil
}
// WriteHeader wasn't called and is a CONNECT request, treat it as a success.
// otherwise, determine if the response should be compressed.
if rw.isConnect && !rw.wroteHeader && rw.statusCode == 0 {
rw.WriteHeader(http.StatusOK)
}
// sniff content-type and determine content-length
if !rw.wroteHeader && rw.config.MinLength > 0 {
var gtMinLength bool
if len(p) > rw.config.MinLength {
gtMinLength = true
} else if cl, err := strconv.Atoi(rw.Header().Get("Content-Length")); err == nil && cl > rw.config.MinLength {
gtMinLength = true
}
if gtMinLength {
if rw.Header().Get("Content-Type") == "" {
rw.Header().Set("Content-Type", http.DetectContentType(p))
}
rw.init()
}
}
// before we write to the response, we need to make
// sure the header is written exactly once; we do
// that by checking if a status code has been set,
// and if so, that means we haven't written the
// header OR the default status code will be written
// by the standard library
if !rw.wroteHeader {
if rw.statusCode != 0 {
rw.ResponseWriter.WriteHeader(rw.statusCode)
}
rw.wroteHeader = true
}
if rw.w != nil {
return rw.w.Write(p)
} else {
return rw.ResponseWriter.Write(p)
}
}
// used to mask ReadFrom method
type writerOnly struct {
io.Writer
}
// copied from stdlib
const sniffLen = 512
// ReadFrom will try to use sendfile to copy from the reader to the response writer.
// It's only used if the response writer implements io.ReaderFrom and the data can't be compressed.
// It's based on stdlin http1.1 response writer implementation.
// https://github.com/golang/go/blob/f4e3ec3dbe3b8e04a058d266adf8e048bab563f2/src/net/http/server.go#L586
func (rw *responseWriter) ReadFrom(r io.Reader) (int64, error) {
rf, ok := rw.ResponseWriter.(io.ReaderFrom)
// sendfile can't be used anyway
if !ok {
// mask ReadFrom to avoid infinite recursion
return io.Copy(writerOnly{rw}, r)
}
var ns int64
// try to sniff the content type and determine if the response should be compressed
if !rw.wroteHeader && rw.config.MinLength > 0 {
var (
err error
buf [sniffLen]byte
)
// mask ReadFrom to let Write determine if the response should be compressed
ns, err = io.CopyBuffer(writerOnly{rw}, io.LimitReader(r, sniffLen), buf[:])
if err != nil || ns < sniffLen {
return ns, err
}
}
// the response will be compressed, no sendfile support
if rw.w != nil {
nr, err := io.Copy(rw.w, r)
return nr + ns, err
}
nr, err := rf.ReadFrom(r)
return nr + ns, err
}
// Close writes any remaining buffered response and
// deallocates any active resources.
func (rw *responseWriter) Close() error {
// didn't write, probably head request
if !rw.wroteHeader {
cl, err := strconv.Atoi(rw.Header().Get("Content-Length"))
if err == nil && cl > rw.config.MinLength {
rw.init()
}
// issue #5059, don't write status code if not set explicitly.
if rw.statusCode != 0 {
rw.ResponseWriter.WriteHeader(rw.statusCode)
}
rw.wroteHeader = true
}
var err error
if rw.w != nil {
err = rw.w.Close()
rw.w.Reset(nil)
rw.config.writerPools[rw.encodingName].Put(rw.w)
rw.w = nil
}
return err
}
// Unwrap returns the underlying ResponseWriter.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
return rw.ResponseWriter
}
// init should be called before we write a response, if rw.buf has contents.
func (rw *responseWriter) init() {
hdr := rw.Header()
if hdr.Get("Content-Encoding") == "" && isEncodeAllowed(hdr) &&
rw.config.Match(rw) {
rw.w = rw.config.writerPools[rw.encodingName].Get().(Encoder)
rw.w.Reset(rw.ResponseWriter)
hdr.Del("Content-Length") // https://github.com/golang/go/issues/14975
hdr.Set("Content-Encoding", rw.encodingName)
if !hasVaryValue(hdr, "Accept-Encoding") {
hdr.Add("Vary", "Accept-Encoding")
}
hdr.Del("Accept-Ranges") // we don't know ranges for dynamically-encoded content
// strong ETags need to be distinct depending on the encoding ("selected representation")
// see RFC 9110 section 8.8.3.3:
// https://www.rfc-editor.org/rfc/rfc9110.html#name-example-entity-tags-varying
// I don't know a great way to do this... how about appending? That's a neat trick!
// (We have to strip the value we append from If-None-Match headers before
// sending subsequent requests back upstream, however, since upstream handlers
// don't know about our appending to their Etag since they've already done their work)
if etag := hdr.Get("Etag"); etag != "" && !strings.HasPrefix(etag, "W/") {
etag = fmt.Sprintf(`%s-%s"`, strings.TrimSuffix(etag, `"`), rw.encodingName)
hdr.Set("Etag", etag)
}
}
}
func hasVaryValue(hdr http.Header, target string) bool {
for _, vary := range hdr.Values("Vary") {
vals := strings.Split(vary, ",")
for _, val := range vals {
if strings.EqualFold(strings.TrimSpace(val), target) {
return true
}
}
}
return false
}
// AcceptedEncodings returns the list of encodings that the
// client supports, in descending order of preference.
// The client preference via q-factor and the server
// preference via Prefer setting are taken into account. If
// the Sec-WebSocket-Key header is present then non-identity
// encodings are not considered. See
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html.
func AcceptedEncodings(r *http.Request, preferredOrder []string) []string {
acceptEncHeader := r.Header.Get("Accept-Encoding")
websocketKey := r.Header.Get("Sec-WebSocket-Key")
if acceptEncHeader == "" {
return []string{}
}
prefs := []encodingPreference{}
for _, accepted := range strings.Split(acceptEncHeader, ",") {
parts := strings.Split(accepted, ";")
encName := strings.ToLower(strings.TrimSpace(parts[0]))
// determine q-factor
qFactor := 1.0
if len(parts) > 1 {
qFactorStr := strings.ToLower(strings.TrimSpace(parts[1]))
if strings.HasPrefix(qFactorStr, "q=") {
if qFactorFloat, err := strconv.ParseFloat(qFactorStr[2:], 32); err == nil {
if qFactorFloat >= 0 && qFactorFloat <= 1 {
qFactor = qFactorFloat
}
}
}
}
// encodings with q-factor of 0 are not accepted;
// use a small threshold to account for float precision
if qFactor < 0.00001 {
continue
}
// don't encode WebSocket handshakes
if websocketKey != "" && encName != "identity" {
continue
}
// set server preference
prefOrder := slices.Index(preferredOrder, encName)
if prefOrder > -1 {
prefOrder = len(preferredOrder) - prefOrder
}
prefs = append(prefs, encodingPreference{
encoding: encName,
q: qFactor,
preferOrder: prefOrder,
})
}
// sort preferences by descending q-factor first, then by preferOrder
sort.Slice(prefs, func(i, j int) bool {
if math.Abs(prefs[i].q-prefs[j].q) < 0.00001 {
return prefs[i].preferOrder > prefs[j].preferOrder
}
return prefs[i].q > prefs[j].q
})
prefEncNames := make([]string, len(prefs))
for i := range prefs {
prefEncNames[i] = prefs[i].encoding
}
return prefEncNames
}
// encodingPreference pairs an encoding with its q-factor.
type encodingPreference struct {
encoding string
q float64
preferOrder int
}
// Encoder is a type which can encode a stream of data.
type Encoder interface {
io.WriteCloser
Reset(io.Writer)
Flush() error // encoder by default buffers data to maximize compressing rate
}
// Encoding is a type which can create encoders of its kind
// and return the name used in the Accept-Encoding header.
type Encoding interface {
AcceptEncoding() string
NewEncoder() Encoder
}
// Precompressed is a type which returns filename suffix of precompressed
// file and Accept-Encoding header to use when serving this file.
type Precompressed interface {
AcceptEncoding() string
Suffix() string
}
// defaultMinLength is the minimum length at which to compress content.
const defaultMinLength = 512
// Interface guards
var (
_ caddy.Provisioner = (*Encode)(nil)
_ caddy.Validator = (*Encode)(nil)
_ caddyhttp.MiddlewareHandler = (*Encode)(nil)
)