...

Source file src/k8s.io/apimachinery/pkg/util/proxy/transport.go

Documentation: k8s.io/apimachinery/pkg/util/proxy

     1  /*
     2  Copyright 2014 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package proxy
    18  
    19  import (
    20  	"bytes"
    21  	"compress/flate"
    22  	"compress/gzip"
    23  	"fmt"
    24  	"io"
    25  	"net/http"
    26  	"net/url"
    27  	"path"
    28  	"strings"
    29  
    30  	"golang.org/x/net/html"
    31  	"golang.org/x/net/html/atom"
    32  	"k8s.io/klog/v2"
    33  
    34  	"k8s.io/apimachinery/pkg/api/errors"
    35  	"k8s.io/apimachinery/pkg/util/net"
    36  	"k8s.io/apimachinery/pkg/util/sets"
    37  )
    38  
// atomsToAttrs states which attributes of which tags require URL substitution.
// It is keyed by the HTML element's atom; each value is the set of attribute
// names on that element whose values are treated as URLs. rewriteHTML consults
// it for every start tag and self-closing tag it encounters; elements that are
// not listed here are never rewritten.
//
// Sources:
//
//	http://www.w3.org/TR/REC-html40/index/attributes.html
//	http://www.w3.org/html/wg/drafts/html/master/index.html#attributes-1
var atomsToAttrs = map[atom.Atom]sets.String{
	atom.A:          sets.NewString("href"),
	atom.Applet:     sets.NewString("codebase"),
	atom.Area:       sets.NewString("href"),
	atom.Audio:      sets.NewString("src"),
	atom.Base:       sets.NewString("href"),
	atom.Blockquote: sets.NewString("cite"),
	atom.Body:       sets.NewString("background"),
	atom.Button:     sets.NewString("formaction"),
	atom.Command:    sets.NewString("icon"),
	atom.Del:        sets.NewString("cite"),
	atom.Embed:      sets.NewString("src"),
	atom.Form:       sets.NewString("action"),
	atom.Frame:      sets.NewString("longdesc", "src"),
	atom.Head:       sets.NewString("profile"),
	atom.Html:       sets.NewString("manifest"),
	atom.Iframe:     sets.NewString("longdesc", "src"),
	atom.Img:        sets.NewString("longdesc", "src", "usemap"),
	atom.Input:      sets.NewString("src", "usemap", "formaction"),
	atom.Ins:        sets.NewString("cite"),
	atom.Link:       sets.NewString("href"),
	atom.Object:     sets.NewString("classid", "codebase", "data", "usemap"),
	atom.Q:          sets.NewString("cite"),
	atom.Script:     sets.NewString("src"),
	atom.Source:     sets.NewString("src"),
	atom.Video:      sets.NewString("poster", "src"),

	// TODO: css URLs hidden in style elements.
}
    72  
// Transport is a transport for text/html content that replaces URLs in html
// content with the prefix of the proxy server
type Transport struct {
	// Scheme is sent to the backend as X-Forwarded-Proto (when non-empty) and
	// is used as the scheme of rewritten URLs.
	Scheme string
	// Host is sent to the backend as X-Forwarded-Host (when non-empty) and is
	// used as the host of rewritten URLs.
	Host string
	// PathPrepend is the path prefix joined in front of the request path for
	// X-Forwarded-Uri and in front of the path of rewritten URLs.
	PathPrepend string

	// RoundTripper performs the actual request. RoundTrip falls back to
	// http.DefaultTransport when it is nil.
	http.RoundTripper
}
    82  
    83  // RoundTrip implements the http.RoundTripper interface
    84  func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
    85  	// Add reverse proxy headers.
    86  	forwardedURI := path.Join(t.PathPrepend, req.URL.EscapedPath())
    87  	if strings.HasSuffix(req.URL.Path, "/") {
    88  		forwardedURI = forwardedURI + "/"
    89  	}
    90  	req.Header.Set("X-Forwarded-Uri", forwardedURI)
    91  	if len(t.Host) > 0 {
    92  		req.Header.Set("X-Forwarded-Host", t.Host)
    93  	}
    94  	if len(t.Scheme) > 0 {
    95  		req.Header.Set("X-Forwarded-Proto", t.Scheme)
    96  	}
    97  
    98  	rt := t.RoundTripper
    99  	if rt == nil {
   100  		rt = http.DefaultTransport
   101  	}
   102  	resp, err := rt.RoundTrip(req)
   103  
   104  	if err != nil {
   105  		return nil, errors.NewServiceUnavailable(fmt.Sprintf("error trying to reach service: %v", err))
   106  	}
   107  
   108  	if redirect := resp.Header.Get("Location"); redirect != "" {
   109  		targetURL, err := url.Parse(redirect)
   110  		if err != nil {
   111  			return nil, errors.NewInternalError(fmt.Errorf("error trying to parse Location header: %v", err))
   112  		}
   113  		resp.Header.Set("Location", t.rewriteURL(targetURL, req.URL, req.Host))
   114  		return resp, nil
   115  	}
   116  
   117  	cType := resp.Header.Get("Content-Type")
   118  	cType = strings.TrimSpace(strings.SplitN(cType, ";", 2)[0])
   119  	if cType != "text/html" {
   120  		// Do nothing, simply pass through
   121  		return resp, nil
   122  	}
   123  
   124  	return t.rewriteResponse(req, resp)
   125  }
   126  
   127  var _ = net.RoundTripperWrapper(&Transport{})
   128  
   129  func (rt *Transport) WrappedRoundTripper() http.RoundTripper {
   130  	return rt.RoundTripper
   131  }
   132  
   133  // rewriteURL rewrites a single URL to go through the proxy, if the URL refers
   134  // to the same host as sourceURL, which is the page on which the target URL
   135  // occurred, or if the URL matches the sourceRequestHost.
   136  func (t *Transport) rewriteURL(url *url.URL, sourceURL *url.URL, sourceRequestHost string) string {
   137  	// Example:
   138  	//      When API server processes a proxy request to a service (e.g. /api/v1/namespace/foo/service/bar/proxy/),
   139  	//      the sourceURL.Host (i.e. req.URL.Host) is the endpoint IP address of the service. The
   140  	//      sourceRequestHost (i.e. req.Host) is the Host header that specifies the host on which the
   141  	//      URL is sought, which can be different from sourceURL.Host. For example, if user sends the
   142  	//      request through "kubectl proxy" locally (i.e. localhost:8001/api/v1/namespace/foo/service/bar/proxy/),
   143  	//      sourceRequestHost is "localhost:8001".
   144  	//
   145  	//      If the service's response URL contains non-empty host, and url.Host is equal to either sourceURL.Host
   146  	//      or sourceRequestHost, we should not consider the returned URL to be a completely different host.
   147  	//      It's the API server's responsibility to rewrite a same-host-and-absolute-path URL and append the
   148  	//      necessary URL prefix (i.e. /api/v1/namespace/foo/service/bar/proxy/).
   149  	isDifferentHost := url.Host != "" && url.Host != sourceURL.Host && url.Host != sourceRequestHost
   150  	isRelative := !strings.HasPrefix(url.Path, "/")
   151  	if isDifferentHost || isRelative {
   152  		return url.String()
   153  	}
   154  
   155  	// Do not rewrite scheme and host if the Transport has empty scheme and host
   156  	// when targetURL already contains the sourceRequestHost
   157  	if !(url.Host == sourceRequestHost && t.Scheme == "" && t.Host == "") {
   158  		url.Scheme = t.Scheme
   159  		url.Host = t.Host
   160  	}
   161  
   162  	origPath := url.Path
   163  	// Do not rewrite URL if the sourceURL already contains the necessary prefix.
   164  	if strings.HasPrefix(url.Path, t.PathPrepend) {
   165  		return url.String()
   166  	}
   167  	url.Path = path.Join(t.PathPrepend, url.Path)
   168  	if strings.HasSuffix(origPath, "/") {
   169  		// Add back the trailing slash, which was stripped by path.Join().
   170  		url.Path += "/"
   171  	}
   172  
   173  	return url.String()
   174  }
   175  
   176  // rewriteHTML scans the HTML for tags with url-valued attributes, and updates
   177  // those values with the urlRewriter function. The updated HTML is output to the
   178  // writer.
   179  func rewriteHTML(reader io.Reader, writer io.Writer, urlRewriter func(*url.URL) string) error {
   180  	// Note: This assumes the content is UTF-8.
   181  	tokenizer := html.NewTokenizer(reader)
   182  
   183  	var err error
   184  	for err == nil {
   185  		tokenType := tokenizer.Next()
   186  		switch tokenType {
   187  		case html.ErrorToken:
   188  			err = tokenizer.Err()
   189  		case html.StartTagToken, html.SelfClosingTagToken:
   190  			token := tokenizer.Token()
   191  			if urlAttrs, ok := atomsToAttrs[token.DataAtom]; ok {
   192  				for i, attr := range token.Attr {
   193  					if urlAttrs.Has(attr.Key) {
   194  						url, err := url.Parse(attr.Val)
   195  						if err != nil {
   196  							// Do not rewrite the URL if it isn't valid.  It is intended not
   197  							// to error here to prevent the inability to understand the
   198  							// content of the body to cause a fatal error.
   199  							continue
   200  						}
   201  						token.Attr[i].Val = urlRewriter(url)
   202  					}
   203  				}
   204  			}
   205  			_, err = writer.Write([]byte(token.String()))
   206  		default:
   207  			_, err = writer.Write(tokenizer.Raw())
   208  		}
   209  	}
   210  	if err != io.EOF {
   211  		return err
   212  	}
   213  	return nil
   214  }
   215  
   216  // rewriteResponse modifies an HTML response by updating absolute links referring
   217  // to the original host to instead refer to the proxy transport.
   218  func (t *Transport) rewriteResponse(req *http.Request, resp *http.Response) (*http.Response, error) {
   219  	origBody := resp.Body
   220  	defer origBody.Close()
   221  
   222  	newContent := &bytes.Buffer{}
   223  	var reader io.Reader = origBody
   224  	var writer io.Writer = newContent
   225  	encoding := resp.Header.Get("Content-Encoding")
   226  	switch encoding {
   227  	case "gzip":
   228  		var err error
   229  		reader, err = gzip.NewReader(reader)
   230  		if err != nil {
   231  			return nil, fmt.Errorf("errorf making gzip reader: %v", err)
   232  		}
   233  		gzw := gzip.NewWriter(writer)
   234  		defer gzw.Close()
   235  		writer = gzw
   236  	case "deflate":
   237  		var err error
   238  		reader = flate.NewReader(reader)
   239  		flw, err := flate.NewWriter(writer, flate.BestCompression)
   240  		if err != nil {
   241  			return nil, fmt.Errorf("errorf making flate writer: %v", err)
   242  		}
   243  		defer func() {
   244  			flw.Close()
   245  			flw.Flush()
   246  		}()
   247  		writer = flw
   248  	case "":
   249  		// This is fine
   250  	default:
   251  		// Some encoding we don't understand-- don't try to parse this
   252  		klog.Errorf("Proxy encountered encoding %v for text/html; can't understand this so not fixing links.", encoding)
   253  		return resp, nil
   254  	}
   255  
   256  	urlRewriter := func(targetUrl *url.URL) string {
   257  		return t.rewriteURL(targetUrl, req.URL, req.Host)
   258  	}
   259  	err := rewriteHTML(reader, writer, urlRewriter)
   260  	if err != nil {
   261  		klog.Errorf("Failed to rewrite URLs: %v", err)
   262  		return resp, err
   263  	}
   264  
   265  	resp.Body = io.NopCloser(newContent)
   266  	// Update header node with new content-length
   267  	// TODO: Remove any hash/signature headers here?
   268  	resp.Header.Del("Content-Length")
   269  	resp.ContentLength = int64(newContent.Len())
   270  
   271  	return resp, err
   272  }
   273  

View as plain text