...

Source file src/github.com/PuerkitoBio/goquery/type.go

Documentation: github.com/PuerkitoBio/goquery

     1  package goquery
     2  
     3  import (
     4  	"errors"
     5  	"io"
     6  	"net/http"
     7  	"net/url"
     8  
     9  	"github.com/andybalholm/cascadia"
    10  	"golang.org/x/net/html"
    11  )
    12  
// Document represents an HTML document to be manipulated. Unlike jQuery, which
// is loaded as part of a DOM document, and thus acts upon its containing
// document, GoQuery doesn't know which HTML document to act upon. So it needs
// to be told, and that's what the Document class is for. It holds the root
// document node to manipulate, and can make selections on this document.
//
// The embedded Selection is set by the private constructor to a selection
// containing only the root node. Url is the URL of the request the document
// was loaded from: it is filled in when the document is built from an http
// response (and carried over by CloneDocument), and is nil when the document
// is built from a node or a reader. rootNode is the root html node that was
// given to the constructor.
type Document struct {
	*Selection
	Url      *url.URL
	rootNode *html.Node
}
    23  
    24  // NewDocumentFromNode is a Document constructor that takes a root html Node
    25  // as argument.
    26  func NewDocumentFromNode(root *html.Node) *Document {
    27  	return newDocument(root, nil)
    28  }
    29  
    30  // NewDocument is a Document constructor that takes a string URL as argument.
    31  // It loads the specified document, parses it, and stores the root Document
    32  // node, ready to be manipulated.
    33  //
    34  // Deprecated: Use the net/http standard library package to make the request
    35  // and validate the response before calling goquery.NewDocumentFromReader
    36  // with the response's body.
    37  func NewDocument(url string) (*Document, error) {
    38  	// Load the URL
    39  	res, e := http.Get(url)
    40  	if e != nil {
    41  		return nil, e
    42  	}
    43  	return NewDocumentFromResponse(res)
    44  }
    45  
    46  // NewDocumentFromReader returns a Document from an io.Reader.
    47  // It returns an error as second value if the reader's data cannot be parsed
    48  // as html. It does not check if the reader is also an io.Closer, the
    49  // provided reader is never closed by this call. It is the responsibility
    50  // of the caller to close it if required.
    51  func NewDocumentFromReader(r io.Reader) (*Document, error) {
    52  	root, e := html.Parse(r)
    53  	if e != nil {
    54  		return nil, e
    55  	}
    56  	return newDocument(root, nil), nil
    57  }
    58  
    59  // NewDocumentFromResponse is another Document constructor that takes an http response as argument.
    60  // It loads the specified response's document, parses it, and stores the root Document
    61  // node, ready to be manipulated. The response's body is closed on return.
    62  //
    63  // Deprecated: Use goquery.NewDocumentFromReader with the response's body.
    64  func NewDocumentFromResponse(res *http.Response) (*Document, error) {
    65  	if res == nil {
    66  		return nil, errors.New("Response is nil")
    67  	}
    68  	defer res.Body.Close()
    69  	if res.Request == nil {
    70  		return nil, errors.New("Response.Request is nil")
    71  	}
    72  
    73  	// Parse the HTML into nodes
    74  	root, e := html.Parse(res.Body)
    75  	if e != nil {
    76  		return nil, e
    77  	}
    78  
    79  	// Create and fill the document
    80  	return newDocument(root, res.Request.URL), nil
    81  }
    82  
    83  // CloneDocument creates a deep-clone of a document.
    84  func CloneDocument(doc *Document) *Document {
    85  	return newDocument(cloneNode(doc.rootNode), doc.Url)
    86  }
    87  
    88  // Private constructor, make sure all fields are correctly filled.
    89  func newDocument(root *html.Node, url *url.URL) *Document {
    90  	// Create and fill the document
    91  	d := &Document{nil, url, root}
    92  	d.Selection = newSingleSelection(root, d)
    93  	return d
    94  }
    95  
// Selection represents a collection of nodes matching some criteria. The
// initial Selection can be created by using Document.Find, and then
// manipulated using the jQuery-like chainable syntax and methods.
//
// Nodes holds the html nodes that make up the selection; it is nil for an
// empty selection. document points back to the Document the selection was
// created for. prevSel is left nil by the helper constructors in this file.
// NOTE(review): prevSel presumably links to the selection this one was
// derived from - confirm against the code that sets it.
type Selection struct {
	Nodes    []*html.Node
	document *Document
	prevSel  *Selection
}
   104  
   105  // Helper constructor to create an empty selection
   106  func newEmptySelection(doc *Document) *Selection {
   107  	return &Selection{nil, doc, nil}
   108  }
   109  
   110  // Helper constructor to create a selection of only one node
   111  func newSingleSelection(node *html.Node, doc *Document) *Selection {
   112  	return &Selection{[]*html.Node{node}, doc, nil}
   113  }
   114  
// Matcher is an interface that defines the methods to match
// HTML nodes against a compiled selector string. Cascadia's
// Selector implements this interface.
type Matcher interface {
	// Match reports whether the given node matches.
	Match(*html.Node) bool
	// MatchAll returns the matching nodes found from the given node. This is
	// the method used when a Matcher serves to select nodes.
	// NOTE(review): presumably the search covers the node's descendants, as
	// with cascadia selectors - confirm.
	MatchAll(*html.Node) []*html.Node
	// Filter returns the nodes of the given slice that match.
	Filter([]*html.Node) []*html.Node
}
   123  
   124  // Single compiles a selector string to a Matcher that stops after the first
   125  // match is found.
   126  //
   127  // By default, Selection.Find and other functions that accept a selector string
   128  // to select nodes will use all matches corresponding to that selector. By
   129  // using the Matcher returned by Single, at most the first match will be
   130  // selected.
   131  //
   132  // For example, those two statements are semantically equivalent:
   133  //
   134  //     sel1 := doc.Find("a").First()
   135  //     sel2 := doc.FindMatcher(goquery.Single("a"))
   136  //
   137  // The one using Single is optimized to be potentially much faster on large
   138  // documents.
   139  //
   140  // Only the behaviour of the MatchAll method of the Matcher interface is
   141  // altered compared to standard Matchers. This means that the single-selection
   142  // property of the Matcher only applies for Selection methods where the Matcher
   143  // is used to select nodes, not to filter or check if a node matches the
   144  // Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
   145  // FilterMatcher(Single("div")) will still result in a Selection with multiple
   146  // "div"s if there were many "div"s in the Selection to begin with).
   147  func Single(selector string) Matcher {
   148  	return singleMatcher{compileMatcher(selector)}
   149  }
   150  
   151  // SingleMatcher returns a Matcher matches the same nodes as m, but that stops
   152  // after the first match is found.
   153  //
   154  // See the documentation of function Single for more details.
   155  func SingleMatcher(m Matcher) Matcher {
   156  	if _, ok := m.(singleMatcher); ok {
   157  		// m is already a singleMatcher
   158  		return m
   159  	}
   160  	return singleMatcher{m}
   161  }
   162  
   163  // compileMatcher compiles the selector string s and returns
   164  // the corresponding Matcher. If s is an invalid selector string,
   165  // it returns a Matcher that fails all matches.
   166  func compileMatcher(s string) Matcher {
   167  	cs, err := cascadia.Compile(s)
   168  	if err != nil {
   169  		return invalidMatcher{}
   170  	}
   171  	return cs
   172  }
   173  
// singleMatcher wraps a Matcher and overrides its MatchAll method so that at
// most the first match is returned. Match and Filter are promoted from the
// embedded Matcher unchanged.
type singleMatcher struct {
	Matcher
}
   177  
   178  func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
   179  	// Optimized version - stops finding at the first match (cascadia-compiled
   180  	// matchers all use this code path).
   181  	if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
   182  		node := mm.MatchFirst(n)
   183  		if node == nil {
   184  			return nil
   185  		}
   186  		return []*html.Node{node}
   187  	}
   188  
   189  	// Fallback version, for e.g. test mocks that don't provide the MatchFirst
   190  	// method.
   191  	nodes := m.Matcher.MatchAll(n)
   192  	if len(nodes) > 0 {
   193  		return nodes[:1:1]
   194  	}
   195  	return nil
   196  }
   197  
   198  // invalidMatcher is a Matcher that always fails to match.
   199  type invalidMatcher struct{}
   200  
   201  func (invalidMatcher) Match(n *html.Node) bool             { return false }
   202  func (invalidMatcher) MatchAll(n *html.Node) []*html.Node  { return nil }
   203  func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }
   204  

View as plain text