Browse Source

Add host parameter

master
Maxim Likhachev 6 months ago
parent
commit
9e94cab0a4
  1. 14
      Containerfile
  2. 35
      Makefile
  3. BIN
      dist/lexis.darwin.bin
  4. BIN
      dist/lexis.linux.bin
  5. 8
      go.mod
  6. 12
      go.sum
  7. 14
      lexis.go
  8. BIN
      lexis_2021-04-23.tar
  9. 1
      vendor/github.com/PuerkitoBio/goquery/.gitattributes
  10. 16
      vendor/github.com/PuerkitoBio/goquery/.gitignore
  11. 44
      vendor/github.com/PuerkitoBio/goquery/.travis.yml
  12. 0
      vendor/github.com/PuerkitoBio/goquery/LICENSE
  13. 12
      vendor/github.com/PuerkitoBio/goquery/README.md
  14. 0
      vendor/github.com/PuerkitoBio/goquery/array.go
  15. 0
      vendor/github.com/PuerkitoBio/goquery/doc.go
  16. 0
      vendor/github.com/PuerkitoBio/goquery/expand.go
  17. 0
      vendor/github.com/PuerkitoBio/goquery/filter.go
  18. 8
      vendor/github.com/PuerkitoBio/goquery/go.mod
  19. 8
      vendor/github.com/PuerkitoBio/goquery/go.sum
  20. 0
      vendor/github.com/PuerkitoBio/goquery/iteration.go
  21. 167
      vendor/github.com/PuerkitoBio/goquery/manipulation.go
  22. 0
      vendor/github.com/PuerkitoBio/goquery/property.go
  23. 0
      vendor/github.com/PuerkitoBio/goquery/query.go
  24. 0
      vendor/github.com/PuerkitoBio/goquery/traversal.go
  25. 0
      vendor/github.com/PuerkitoBio/goquery/type.go
  26. 18
      vendor/github.com/PuerkitoBio/goquery/utilities.go
  27. 0
      vendor/github.com/andybalholm/cascadia/.travis.yml
  28. 0
      vendor/github.com/andybalholm/cascadia/LICENSE
  29. 0
      vendor/github.com/andybalholm/cascadia/README.md
  30. 0
      vendor/github.com/andybalholm/cascadia/go.mod
  31. 282
      vendor/github.com/andybalholm/cascadia/parser.go
  32. 833
      vendor/github.com/andybalholm/cascadia/selector.go
  33. 26
      vendor/github.com/andybalholm/cascadia/specificity.go
  34. 0
      vendor/github.com/gorilla/mux/AUTHORS
  35. 0
      vendor/github.com/gorilla/mux/LICENSE
  36. 159
      vendor/github.com/gorilla/mux/README.md
  37. 2
      vendor/github.com/gorilla/mux/doc.go
  38. 2
      vendor/github.com/gorilla/mux/go.mod
  39. 56
      vendor/github.com/gorilla/mux/middleware.go
  40. 31
      vendor/github.com/gorilla/mux/mux.go
  41. 71
      vendor/github.com/gorilla/mux/regexp.go
  42. 38
      vendor/github.com/gorilla/mux/route.go
  43. 2
      vendor/github.com/gorilla/mux/test_helpers.go
  44. 0
      vendor/golang.org/x/net/AUTHORS
  45. 0
      vendor/golang.org/x/net/CONTRIBUTORS
  46. 0
      vendor/golang.org/x/net/LICENSE
  47. 0
      vendor/golang.org/x/net/PATENTS
  48. 0
      vendor/golang.org/x/net/html/atom/atom.go
  49. 0
      vendor/golang.org/x/net/html/atom/table.go
  50. 1
      vendor/golang.org/x/net/html/const.go
  51. 0
      vendor/golang.org/x/net/html/doc.go
  52. 0
      vendor/golang.org/x/net/html/doctype.go
  53. 0
      vendor/golang.org/x/net/html/entity.go
  54. 0
      vendor/golang.org/x/net/html/escape.go
  55. 1
      vendor/golang.org/x/net/html/foreign.go
  56. 5
      vendor/golang.org/x/net/html/node.go
  57. 298
      vendor/golang.org/x/net/html/parse.go
  58. 34
      vendor/golang.org/x/net/html/render.go
  59. 9
      vendor/golang.org/x/net/html/token.go
  60. 11
      vendor/modules.txt
  61. 16
      vendor/src/github.com/PuerkitoBio/goquery/.travis.yml
  62. 234
      vendor/src/github.com/PuerkitoBio/goquery/array_test.go
  63. 436
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.1.0
  64. 438
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.1.1
  65. 405
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.1.1-v0.2.1-go1.1rc1.svg
  66. 459
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.2.0
  67. 420
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.2.0-v0.2.1-go1.1rc1.svg
  68. 470
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.2.1-go1.1rc1
  69. 476
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.3.0
  70. 478
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.3.2-go1.2
  71. 477
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.3.2-go1.2-take2
  72. 477
      vendor/src/github.com/PuerkitoBio/goquery/bench/v0.3.2-go1.2rc1
  73. 85
      vendor/src/github.com/PuerkitoBio/goquery/bench/v1.0.0-go1.7
  74. 85
      vendor/src/github.com/PuerkitoBio/goquery/bench/v1.0.1a-go1.7
  75. 85
      vendor/src/github.com/PuerkitoBio/goquery/bench/v1.0.1b-go1.7
  76. 86
      vendor/src/github.com/PuerkitoBio/goquery/bench/v1.0.1c-go1.7
  77. 120
      vendor/src/github.com/PuerkitoBio/goquery/bench_array_test.go
  78. 40
      vendor/src/github.com/PuerkitoBio/goquery/bench_example_test.go
  79. 104
      vendor/src/github.com/PuerkitoBio/goquery/bench_expand_test.go
  80. 236
      vendor/src/github.com/PuerkitoBio/goquery/bench_filter_test.go
  81. 68
      vendor/src/github.com/PuerkitoBio/goquery/bench_iteration_test.go
  82. 51
      vendor/src/github.com/PuerkitoBio/goquery/bench_property_test.go
  83. 111
      vendor/src/github.com/PuerkitoBio/goquery/bench_query_test.go
  84. 802
      vendor/src/github.com/PuerkitoBio/goquery/bench_traversal_test.go
  85. 68
      vendor/src/github.com/PuerkitoBio/goquery/doc/tips.md
  86. 82
      vendor/src/github.com/PuerkitoBio/goquery/example_test.go
  87. 118
      vendor/src/github.com/PuerkitoBio/goquery/expand_test.go
  88. 206
      vendor/src/github.com/PuerkitoBio/goquery/filter_test.go
  89. 6
      vendor/src/github.com/PuerkitoBio/goquery/go.mod
  90. 5
      vendor/src/github.com/PuerkitoBio/goquery/go.sum
  91. 88
      vendor/src/github.com/PuerkitoBio/goquery/iteration_test.go
  92. 513
      vendor/src/github.com/PuerkitoBio/goquery/manipulation_test.go
  93. 37
      vendor/src/github.com/PuerkitoBio/goquery/misc/git/pre-commit
  94. 252
      vendor/src/github.com/PuerkitoBio/goquery/property_test.go
  95. 103
      vendor/src/github.com/PuerkitoBio/goquery/query_test.go
  96. 855
      vendor/src/github.com/PuerkitoBio/goquery/testdata/gotesting.html
  97. 1214
      vendor/src/github.com/PuerkitoBio/goquery/testdata/gowiki.html
  98. 413
      vendor/src/github.com/PuerkitoBio/goquery/testdata/metalreview.html
  99. 102
      vendor/src/github.com/PuerkitoBio/goquery/testdata/page.html
  100. 24
      vendor/src/github.com/PuerkitoBio/goquery/testdata/page2.html

14
Dockerfile → Containerfile

@ -2,20 +2,19 @@ FROM golang:alpine as builder
WORKDIR /lexis
COPY lexis.go .
COPY lexis.go go.mod ./
RUN apk add --no-cache git
RUN go get github.com/PuerkitoBio/goquery
RUN go get github.com/gorilla/mux
# RUN go get github.com/PuerkitoBio/goquery
# RUN go get github.com/gorilla/mux
RUN go mod tidy
RUN go build lexis.go
#----------------------------------
FROM alpine:edge
ENV PORT 9000
ENV service_user="lexis"
RUN addgroup -S ${service_user} && adduser -S ${service_user} -G ${service_user}
@ -29,7 +28,6 @@ USER "$service_user"
COPY web web
COPY --from=builder /lexis/lexis .
EXPOSE $PORT
ENTRYPOINT ./lexis -serve -port "${PORT}"
ENTRYPOINT ["./lexis"]
CMD ["-serve"]

35
Makefile

@ -1,25 +1,44 @@
APP?=lexis
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
CONTAINER_RUNTIME ?= podman
CONTAINER_VERSION ?= $(shell date --iso-8601)
help: ## Display this help.
@awk 'BEGIN { \
FS = ":.*##"; \
printf "\nUsage:\n make \033[36m<target>\033[0m\n\nTargets:\n" \
} \
/^[a-zA-Z_-]+:.*?##/ { \
printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 \
} \
END { \
print \
}' \
$(MAKEFILE_LIST)
.PHONY: run
run:
run: ## Run lexis
GOPATH=${ROOT_DIR}/vendor go run ${APP}.go $(filter-out $@, $(MAKECMDGOALS))
.PHONY: build
build:
build: ## Build lexis binary
GOPATH=${ROOT_DIR}/vendor go build ${APP}.go
.PHONY: serve
serve: build
serve: build ## Build and run lexis -serve
${APP} --serve
docker: docker_build docker_run
container: container-build ## Build container image
container-build:
$(CONTAINER_RUNTIME) build -t lexis/lexis:$(CONTAINER_VERSION) -f Containerfile .
docker_build:
docker build -t lexis/lexis:latest -f Dockerfile .
container-run: ## Run lexis container
$(CONTAINER_RUNTIME) run lexis/lexis:$(CONTAINER_VERSION)
docker_run:
docker run lexis/lexis:latest
container-save: ## Save lexis container to a file
$(CONTAINER_RUNTIME) save lexis/lexis:$(CONTAINER_VERSION) -o lexis_$(CONTAINER_VERSION).tar
%:
@true

BIN
dist/lexis.darwin.bin

BIN
dist/lexis.linux.bin

8
go.mod

@ -0,0 +1,8 @@
module lexis
go 1.16
require (
github.com/PuerkitoBio/goquery v1.6.1
github.com/gorilla/mux v1.8.0
)

12
go.sum

@ -0,0 +1,12 @@
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

14
lexis.go

@ -1,6 +1,6 @@
package main
// Copyright (C) 2019, Maxim Lihachev, <envrm@yandex.ru>
// Copyright (C) 2021, Maxim Lihachev, <envrm@yandex.ru>
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
@ -35,6 +35,7 @@ import (
const (
defaultLanguage = "ru"
defaultHost = "127.0.0.1"
defaultPort = 9000
webDirectory = "web"
@ -307,8 +308,8 @@ func srvPage(w http.ResponseWriter, r *http.Request) {
}
// srv runs server on specified port.
func srv(port int) {
fmt.Println("Server is started at port", port)
func srv(host string, port int) {
fmt.Println("Server is started at " + host + ":" + strconv.Itoa(port) + ".")
router := mux.NewRouter()
@ -322,7 +323,7 @@ func srv(port int) {
http.Handle("/", router)
err := http.ListenAndServe(":" + strconv.Itoa(port), nil)
err := http.ListenAndServe(host + ":" + strconv.Itoa(port), nil)
if err != nil {
fmt.Println("ListenAndServe: ", err)
@ -336,7 +337,7 @@ func showHelp() {
fmt.Printf("Usage:\n")
fmt.Printf(" CLI: %s <text>\n", program)
fmt.Printf(" WEB: %s --serve [--port]\n\n", program)
fmt.Printf(" WEB: %s --serve [--host] [--port]\n\n", program)
flag.PrintDefaults()
}
@ -344,13 +345,14 @@ func showHelp() {
func main() {
flag.Usage = showHelp
host := flag.String("host", defaultHost, "Host to bind")
port := flag.Int("port", defaultPort, "Server port")
serve := flag.Bool("serve", false, "Serve LEΞΙΣ")
flag.Parse()
if (*serve) {
srv(*port)
srv(*host, *port)
} else if (len(flag.Args()) > 0) {
txt := strings.Join(flag.Args(), " ")

BIN
lexis_2021-04-23.tar

1
vendor/github.com/PuerkitoBio/goquery/.gitattributes

@ -0,0 +1 @@
testdata/* linguist-vendored

16
vendor/github.com/PuerkitoBio/goquery/.gitignore

@ -0,0 +1,16 @@
# editor temporary files
*.sublime-*
.DS_Store
*.swp
#*.*#
tags
# direnv config
.env*
# test binaries
*.test
# coverage and profile outputs
*.out

44
vendor/github.com/PuerkitoBio/goquery/.travis.yml

@ -0,0 +1,44 @@
arch:
- amd64
- ppc64le
language: go
go:
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- 1.13.x
- tip
jobs:
exclude:
- arch: ppc64le
go: 1.2.x
- arch: ppc64le
go: 1.3.x
- arch: ppc64le
go: 1.4.x
- arch: ppc64le
go: 1.5.x
- arch: ppc64le
go: 1.6.x
- arch: ppc64le
go: 1.7.x
- arch: ppc64le
go: 1.8.x
- arch: ppc64le
go: 1.9.x
- arch: ppc64le
go: 1.10.x
- arch: ppc64le
go: 1.11.x
- arch: ppc64le
go: 1.12.x

0
vendor/src/github.com/PuerkitoBio/goquery/LICENSE → vendor/github.com/PuerkitoBio/goquery/LICENSE

12
vendor/src/github.com/PuerkitoBio/goquery/README.md → vendor/github.com/PuerkitoBio/goquery/README.md

@ -37,6 +37,9 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
* **2020-02-04 (v1.5.1)** : Update module dependencies.
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
@ -138,11 +141,14 @@ func main() {
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework
- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
## Support
@ -179,3 +185,5 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia'
[thatguystone]: https://github.com/thatguystone
[piotr]: https://github.com/piotrkowalczuk
[goq]: https://github.com/andrewstuart/goq
[thiemok]: https://github.com/thiemok
[djw]: https://github.com/davidjwilkins

0
vendor/src/github.com/PuerkitoBio/goquery/array.go → vendor/github.com/PuerkitoBio/goquery/array.go

0
vendor/src/github.com/PuerkitoBio/goquery/doc.go → vendor/github.com/PuerkitoBio/goquery/doc.go

0
vendor/src/github.com/PuerkitoBio/goquery/expand.go → vendor/github.com/PuerkitoBio/goquery/expand.go

0
vendor/src/github.com/PuerkitoBio/goquery/filter.go → vendor/github.com/PuerkitoBio/goquery/filter.go

8
vendor/github.com/PuerkitoBio/goquery/go.mod

@ -0,0 +1,8 @@
module github.com/PuerkitoBio/goquery
require (
github.com/andybalholm/cascadia v1.1.0
golang.org/x/net v0.0.0-20200202094626-16171245cfb2
)
go 1.13

8
vendor/github.com/PuerkitoBio/goquery/go.sum

@ -0,0 +1,8 @@
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

0
vendor/src/github.com/PuerkitoBio/goquery/iteration.go → vendor/github.com/PuerkitoBio/goquery/iteration.go

167
vendor/src/github.com/PuerkitoBio/goquery/manipulation.go → vendor/github.com/PuerkitoBio/goquery/manipulation.go

@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection {
// AfterHtml parses the html and inserts it after the set of matched elements.
//
// This follows the same rules as Selection.Append.
func (s *Selection) AfterHtml(html string) *Selection {
return s.AfterNodes(parseHtml(html)...)
func (s *Selection) AfterHtml(htmlStr string) *Selection {
// isParent=true: eachNodeHtml parses htmlStr using each node's parent as the
// parsing context and skips nodes whose parent is nil.
return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
// Capture the following sibling once so that every parsed node is inserted
// in front of the same anchor, which keeps the parsed nodes in their
// original order right after node.
nextSibling := node.NextSibling
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, nextSibling)
}
}
})
}
// AfterNodes inserts the nodes after each element in the set of matched elements.
@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection {
}
// AppendHtml parses the html and appends it to the set of matched elements.
func (s *Selection) AppendHtml(html string) *Selection {
return s.AppendNodes(parseHtml(html)...)
func (s *Selection) AppendHtml(htmlStr string) *Selection {
// isParent=false: each node of the selection is itself the parsing context;
// in this mode eachNodeHtml skips nodes that are not element nodes.
return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
// Add the parsed nodes as the last children of node, in order.
for _, n := range nodes {
node.AppendChild(n)
}
})
}
// AppendNodes appends the specified nodes to each node in the set of matched elements.
@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection {
// BeforeHtml parses the html and inserts it before the set of matched elements.
//
// This follows the same rules as Selection.Append.
func (s *Selection) BeforeHtml(html string) *Selection {
return s.BeforeNodes(parseHtml(html)...)
func (s *Selection) BeforeHtml(htmlStr string) *Selection {
// isParent=true: eachNodeHtml parses htmlStr using each node's parent as the
// parsing context and skips nodes whose parent is nil.
return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
// Insert every parsed node directly in front of node; inserting them one
// after another before the same anchor preserves their order.
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, node)
}
}
})
}
// BeforeNodes inserts the nodes before each element in the set of matched elements.
@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection {
}
// PrependHtml parses the html and prepends it to the set of matched elements.
func (s *Selection) PrependHtml(html string) *Selection {
return s.PrependNodes(parseHtml(html)...)
func (s *Selection) PrependHtml(htmlStr string) *Selection {
// isParent=false: each node of the selection is itself the parsing context;
// in this mode eachNodeHtml skips nodes that are not element nodes.
return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
// Remember the current first child so all parsed nodes are inserted in
// front of it, keeping their relative order.
firstChild := node.FirstChild
for _, n := range nodes {
node.InsertBefore(n, firstChild)
}
})
}
// PrependNodes prepends the specified nodes to each node in the set of
@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection {
return s
}
// RemoveFiltered removes the set of matched elements by selector.
// It returns the Selection of removed nodes.
// RemoveFiltered removes from the current set of matched elements those that
// match the selector filter. It returns the Selection of removed nodes.
//
// For example if the selection s contains "<h1>", "<h2>" and "<h3>"
// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed
// (and returned), while "<h1>" and "<h3>" are kept in the document.
func (s *Selection) RemoveFiltered(selector string) *Selection {
return s.RemoveMatcher(compileMatcher(selector))
}
// RemoveMatcher removes the set of matched elements.
// It returns the Selection of removed nodes.
// RemoveMatcher removes from the current set of matched elements those that
// match the Matcher filter. It returns the Selection of removed nodes.
// See RemoveFiltered for additional information.
func (s *Selection) RemoveMatcher(m Matcher) *Selection {
return s.FilterMatcher(m).Remove()
}
@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
// It returns the removed elements.
//
// This follows the same rules as Selection.Append.
func (s *Selection) ReplaceWithHtml(html string) *Selection {
return s.ReplaceWithNodes(parseHtml(html)...)
func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection {
// Step 1: insert the parsed nodes right after each selected node, parsing
// htmlStr in the context of that node's parent (isParent=true).
s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
nextSibling := node.NextSibling
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, nextSibling)
}
}
})
// Step 2: drop the original nodes; the result of Remove is what the caller
// gets back.
return s.Remove()
}
// ReplaceWithNodes replaces each element in the set of matched elements with
@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
// SetHtml sets the html content of each element in the selection to
// specified html string.
func (s *Selection) SetHtml(html string) *Selection {
return setHtmlNodes(s, parseHtml(html)...)
func (s *Selection) SetHtml(htmlStr string) *Selection {
// First empty every node of the selection. The loop re-reads FirstChild on
// each pass because RemoveChild detaches the current child.
for _, context := range s.Nodes {
for c := context.FirstChild; c != nil; c = context.FirstChild {
context.RemoveChild(c)
}
}
// Then parse htmlStr with each node as context (isParent=false) and append
// the result. Note that eachNodeHtml skips non-element nodes in this mode,
// so such nodes are emptied above but receive no new children.
return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
for _, n := range nodes {
node.AppendChild(n)
}
})
}
// SetText sets the content of each element in the selection to specified content.
@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection {
// most child of the given HTML.
//
// It returns the original set of elements.
func (s *Selection) WrapHtml(html string) *Selection {
return s.wrapNodes(parseHtml(html)...)
func (s *Selection) WrapHtml(htmlStr string) *Selection {
// Cache of parsed fragments keyed by the node name of the parsing context,
// so htmlStr is parsed only once per distinct parent name.
nodesMap := make(map[string][]*html.Node)
for _, context := range s.Nodes {
// The wrapper is parsed in the context of the node's parent; a detached
// node gets a placeholder element node (with empty Data) as context.
var parent *html.Node
if context.Parent != nil {
parent = context.Parent
} else {
parent = &html.Node{Type: html.ElementNode}
}
nodes, found := nodesMap[nodeName(parent)]
if !found {
nodes = parseHtmlWithContext(htmlStr, parent)
nodesMap[nodeName(parent)] = nodes
}
// Wrap each node individually, passing the result of cloneNodes rather
// than the cached nodes themselves.
newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...)
}
return s
}
// WrapNode wraps each element in the set of matched elements inside the inner-
@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
// document.
//
// It returns the original set of elements.
func (s *Selection) WrapAllHtml(html string) *Selection {
return s.wrapAllNodes(parseHtml(html)...)
func (s *Selection) WrapAllHtml(htmlStr string) *Selection {
var context *html.Node
var nodes []*html.Node
// Only the first node of the selection determines how htmlStr is parsed; an
// empty selection leaves nodes nil.
if len(s.Nodes) > 0 {
context = s.Nodes[0]
// NOTE(review): when the first node has a parent, the node itself (not its
// parent) is passed as the parsing context; a detached node falls back to
// the context-free parseHtml. Confirm this is the intended context.
if context.Parent != nil {
nodes = parseHtmlWithContext(htmlStr, context)
} else {
nodes = parseHtml(htmlStr)
}
}
return s.wrapAllNodes(nodes...)
}
func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
// cloned before being inserted into the document.
//
// It returns the original set of elements.
func (s *Selection) WrapInnerHtml(html string) *Selection {
return s.wrapInnerNodes(parseHtml(html)...)
func (s *Selection) WrapInnerHtml(htmlStr string) *Selection {
// Cache of parsed fragments keyed by the node name of the parsing context,
// so htmlStr is parsed only once per distinct node name in the selection.
nodesMap := make(map[string][]*html.Node)
for _, context := range s.Nodes {
// Here the selected node itself is the parsing context, since the wrapper
// ends up inside it.
nodes, found := nodesMap[nodeName(context)]
if !found {
nodes = parseHtmlWithContext(htmlStr, context)
nodesMap[nodeName(context)] = nodes
}
// Wrap the contents of each node individually, passing the result of
// cloneNodes rather than the cached nodes themselves.
newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...)
}
return s
}
// WrapInnerNode wraps an HTML structure, matched by the given selector, around
@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node {
return nodes
}
func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection {
for _, n := range s.Nodes {
for c := n.FirstChild; c != nil; c = n.FirstChild {
n.RemoveChild(c)
}
for _, c := range ns {
n.AppendChild(cloneNode(c))
}
func parseHtmlWithContext(h string, context *html.Node) []*html.Node {
// Errors are only returned when the io.Reader returns any error besides
// EOF, but strings.Reader never will
nodes, err := html.ParseFragment(strings.NewReader(h), context)
if err != nil {
panic("goquery: failed to parse HTML: " + err.Error())
}
return s
return nodes
}
// Get the first child that is an ElementNode
@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
return s
}
// eachNodeHtml parses htmlStr once per distinct parsing context and hands the
// parsed nodes to mergeFn for every applicable node of the selection.
//
// When isParent is true the parsing context of a node is its parent, and nodes
// without a parent are skipped. When isParent is false the node itself is the
// context, and nodes that are not element nodes are skipped.
//
// Parsed fragments are cached by the node name of the context, so contexts
// sharing a name reuse the same parse result. mergeFn receives the selection
// node together with the output of cloneNodes for the cached fragment.
// The selection s is returned unchanged to allow chaining.
func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection {
	parsedByName := make(map[string][]*html.Node)
	for _, target := range s.Nodes {
		// Pick the parsing context for this node, or skip the node entirely.
		ctx := target
		switch {
		case isParent:
			ctx = target.Parent
		case target.Type != html.ElementNode:
			continue
		}
		if ctx == nil {
			continue
		}

		key := nodeName(ctx)
		fragment, cached := parsedByName[key]
		if !cached {
			fragment = parseHtmlWithContext(htmlStr, ctx)
			parsedByName[key] = fragment
		}
		mergeFn(target, cloneNodes(fragment))
	}
	return s
}

0
vendor/src/github.com/PuerkitoBio/goquery/property.go → vendor/github.com/PuerkitoBio/goquery/property.go

0
vendor/src/github.com/PuerkitoBio/goquery/query.go → vendor/github.com/PuerkitoBio/goquery/query.go

0
vendor/src/github.com/PuerkitoBio/goquery/traversal.go → vendor/github.com/PuerkitoBio/goquery/traversal.go

0
vendor/src/github.com/PuerkitoBio/goquery/type.go → vendor/github.com/PuerkitoBio/goquery/type.go

18
vendor/src/github.com/PuerkitoBio/goquery/utilities.go → vendor/github.com/PuerkitoBio/goquery/utilities.go

@ -36,12 +36,22 @@ func NodeName(s *Selection) string {
if s.Length() == 0 {
return ""
}
switch n := s.Get(0); n.Type {
return nodeName(s.Get(0))
}
// nodeName returns the node name of the given html node.
// See NodeName for additional details on behaviour.
func nodeName(node *html.Node) string {
if node == nil {
return ""
}
switch node.Type {
case html.ElementNode, html.DoctypeNode:
return n.Data
return node.Data
default:
if n.Type >= 0 && int(n.Type) < len(nodeNames) {
return nodeNames[n.Type]
if node.Type >= 0 && int(node.Type) < len(nodeNames) {
return nodeNames[node.Type]
}
return ""
}

0
vendor/src/github.com/andybalholm/cascadia/.travis.yml → vendor/github.com/andybalholm/cascadia/.travis.yml

0
vendor/src/github.com/andybalholm/cascadia/LICENSE → vendor/github.com/andybalholm/cascadia/LICENSE

0
vendor/src/github.com/andybalholm/cascadia/README.md → vendor/github.com/andybalholm/cascadia/README.md

0
vendor/src/github.com/andybalholm/cascadia/go.mod → vendor/github.com/andybalholm/cascadia/go.mod

282
vendor/src/github.com/andybalholm/cascadia/parser.go → vendor/github.com/andybalholm/cascadia/parser.go

@ -7,8 +7,6 @@ import (
"regexp"
"strconv"
"strings"
"golang.org/x/net/html"
)
// a parser for CSS selectors
@ -56,6 +54,26 @@ func (p *parser) parseEscape() (result string, err error) {
return result, nil
}
// toLowerASCII lowercases the ASCII letters 'A'-'Z' in s and leaves every
// other byte (including bytes of multi-byte UTF-8 sequences) untouched.
// When s contains no ASCII capital letter it is returned as-is, without
// allocating a copy.
func toLowerASCII(s string) string {
	// Find the first ASCII capital; everything before it is kept verbatim.
	first := -1
	for idx := 0; idx < len(s); idx++ {
		if s[idx] >= 'A' && s[idx] <= 'Z' {
			first = idx
			break
		}
	}
	if first < 0 {
		return s
	}

	lowered := []byte(s)
	for idx := first; idx < len(lowered); idx++ {
		if ch := lowered[idx]; ch >= 'A' && ch <= 'Z' {
			lowered[idx] = ch + ('a' - 'A')
		}
	}
	return string(lowered)
}
// hexDigit reports whether c is an ASCII hexadecimal digit, i.e. one of
// 0-9, a-f or A-F.
func hexDigit(c byte) bool {
	switch {
	case c >= '0' && c <= '9':
		return true
	case c >= 'a' && c <= 'f':
		return true
	case c >= 'A' && c <= 'F':
		return true
	default:
		return false
	}
}
@ -280,92 +298,92 @@ func (p *parser) consumeClosingParenthesis() bool {
}
// parseTypeSelector parses a type selector (one that matches by tag name).
func (p *parser) parseTypeSelector() (result Selector, err error) {
func (p *parser) parseTypeSelector() (result tagSelector, err error) {
tag, err := p.parseIdentifier()
if err != nil {
return nil, err
return
}
return typeSelector(tag), nil
return tagSelector{tag: toLowerASCII(tag)}, nil
}
// parseIDSelector parses a selector that matches by id attribute.
func (p *parser) parseIDSelector() (Selector, error) {
func (p *parser) parseIDSelector() (idSelector, error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected id selector (#id), found EOF instead")
return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead")
}
if p.s[p.i] != '#' {
return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
}
p.i++
id, err := p.parseName()
if err != nil {
return nil, err
return idSelector{}, err
}
return attributeEqualsSelector("id", id), nil
return idSelector{id: id}, nil
}
// parseClassSelector parses a selector that matches by class attribute.
func (p *parser) parseClassSelector() (Selector, error) {
func (p *parser) parseClassSelector() (classSelector, error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected class selector (.class), found EOF instead")
return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead")
}
if p.s[p.i] != '.' {
return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
}
p.i++
class, err := p.parseIdentifier()
if err != nil {
return nil, err
return classSelector{}, err
}
return attributeIncludesSelector("class", class), nil
return classSelector{class: class}, nil
}
// parseAttributeSelector parses a selector that matches by attribute value.
func (p *parser) parseAttributeSelector() (Selector, error) {
func (p *parser) parseAttributeSelector() (attrSelector, error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
}
if p.s[p.i] != '[' {
return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
}
p.i++
p.skipWhitespace()
key, err := p.parseIdentifier()
if err != nil {
return nil, err
return attrSelector{}, err
}
key = toLowerASCII(key)
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] == ']' {
p.i++
return attributeExistsSelector(key), nil
return attrSelector{key: key, operation: ""}, nil
}
if p.i+2 >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
op := p.s[p.i : p.i+2]
if op[0] == '=' {
op = "="
} else if op[1] != '=' {
return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
}
p.i += len(op)
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
var val string
var rx *regexp.Regexp
@ -380,46 +398,32 @@ func (p *parser) parseAttributeSelector() (Selector, error) {
}
}
if err != nil {
return nil, err
return attrSelector{}, err
}
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] != ']' {
return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
}
p.i++
switch op {
case "=":
return attributeEqualsSelector(key, val), nil
case "!=":
return attributeNotEqualSelector(key, val), nil
case "~=":
return attributeIncludesSelector(key, val), nil
case "|=":
return attributeDashmatchSelector(key, val), nil
case "^=":
return attributePrefixSelector(key, val), nil
case "$=":
return attributeSuffixSelector(key, val), nil
case "*=":
return attributeSubstringSelector(key, val), nil
case "#=":
return attributeRegexSelector(key, rx), nil
}
return nil, fmt.Errorf("attribute operator %q is not supported", op)
case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil
default:
return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
}
}
var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('")
// parsePseudoclassSelector parses a pseudoclass selector like :not(p).
func (p *parser) parsePseudoclassSelector() (Selector, error) {
// parsePseudoclassSelector parses a pseudoclass selector like :not(p)
func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
}
@ -428,40 +432,36 @@ func (p *parser) parsePseudoclassSelector() (Selector, error) {
}
p.i++
if p.s[p.i] == ':' { // we found a pseudo-element
p.i++
}
name, err := p.parseIdentifier()
if err != nil {
return nil, err
return
}
name = toLowerASCII(name)
switch name {
case "not", "has", "haschild":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, errExpectedParenthesis
}
sel, parseErr := p.parseSelectorGroup()
if parseErr != nil {
return nil, parseErr
return out, parseErr
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
return out, errExpectedClosingParenthesis
}
switch name {
case "not":
return negatedSelector(sel), nil
case "has":
return hasDescendantSelector(sel), nil
case "haschild":
return hasChildSelector(sel), nil
}
out = relativePseudoClassSelector{name: name, match: sel}
case "contains", "containsown":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, errExpectedParenthesis
}
if p.i == len(p.s) {
return nil, errUnmatchedParenthesis
return out, errUnmatchedParenthesis
}
var val string
switch p.s[p.i] {
@ -471,95 +471,75 @@ func (p *parser) parsePseudoclassSelector() (Selector, error) {
val, err = p.parseIdentifier()
}
if err != nil {
return nil, err
return out, err
}
val = strings.ToLower(val)
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in pseudo selector")
return out, errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
return out, errExpectedClosingParenthesis
}
switch name {
case "contains":
return textSubstrSelector(val), nil
case "containsown":
return ownTextSubstrSelector(val), nil
}
out = containsPseudoClassSelector{own: name == "containsown", value: val}
case "matches", "matchesown":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, errExpectedParenthesis
}
rx, err := p.parseRegex()
if err != nil {
return nil, err
return out, err
}
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in pseudo selector")
return out, errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
return out, errExpectedClosingParenthesis
}
switch name {
case "matches":
return textRegexSelector(rx), nil
case "matchesown":
return ownTextRegexSelector(rx), nil
}
out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}
case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, errExpectedParenthesis
}
a, b, err := p.parseNth()
if err != nil {
return nil, err
return out, err
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
}
if a == 0 {
switch name {
case "nth-child":
return simpleNthChildSelector(b, false), nil
case "nth-of-type":
return simpleNthChildSelector(b, true), nil
case "nth-last-child":
return simpleNthLastChildSelector(b, false), nil
case "nth-last-of-type":
return simpleNthLastChildSelector(b, true), nil
}
return out, errExpectedClosingParenthesis
}
return nthChildSelector(a, b,
name == "nth-last-child" || name == "nth-last-of-type",
name == "nth-of-type" || name == "nth-last-of-type"),
nil
last := name == "nth-last-child" || name == "nth-last-of-type"
ofType := name == "nth-of-type" || name == "nth-last-of-type"
out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType}
case "first-child":
return simpleNthChildSelector(1, false), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false}
case "last-child":
return simpleNthLastChildSelector(1, false), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true}
case "first-of-type":
return simpleNthChildSelector(1, true), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false}
case "last-of-type":
return simpleNthLastChildSelector(1, true), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true}
case "only-child":
return onlyChildSelector(false), nil
out = onlyChildPseudoClassSelector{ofType: false}
case "only-of-type":
return onlyChildSelector(true), nil
out = onlyChildPseudoClassSelector{ofType: true}
case "input":
return inputSelector, nil
out = inputPseudoClassSelector{}
case "empty":
return emptyElementSelector, nil
out = emptyElementPseudoClassSelector{}
case "root":
return rootSelector, nil
out = rootPseudoClassSelector{}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return out, errors.New("pseudo-elements are not yet supported")
default:
return out, fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
}
return nil, fmt.Errorf("unknown pseudoclass :%s", name)
return
}
// parseInteger parses a decimal integer.
@ -705,8 +685,8 @@ invalid:
// parseSimpleSelectorSequence parses a selector sequence that applies to
// a single element.
func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
var result Selector
func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
var selectors []Sel
if p.i >= len(p.s) {
return nil, errors.New("expected selector, found EOF instead")
@ -723,13 +703,15 @@ func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
if err != nil {
return nil, err
}
result = r
selectors = append(selectors, r)
}
loop:
for p.i < len(p.s) {
var ns Selector
var err error
var (
ns Sel
err error
)
switch p.s[p.i] {
case '#':
ns, err = p.parseIDSelector()
@ -745,37 +727,33 @@ loop:
if err != nil {
return nil, err
}
if result == nil {
result = ns
} else {
result = intersectionSelector(result, ns)
}
}
if result == nil {
result = func(n *html.Node) bool {
return n.Type == html.ElementNode
}
selectors = append(selectors, ns)
}
return result, nil
if len(selectors) == 1 { // no need wrap the selectors in compoundSelector
return selectors[0], nil
}
return compoundSelector{selectors: selectors}, nil
}
// parseSelector parses a selector that may include combinators.
func (p *parser) parseSelector() (result Selector, err error) {
func (p *parser) parseSelector() (Sel, error) {
p.skipWhitespace()
result, err = p.parseSimpleSelectorSequence()
result, err := p.parseSimpleSelectorSequence()
if err != nil {
return
return nil, err
}
for {
var combinator byte
var (
combinator byte
c Sel
)
if p.skipWhitespace() {
combinator = ' '
}
if p.i >= len(p.s) {
return
return result, nil
}
switch p.s[p.i] {
@ -785,51 +763,39 @@ func (p *parser) parseSelector() (result Selector, err error) {
p.skipWhitespace()
case ',', ')':
// These characters can't begin a selector, but they can legally occur after one.
return
return result, nil
}
if combinator == 0 {
return
return result, nil
}
c, err := p.parseSimpleSelectorSequence()
c, err = p.parseSimpleSelectorSequence()
if err != nil {
return nil, err
}
switch combinator {
case ' ':
result = descendantSelector(result, c)
case '>':
result = childSelector(result, c)
case '+':
result = siblingSelector(result, c, true)
case '~':
result = siblingSelector(result, c, false)
}
result = combinedSelector{first: result, combinator: combinator, second: c}
}
panic("unreachable")
}
// parseSelectorGroup parses a group of selectors, separated by commas.
func (p *parser) parseSelectorGroup() (result Selector, err error) {
result, err = p.parseSelector()
func (p *parser) parseSelectorGroup() (SelectorGroup, error) {
current, err := p.parseSelector()
if err != nil {
return
return nil, err
}
result := SelectorGroup{current}
for p.i < len(p.s) {
if p.s[p.i] != ',' {
return result, nil
break
}
p.i++
c, err := p.parseSelector()
if err != nil {
return nil, err
}
result = unionSelector(result, c)
result = append(result, c)
}
return
return result, nil
}

833
vendor/github.com/andybalholm/cascadia/selector.go

@ -0,0 +1,833 @@
package cascadia
import (
"bytes"
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
)
// Matcher is the interface for basic selector functionality.
//
// Match reports whether the selector matches the node n.
type Matcher interface {
Match(n *html.Node) bool
}
// Sel is the interface for all the functionality provided by selectors.
// It embeds Matcher and additionally requires a Specificity method; other
// methods may be added in the future.
type Sel interface {
Matcher
Specificity() Specificity
}
// Parse parses sel as a single selector.
//
// It returns an error when the parser rejects the input, or when the parser
// stops before the end of sel and leaves bytes unconsumed.
func Parse(sel string) (Sel, error) {
	p := &parser{s: sel}
	compiled, err := p.parseSelector()
	switch {
	case err != nil:
		return nil, err
	case p.i < len(sel):
		// The parser stopped early: whatever follows is not part of a selector.
		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
	}
	return compiled, nil
}
// ParseGroup parses a selector, or a group of selectors separated by commas.
//
// It returns an error when the parser rejects the input, or when the parser
// stops before the end of sel and leaves bytes unconsumed.
func ParseGroup(sel string) (SelectorGroup, error) {
	p := &parser{s: sel}
	group, err := p.parseSelectorGroup()
	switch {
	case err != nil:
		return nil, err
	case p.i < len(sel):
		// The parser stopped early: whatever follows is not part of the group.
		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
	}
	return group, nil
}
// A Selector is a function which tells whether a node matches or not.
//
// This type is maintained for compatibility; prefer the newer and more
// idiomatic interfaces Sel and Matcher.
type Selector func(*html.Node) bool
// Compile parses sel with ParseGroup and, on success, returns the group's
// Match method wrapped as a Selector that can be applied to html.Node values.
// On failure it returns a nil Selector together with the parse error.
func Compile(sel string) (Selector, error) {
	group, err := ParseGroup(sel)
	if err != nil {
		return nil, err
	}
	var match Selector = group.Match
	return match, nil
}
// MustCompile behaves like Compile, except that it panics with the parse
// error instead of returning it.
func MustCompile(sel string) Selector {
	compiled, err := Compile(sel)
	if err == nil {
		return compiled
	}
	panic(err)
}
// MatchAll collects every node that satisfies the selector, considering n
// itself as well as the nodes below it.
func (s Selector) MatchAll(n *html.Node) []*html.Node {
	var matches []*html.Node // start from a nil slice; matchAllInto appends to it
	return s.matchAllInto(n, matches)
}
// matchAllInto appends n (if it matches) and then, recursively, every
// matching node under n to storage, and returns the grown slice. A node is
// always appended before any of its children.
func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
	if s(n) {
		storage = append(storage, n)
	}
	child := n.FirstChild
	for child != nil {
		storage = s.matchAllInto(child, storage)
		child = child.NextSibling
	}
	return storage
}
// queryInto appends to storage every node below n that m matches and returns
// the grown slice. Unlike Selector.matchAllInto, n itself is never tested;
// each child is tested before its own subtree is visited.
func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node {
	child := n.FirstChild
	for child != nil {
		if m.Match(child) {
			storage = append(storage, child)
		}
		storage = queryInto(child, m, storage)
		child = child.NextSibling
	}
	return storage
}
// QueryAll returns a slice of all the descendants of n that m matches.
// The node n itself is not a candidate.
func QueryAll(n *html.Node, m Matcher) []*html.Node {
	var matches []*html.Node // start from a nil slice; queryInto appends to it
	return queryInto(n, m, matches)
}
// Match reports whether the node satisfies the selector; it simply calls the
// underlying function on n.
func (s Selector) Match(n *html.Node) bool {
	matched := s(n)
	return matched
}
// MatchFirst returns the first node that satisfies s, testing n first and
// then searching each child's subtree in sibling order. It returns nil when
// nothing matches.
func (s Selector) MatchFirst(n *html.Node) *html.Node {
	if s(n) {
		return n
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if found := s.MatchFirst(child); found != nil {
			return found
		}
	}
	return nil
}
// Query returns the first descendant of n that m matches, or nil if there is
// none. Each child is tested before its own subtree is searched; n itself is
// never tested.
func Query(n *html.Node, m Matcher) *html.Node {
	child := n.FirstChild
	for child != nil {
		if m.Match(child) {
			return child
		}
		if found := Query(child, m); found != nil {
			return found
		}
		child = child.NextSibling
	}
	return nil
}
// Filter returns, in their original order, the elements of nodes that
// satisfy the selector. The result is nil when none do.
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
	for i := range nodes {
		if node := nodes[i]; s(node) {
			result = append(result, node)
		}
	}
	return result
}
// Filter returns, in their original order, the elements of nodes that m
// matches. The result is nil when none do.
func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
	for _, node := range nodes {
		if !m.Match(node) {
			continue
		}
		result = append(result, node)
	}
	return result
}
// tagSelector selects elements by tag name.
type tagSelector struct {
	tag string
}

// Match reports whether n is an element node whose Data equals the tag.
func (t tagSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	return n.Data == t.tag
}

// Specificity returns Specificity{0, 0, 1} for every tag selector.
func (t tagSelector) Specificity() Specificity {
	return Specificity{0, 0, 1}
}
// classSelector selects elements by class attribute.
type classSelector struct {
	class string
}

// Match reports whether n carries a "class" attribute whose
// whitespace-separated value list includes the class (as decided by
// matchInclude).
func (t classSelector) Match(n *html.Node) bool {
	hasClass := func(attrVal string) bool {
		return matchInclude(t.class, attrVal)
	}
	return matchAttribute(n, "class", hasClass)
}

// Specificity returns Specificity{0, 1, 0} for every class selector.
func (t classSelector) Specificity() Specificity {
	return Specificity{0, 1, 0}
}
// idSelector selects elements by id attribute.
type idSelector struct {
	id string
}

// Match reports whether n carries an "id" attribute whose value is exactly
// equal to the id.
func (t idSelector) Match(n *html.Node) bool {
	sameID := func(attrVal string) bool {
		return attrVal == t.id
	}
	return matchAttribute(n, "id", sameID)
}

// Specificity returns Specificity{1, 0, 0} for every id selector.
func (t idSelector) Specificity() Specificity {
	return Specificity{1, 0, 0}
}
// attrSelector selects elements by attribute.
//
// key is the attribute name and operation is the operator text: the empty
// string tests only for the attribute's presence, while "=", "!=", "~=",
// "|=", "^=", "$=" and "*=" compare the attribute against val. The "#="
// operator matches against regexp instead of val.
type attrSelector struct {
	key, val, operation string
	regexp              *regexp.Regexp
}

// Match reports whether n satisfies the attribute test described by t.
//
// It panics if t.operation is not one of the operators listed on
// attrSelector; values built with any other operation are a programming
// error.
func (t attrSelector) Match(n *html.Node) bool {
	switch t.operation {
	case "":
		// Bare [key]: any value is acceptable, the attribute only has to exist.
		return matchAttribute(n, t.key, func(string) bool { return true })
	case "=":
		return matchAttribute(n, t.key, func(s string) bool { return s == t.val })
	case "!=":
		return attributeNotEqualMatch(t.key, t.val, n)
	case "~=":
		// matches elements where the attribute named key is a whitespace-separated list that includes val.
		return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s) })
	case "|=":
		return attributeDashMatch(t.key, t.val, n)
	case "^=":
		return attributePrefixMatch(t.key, t.val, n)
	case "$=":
		return attributeSuffixMatch(t.key, t.val, n)
	case "*=":
		return attributeSubstringMatch(t.key, t.val, n)
	case "#=":
		return attributeRegexMatch(t.key, t.regexp, n)
	default:
		panic(fmt.Sprintf("unsupported operation: %s", t.operation))
	}
}
// matchAttribute reports whether n is an element node that has an attribute
// named key whose value satisfies the function f. f is only called for
// attributes whose key matches, and the scan stops at the first success.
func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
	if n.Type != html.ElementNode {
		return false
	}
	for i := range n.Attr {
		if attr := n.Attr[i]; attr.Key == key && f(attr.Val) {
			return true
		}
	}
	return false
}
// attributeNotEqualMatch reports whether n is an element node on which the
// attribute named key does not have the value val. An element without the
// attribute at all therefore matches; a non-element node never does.
func attributeNotEqualMatch(key, val string, n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	for _, attr := range n.Attr {
		if attr.Key != key {
			continue
		}
		if attr.Val == val {
			return false
		}
	}
	return true
}
// returns true if s is a whitespace-separated list that includes val.
func matchInclude(val, s string) bool {
for s != "" {
i := strings.IndexAny(s, " \t\r\n\f")
if i == -1 {
return s == val
}
if s[:i] == val {
return true
}
s = s[i+1:]
}