Refactors to wrap *sitter.Node with *Node
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
# jsluice
|
||||
|
||||
A Go package and tool for extracting URLs, secrets, and other interesting data from JavaScript files.
|
||||
Uses [go-tree-sitter](https://github.com/smacker/go-tree-sitter) for parsing.
|
||||
A Go package for extracting URLs, secrets, and other interesting data from JavaScript.
|
||||
|
||||
|
||||
## Extracting URLs
|
||||
@@ -9,7 +8,7 @@ Uses [go-tree-sitter](https://github.com/smacker/go-tree-sitter) for parsing.
|
||||
Rather than using regular expressions alone, `jsluice` uses `go-tree-sitter` to look for places that URLs are known to be used,
|
||||
such as being assigned to `document.location`, passed to `window.open()`, or passed to `fetch()` etc.
|
||||
|
||||
A simple example program is provided [here](/bishopfoxmss/jsluice/blob/main/examples/basic/main.go):
|
||||
A simple example program is provided [here](/examples/basic/main.go):
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
12
analyzer.go
12
analyzer.go
@@ -12,7 +12,7 @@ type Analyzer struct {
|
||||
source []byte
|
||||
parser *sitter.Parser
|
||||
urlMatchers []URLMatcher
|
||||
rootNode *sitter.Node
|
||||
rootNode *Node
|
||||
}
|
||||
|
||||
// NewAnalyzer accepts a slice of bytes representing some JavaScript
|
||||
@@ -26,6 +26,14 @@ func NewAnalyzer(source []byte) *Analyzer {
|
||||
source: source,
|
||||
parser: parser,
|
||||
urlMatchers: AllURLMatchers(),
|
||||
rootNode: tree.RootNode(),
|
||||
rootNode: NewNode(tree.RootNode(), source),
|
||||
}
|
||||
}
|
||||
|
||||
// Query performs a tree-sitter query on the JavaScript being analyzed.
|
||||
// The provided function is called for every node that matches the query.
|
||||
// See https://tree-sitter.github.io/tree-sitter/using-parsers#query-syntax
|
||||
// for details on query syntax.
|
||||
func (a *Analyzer) Query(q string, fn func(*Node)) {
|
||||
a.rootNode.Query(q, fn)
|
||||
}
|
||||
|
||||
1
cmd/jsecrets/.gitignore
vendored
Normal file
1
cmd/jsecrets/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
jsecrets
|
||||
20
objects.go
20
objects.go
@@ -2,16 +2,14 @@ package jsluice
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
sitter "github.com/smacker/go-tree-sitter"
|
||||
)
|
||||
|
||||
type object struct {
|
||||
node *sitter.Node
|
||||
node *Node
|
||||
source []byte
|
||||
}
|
||||
|
||||
func newObject(n *sitter.Node, source []byte) object {
|
||||
func newObject(n *Node, source []byte) object {
|
||||
return object{
|
||||
node: n,
|
||||
source: source,
|
||||
@@ -34,7 +32,7 @@ func (o object) hasValidNode() bool {
|
||||
return o.node != nil && o.node.Type() == "object"
|
||||
}
|
||||
|
||||
func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node {
|
||||
func (o object) getNodeFunc(fn func(key string) bool) *Node {
|
||||
if !o.hasValidNode() {
|
||||
return nil
|
||||
}
|
||||
@@ -48,7 +46,7 @@ func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node {
|
||||
continue
|
||||
}
|
||||
|
||||
if !fn(dequote(content(pair.ChildByFieldName("key"), o.source))) {
|
||||
if !fn(pair.ChildByFieldName("key").RawString()) {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -57,13 +55,13 @@ func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o object) getNode(key string) *sitter.Node {
|
||||
func (o object) getNode(key string) *Node {
|
||||
return o.getNodeFunc(func(candidate string) bool {
|
||||
return key == candidate
|
||||
})
|
||||
}
|
||||
|
||||
func (o object) getNodeI(key string) *sitter.Node {
|
||||
func (o object) getNodeI(key string) *Node {
|
||||
key = strings.ToLower(key)
|
||||
return o.getNodeFunc(func(candidate string) bool {
|
||||
return key == strings.ToLower(candidate)
|
||||
@@ -85,7 +83,7 @@ func (o object) getKeys() []string {
|
||||
continue
|
||||
}
|
||||
|
||||
key := dequote(content(pair.ChildByFieldName("key"), o.source))
|
||||
key := pair.ChildByFieldName("key").RawString()
|
||||
out = append(out, key)
|
||||
}
|
||||
return out
|
||||
@@ -100,7 +98,7 @@ func (o object) getString(key, defaultVal string) string {
|
||||
if value == nil || value.Type() != "string" {
|
||||
return defaultVal
|
||||
}
|
||||
return dequote(content(value, o.source))
|
||||
return value.RawString()
|
||||
}
|
||||
|
||||
func (o object) getStringI(key, defaultVal string) string {
|
||||
@@ -108,5 +106,5 @@ func (o object) getStringI(key, defaultVal string) string {
|
||||
if value == nil || value.Type() != "string" {
|
||||
return defaultVal
|
||||
}
|
||||
return dequote(content(value, o.source))
|
||||
return value.RawString()
|
||||
}
|
||||
|
||||
@@ -2,8 +2,6 @@ package jsluice
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
sitter "github.com/smacker/go-tree-sitter"
|
||||
)
|
||||
|
||||
// A Secret represents any bit of secret or otherwise interesting
|
||||
@@ -21,14 +19,14 @@ func (a *Analyzer) GetSecrets() []*Secret {
|
||||
out := make([]*Secret, 0)
|
||||
|
||||
// we only want to run each query once so let's cache them
|
||||
nodeCache := make(map[string][]*sitter.Node)
|
||||
nodeCache := make(map[string][]*Node)
|
||||
|
||||
matchers := AllSecretMatchers()
|
||||
for _, m := range matchers {
|
||||
|
||||
if _, exists := nodeCache[m.Query]; !exists {
|
||||
nodes := make([]*sitter.Node, 0)
|
||||
query(a.rootNode, m.Query, func(n *sitter.Node) {
|
||||
nodes := make([]*Node, 0)
|
||||
a.Query(m.Query, func(n *Node) {
|
||||
nodes = append(nodes, n)
|
||||
})
|
||||
nodeCache[m.Query] = nodes
|
||||
@@ -53,15 +51,15 @@ func (a *Analyzer) GetSecrets() []*Secret {
|
||||
// returning any Secret that is found.
|
||||
type SecretMatcher struct {
|
||||
Query string
|
||||
Fn func(*sitter.Node, []byte) *Secret
|
||||
Fn func(*Node, []byte) *Secret
|
||||
}
|
||||
|
||||
// AllSecretMatchers returns the default list of SecretMatchers
|
||||
func AllSecretMatchers() []SecretMatcher {
|
||||
return []SecretMatcher{
|
||||
// AWS Keys
|
||||
{"(string) @matches", func(n *sitter.Node, source []byte) *Secret {
|
||||
str := dequote(content(n, source))
|
||||
{"(string) @matches", func(n *Node, source []byte) *Secret {
|
||||
str := n.RawString()
|
||||
|
||||
// https://docs.aws.amazon.com/STS/latest/APIReference/API_Credentials.html
|
||||
if len(str) < 16 || len(str) > 128 {
|
||||
@@ -124,7 +122,7 @@ func AllSecretMatchers() []SecretMatcher {
|
||||
}},
|
||||
|
||||
// REACT_APP_... containing objects
|
||||
{"(object) @matches", func(n *sitter.Node, source []byte) *Secret {
|
||||
{"(object) @matches", func(n *Node, source []byte) *Secret {
|
||||
|
||||
return nil
|
||||
o := newObject(n, source)
|
||||
@@ -148,7 +146,7 @@ func AllSecretMatchers() []SecretMatcher {
|
||||
}},
|
||||
|
||||
// Firebase objects
|
||||
{"(object) @matches", func(n *sitter.Node, source []byte) *Secret {
|
||||
{"(object) @matches", func(n *Node, source []byte) *Secret {
|
||||
o := newObject(n, source)
|
||||
|
||||
mustHave := map[string]bool{
|
||||
|
||||
216
tree.go
216
tree.go
@@ -8,20 +8,112 @@ import (
|
||||
"github.com/smacker/go-tree-sitter/javascript"
|
||||
)
|
||||
|
||||
// nil-safe wrapper around calling node.Content(source)
|
||||
func content(n *sitter.Node, source []byte) string {
|
||||
if n == nil {
|
||||
return ""
|
||||
}
|
||||
return n.Content(source)
|
||||
// Node pairs a tree-sitter node with the source bytes used to read its
// text, so methods such as Content need no separate source argument.
type Node struct {
|
||||
// node is the wrapped tree-sitter node; Content and Type treat a nil value as empty.
node *sitter.Node
|
||||
// source is the byte slice handed to node.Content when reading the node's text.
source []byte
|
||||
}
|
||||
|
||||
func isStringy(n *sitter.Node, source []byte) bool {
|
||||
// NewNode wraps n together with source in a freshly allocated *Node.
// The returned wrapper is never nil, even when n itself is nil.
func NewNode(n *sitter.Node, source []byte) *Node {
|
||||
return &Node{
|
||||
node: n,
|
||||
source: source,
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Node) Content() string {
|
||||
if n.node == nil {
|
||||
return ""
|
||||
}
|
||||
return n.node.Content(n.source)
|
||||
}
|
||||
|
||||
func (n *Node) Type() string {
|
||||
if n.node == nil {
|
||||
return ""
|
||||
}
|
||||
return n.node.Type()
|
||||
}
|
||||
|
||||
func (n *Node) ChildByFieldName(name string) *Node {
|
||||
return NewNode(n.node.ChildByFieldName(name), n.source)
|
||||
}
|
||||
|
||||
func (n *Node) NamedChild(index int) *Node {
|
||||
return NewNode(n.node.NamedChild(0), n.source)
|
||||
}
|
||||
|
||||
func (n *Node) NamedChildCount() int {
|
||||
return int(n.node.NamedChildCount())
|
||||
}
|
||||
|
||||
// CollapsedString takes a node representing a URL and attempts to make it
|
||||
// at least somewhat easily parseable. It's common to build URLs out
|
||||
// of variables and function calls so we want to turn something like:
|
||||
//
|
||||
// './upload.php?profile='+res.id+'&show='+$('.participate_modal_container').attr('data-val')
|
||||
//
|
||||
// Into something more like:
|
||||
//
|
||||
// ./upload.php?profile=EXPR&show=EXPR
|
||||
//
|
||||
func (n *Node) CollapsedString() string {
|
||||
if n.node == nil {
|
||||
return ""
|
||||
}
|
||||
switch n.Type() {
|
||||
case "binary_expression":
|
||||
return fmt.Sprintf(
|
||||
"%s%s",
|
||||
n.ChildByFieldName("left").CollapsedString(),
|
||||
n.ChildByFieldName("right").CollapsedString(),
|
||||
)
|
||||
case "string":
|
||||
return n.RawString()
|
||||
default:
|
||||
return "EXPR"
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Node) RawString() string {
|
||||
return dequote(n.Content())
|
||||
}
|
||||
|
||||
func (n *Node) Parent() *Node {
|
||||
return NewNode(n.node.Parent(), n.source)
|
||||
}
|
||||
|
||||
func (n *Node) Query(query string, fn func(*Node)) {
|
||||
q, err := sitter.NewQuery(
|
||||
[]byte(query),
|
||||
javascript.GetLanguage(),
|
||||
)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
qc := sitter.NewQueryCursor()
|
||||
defer qc.Close()
|
||||
|
||||
qc.Exec(q, n.node)
|
||||
|
||||
for {
|
||||
match, exists := qc.NextMatch()
|
||||
if !exists || match == nil {
|
||||
break
|
||||
}
|
||||
|
||||
for _, capture := range match.Captures {
|
||||
fn(NewNode(capture.Node, n.source))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Node) IsStringy() bool {
|
||||
if n.Type() == "string" {
|
||||
return true
|
||||
}
|
||||
|
||||
c := content(n, source)
|
||||
c := n.Content()
|
||||
if len(c) == 0 {
|
||||
return false
|
||||
}
|
||||
@@ -34,115 +126,15 @@ func isStringy(n *sitter.Node, source []byte) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func hasDescendantOfType(n *sitter.Node, t string) bool {
|
||||
if n == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// node is provided type exactly
|
||||
if n.Type() == t {
|
||||
return true
|
||||
}
|
||||
|
||||
hasType := false
|
||||
enter := func(n *sitter.Node) {
|
||||
if n.Type() == t {
|
||||
hasType = true
|
||||
}
|
||||
}
|
||||
|
||||
walk(n, enter)
|
||||
return hasType
|
||||
}
|
||||
|
||||
// cleanURL takes a node representing a URL and attempts to make it
|
||||
// at least somewhat easily parseable. It's common to build URLs out
|
||||
// of variables and function calls so we want to turn something like:
|
||||
//
|
||||
// './upload.php?profile='+res.id+'&show='+$('.participate_modal_container').attr('data-val')
|
||||
//
|
||||
// Into something more like:
|
||||
//
|
||||
// ./upload.php?profile=EXPR&show=EXPR
|
||||
//
|
||||
func cleanURL(n *sitter.Node, source []byte) string {
|
||||
if n == nil {
|
||||
return ""
|
||||
}
|
||||
switch n.Type() {
|
||||
case "binary_expression":
|
||||
return fmt.Sprintf(
|
||||
"%s%s",
|
||||
cleanURL(n.ChildByFieldName("left"), source),
|
||||
cleanURL(n.ChildByFieldName("right"), source),
|
||||
)
|
||||
case "string":
|
||||
return dequote(content(n, source))
|
||||
default:
|
||||
return "EXPR"
|
||||
}
|
||||
}
|
||||
|
||||
func dequote(in string) string {
|
||||
return strings.Trim(in, "'\"`")
|
||||
}
|
||||
|
||||
func query(n *sitter.Node, query string, enter func(*sitter.Node)) {
|
||||
q, err := sitter.NewQuery(
|
||||
[]byte(query),
|
||||
javascript.GetLanguage(),
|
||||
)
|
||||
if err != nil {
|
||||
return
|
||||
func content(n *sitter.Node, source []byte) string {
|
||||
if n == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
qc := sitter.NewQueryCursor()
|
||||
defer qc.Close()
|
||||
|
||||
qc.Exec(q, n)
|
||||
|
||||
for {
|
||||
match, exists := qc.NextMatch()
|
||||
if !exists || match == nil {
|
||||
break
|
||||
}
|
||||
|
||||
for _, capture := range match.Captures {
|
||||
enter(capture.Node)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func walk(n *sitter.Node, enter func(*sitter.Node)) {
|
||||
|
||||
c := sitter.NewTreeCursor(n)
|
||||
defer c.Close()
|
||||
|
||||
// walkies
|
||||
recurse := true
|
||||
for {
|
||||
// descend into the tree
|
||||
if recurse && c.GoToFirstChild() {
|
||||
recurse = true
|
||||
enter(c.CurrentNode())
|
||||
continue
|
||||
}
|
||||
|
||||
// move sideways
|
||||
if c.GoToNextSibling() {
|
||||
recurse = true
|
||||
enter(c.CurrentNode())
|
||||
continue
|
||||
}
|
||||
|
||||
// climb back up the tree, but make sure we don't descend right back to where we were
|
||||
if c.GoToParent() {
|
||||
recurse = false
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
return n.Content(source)
|
||||
}
|
||||
|
||||
func PrintTree(source []byte) {
|
||||
|
||||
10
tree_test.go
10
tree_test.go
@@ -8,7 +8,7 @@ import (
|
||||
"github.com/smacker/go-tree-sitter/javascript"
|
||||
)
|
||||
|
||||
func TestCleanURL(t *testing.T) {
|
||||
func TestCollapsedString(t *testing.T) {
|
||||
cases := []struct {
|
||||
JS []byte
|
||||
Expected string
|
||||
@@ -24,7 +24,7 @@ func TestCleanURL(t *testing.T) {
|
||||
for i, c := range cases {
|
||||
t.Run(strconv.Itoa(i), func(t *testing.T) {
|
||||
tree := parser.Parse(nil, c.JS)
|
||||
root := tree.RootNode()
|
||||
root := NewNode(tree.RootNode(), c.JS)
|
||||
|
||||
// Example tree:
|
||||
// program
|
||||
@@ -33,12 +33,12 @@ func TestCleanURL(t *testing.T) {
|
||||
// left: string ("./login.php?redirect=")
|
||||
// right: identifier (url)
|
||||
//
|
||||
// We want the binary_expression to pass to cleanURL, which is
|
||||
// We want the binary_expression to pass to CollapsedString, which is
|
||||
// the first Named Child of the first Named Child of the root node.
|
||||
actual := cleanURL(root.NamedChild(0).NamedChild(0), c.JS)
|
||||
actual := root.NamedChild(0).NamedChild(0).CollapsedString()
|
||||
|
||||
if actual != c.Expected {
|
||||
t.Errorf("want %s for cleanURL(%s), have: %s", c.Expected, c.JS, actual)
|
||||
t.Errorf("want %s for CollapsedString(%s), have: %s", c.Expected, c.JS, actual)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3,14 +3,13 @@ package jsluice
|
||||
import (
|
||||
"strings"
|
||||
|
||||
sitter "github.com/smacker/go-tree-sitter"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
func matchJQuery() URLMatcher {
|
||||
|
||||
return URLMatcher{"call_expression", func(n *sitter.Node, source []byte) *URL {
|
||||
callName := content(n.ChildByFieldName("function"), source)
|
||||
return URLMatcher{"call_expression", func(n *Node, source []byte) *URL {
|
||||
callName := n.ChildByFieldName("function").Content()
|
||||
|
||||
if !slices.Contains(
|
||||
[]string{
|
||||
@@ -48,7 +47,7 @@ func matchJQuery() URLMatcher {
|
||||
|
||||
m := &URL{
|
||||
Type: callName,
|
||||
Source: content(n, source),
|
||||
Source: n.Content(),
|
||||
}
|
||||
|
||||
// Infer the method for .post and .get calls
|
||||
@@ -58,11 +57,11 @@ func matchJQuery() URLMatcher {
|
||||
m.Method = "GET"
|
||||
}
|
||||
|
||||
var settingsNode *sitter.Node
|
||||
var settingsNode *Node
|
||||
|
||||
if isStringy(firstArg, source) {
|
||||
if firstArg.IsStringy() {
|
||||
// first argument is the URL
|
||||
m.URL = cleanURL(firstArg, source)
|
||||
m.URL = firstArg.CollapsedString()
|
||||
|
||||
// If the first arg is a URL, the second arg is a
|
||||
// settings object for $.ajax, or a data object for
|
||||
@@ -93,7 +92,7 @@ func matchJQuery() URLMatcher {
|
||||
settings := newObject(settingsNode, source)
|
||||
|
||||
if m.URL == "" {
|
||||
m.URL = cleanURL(settings.getNode("url"), source)
|
||||
m.URL = settings.getNode("url").CollapsedString()
|
||||
}
|
||||
|
||||
m.Headers = settings.getObject("headers").asMap()
|
||||
|
||||
@@ -4,28 +4,27 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
sitter "github.com/smacker/go-tree-sitter"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
type nodeCache struct {
|
||||
sync.RWMutex
|
||||
data map[*sitter.Node][]*sitter.Node
|
||||
data map[*Node][]*Node
|
||||
}
|
||||
|
||||
func newNodeCache() *nodeCache {
|
||||
return &nodeCache{
|
||||
data: make(map[*sitter.Node][]*sitter.Node),
|
||||
data: make(map[*Node][]*Node),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *nodeCache) set(k *sitter.Node, v []*sitter.Node) {
|
||||
func (c *nodeCache) set(k *Node, v []*Node) {
|
||||
c.Lock()
|
||||
c.data[k] = v
|
||||
c.Unlock()
|
||||
}
|
||||
|
||||
func (c *nodeCache) get(k *sitter.Node) ([]*sitter.Node, bool) {
|
||||
func (c *nodeCache) get(k *Node) ([]*Node, bool) {
|
||||
c.RLock()
|
||||
v, exists := c.data[k]
|
||||
c.RUnlock()
|
||||
@@ -35,8 +34,8 @@ func (c *nodeCache) get(k *sitter.Node) ([]*sitter.Node, bool) {
|
||||
func matchXHR() URLMatcher {
|
||||
cache := newNodeCache()
|
||||
|
||||
return URLMatcher{"call_expression", func(n *sitter.Node, source []byte) *URL {
|
||||
callName := content(n.ChildByFieldName("function"), source)
|
||||
return URLMatcher{"call_expression", func(n *Node, source []byte) *URL {
|
||||
callName := n.ChildByFieldName("function").Content()
|
||||
|
||||
// We don't know what the XMLHttpRequest object will be called,
|
||||
// so we have to focus on just the .open bit
|
||||
@@ -50,7 +49,7 @@ func matchXHR() URLMatcher {
|
||||
// This will miss cases where the method is a variable.
|
||||
arguments := n.ChildByFieldName("arguments")
|
||||
|
||||
method := dequote(content(arguments.NamedChild(0), source))
|
||||
method := arguments.NamedChild(0).RawString()
|
||||
|
||||
if !slices.Contains(
|
||||
[]string{"GET", "HEAD", "OPTIONS", "POST", "PUT", "PATCH", "DELETE"},
|
||||
@@ -60,15 +59,15 @@ func matchXHR() URLMatcher {
|
||||
}
|
||||
|
||||
urlArg := arguments.NamedChild(1)
|
||||
if !isStringy(urlArg, source) {
|
||||
if !urlArg.IsStringy() {
|
||||
return nil
|
||||
}
|
||||
|
||||
match := &URL{
|
||||
URL: cleanURL(urlArg, source),
|
||||
URL: urlArg.CollapsedString(),
|
||||
Method: method,
|
||||
Type: "XMLHttpRequest.open",
|
||||
Source: content(n, source),
|
||||
Source: n.Content(),
|
||||
}
|
||||
|
||||
// to find headers we need to look for calls to setRequestHeader() on
|
||||
@@ -103,7 +102,7 @@ func matchXHR() URLMatcher {
|
||||
// Look for call_expressions under the same parent as our .open call.
|
||||
// It's common to end up querying the exact same parent over and over
|
||||
// again, so we cache the results on a per-parent node basis.
|
||||
nodes := make([]*sitter.Node, 0)
|
||||
nodes := make([]*Node, 0)
|
||||
if v, exists := cache.get(parent); exists {
|
||||
nodes = v
|
||||
} else {
|
||||
@@ -116,7 +115,7 @@ func matchXHR() URLMatcher {
|
||||
arguments: (arguments (string))
|
||||
) @matches
|
||||
`
|
||||
query(parent, q, func(sibling *sitter.Node) {
|
||||
parent.Query(q, func(sibling *Node) {
|
||||
nodes = append(nodes, sibling)
|
||||
})
|
||||
cache.set(parent, nodes)
|
||||
@@ -131,7 +130,7 @@ func matchXHR() URLMatcher {
|
||||
// it's possible for the .send to be wrapped in a conditional so that might
|
||||
// cause us to miss some values.
|
||||
for _, sibling := range nodes {
|
||||
name := content(sibling.ChildByFieldName("function"), source)
|
||||
name := sibling.ChildByFieldName("function").Content()
|
||||
if !strings.HasSuffix(name, ".setRequestHeader") {
|
||||
continue
|
||||
}
|
||||
@@ -146,7 +145,7 @@ func matchXHR() URLMatcher {
|
||||
continue
|
||||
}
|
||||
|
||||
header := dequote(content(headerNode, source))
|
||||
header := headerNode.RawString()
|
||||
if _, exists := headers[header]; exists {
|
||||
continue
|
||||
}
|
||||
@@ -154,7 +153,7 @@ func matchXHR() URLMatcher {
|
||||
var value string
|
||||
valueNode := args.NamedChild(1)
|
||||
if valueNode != nil && valueNode.Type() == "string" {
|
||||
value = dequote(content(valueNode, source))
|
||||
value = valueNode.RawString()
|
||||
}
|
||||
|
||||
headers[header] = value
|
||||
|
||||
@@ -4,8 +4,6 @@ import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
sitter "github.com/smacker/go-tree-sitter"
|
||||
)
|
||||
|
||||
// A URL is any URL found in the source code with accompanying details
|
||||
@@ -36,7 +34,7 @@ func (a *Analyzer) GetURLs() []*URL {
|
||||
re := regexp.MustCompile("[^A-Z-a-z]")
|
||||
|
||||
// function to run on entry to each node in the tree
|
||||
enter := func(n *sitter.Node) {
|
||||
enter := func(n *Node) {
|
||||
|
||||
for _, matcher := range a.urlMatchers {
|
||||
if matcher.Type != n.Type() {
|
||||
@@ -98,7 +96,7 @@ func (a *Analyzer) GetURLs() []*URL {
|
||||
}
|
||||
|
||||
// find the nodes we need in the tree and run the enter function for every node
|
||||
query(a.rootNode, "[(assignment_expression) (call_expression) (string)] @matches", enter)
|
||||
a.Query("[(assignment_expression) (call_expression) (string)] @matches", enter)
|
||||
|
||||
return matches
|
||||
}
|
||||
@@ -121,7 +119,7 @@ func unique[T comparable](items []T) []T {
|
||||
// and a function to actually do the matching and producing of the *URL
|
||||
type URLMatcher struct {
|
||||
Type string
|
||||
Fn func(*sitter.Node, []byte) *URL
|
||||
Fn func(*Node, []byte) *URL
|
||||
}
|
||||
|
||||
// AllURLMatchers returns the default list of URLMatchers
|
||||
@@ -158,11 +156,11 @@ func AllURLMatchers() []URLMatcher {
|
||||
matchJQuery(),
|
||||
|
||||
// location assignment
|
||||
{"assignment_expression", func(n *sitter.Node, source []byte) *URL {
|
||||
{"assignment_expression", func(n *Node, source []byte) *URL {
|
||||
left := n.ChildByFieldName("left")
|
||||
right := n.ChildByFieldName("right")
|
||||
|
||||
if !isInterestingAssignment(content(left, source)) {
|
||||
if !isInterestingAssignment(left.Content()) {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -177,7 +175,7 @@ func AllURLMatchers() []URLMatcher {
|
||||
//
|
||||
// So while we might miss out on some things this way, they probably wouldn't
|
||||
// have been super useful to anything automated anyway.
|
||||
rightContent := content(right, source)
|
||||
rightContent := right.Content()
|
||||
if len(rightContent) < 2 {
|
||||
return nil
|
||||
}
|
||||
@@ -187,16 +185,16 @@ func AllURLMatchers() []URLMatcher {
|
||||
}
|
||||
|
||||
return &URL{
|
||||
URL: cleanURL(right, source),
|
||||
URL: right.CollapsedString(),
|
||||
Method: "GET",
|
||||
Type: "locationAssignment",
|
||||
Source: content(n, source),
|
||||
Source: n.Content(),
|
||||
}
|
||||
}},
|
||||
|
||||
// location replacement
|
||||
{"call_expression", func(n *sitter.Node, source []byte) *URL {
|
||||
callName := content(n.ChildByFieldName("function"), source)
|
||||
{"call_expression", func(n *Node, source []byte) *URL {
|
||||
callName := n.ChildByFieldName("function").Content()
|
||||
|
||||
if !strings.HasSuffix(callName, "location.replace") {
|
||||
return nil
|
||||
@@ -205,69 +203,69 @@ func AllURLMatchers() []URLMatcher {
|
||||
arguments := n.ChildByFieldName("arguments")
|
||||
|
||||
// check the argument contains at least one string literal
|
||||
if !hasDescendantOfType(arguments.NamedChild(0), "string") {
|
||||
if !arguments.NamedChild(0).IsStringy() {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &URL{
|
||||
URL: cleanURL(arguments.NamedChild(0), source),
|
||||
URL: arguments.NamedChild(0).CollapsedString(),
|
||||
Method: "GET",
|
||||
Type: "locationReplacement",
|
||||
Source: content(n, source),
|
||||
Source: n.Content(),
|
||||
}
|
||||
}},
|
||||
|
||||
// window.open(url)
|
||||
{"call_expression", func(n *sitter.Node, source []byte) *URL {
|
||||
callName := content(n.ChildByFieldName("function"), source)
|
||||
{"call_expression", func(n *Node, source []byte) *URL {
|
||||
callName := n.ChildByFieldName("function").Content()
|
||||
if callName != "window.open" && callName != "open" {
|
||||
return nil
|
||||
}
|
||||
arguments := n.ChildByFieldName("arguments")
|
||||
|
||||
// check the argument contains at least one string literal
|
||||
if !hasDescendantOfType(arguments.NamedChild(0), "string") {
|
||||
if !arguments.NamedChild(0).IsStringy() {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &URL{
|
||||
URL: cleanURL(arguments.NamedChild(0), source),
|
||||
URL: arguments.NamedChild(0).CollapsedString(),
|
||||
Method: "GET",
|
||||
Type: "window.open",
|
||||
Source: content(n, source),
|
||||
Source: n.Content(),
|
||||
}
|
||||
return nil
|
||||
}},
|
||||
|
||||
// fetch(url, [init])
|
||||
{"call_expression", func(n *sitter.Node, source []byte) *URL {
|
||||
callName := content(n.ChildByFieldName("function"), source)
|
||||
{"call_expression", func(n *Node, source []byte) *URL {
|
||||
callName := n.ChildByFieldName("function").Content()
|
||||
if callName != "fetch" {
|
||||
return nil
|
||||
}
|
||||
arguments := n.ChildByFieldName("arguments")
|
||||
|
||||
// check the argument contains at least one string literal
|
||||
if !hasDescendantOfType(arguments.NamedChild(0), "string") {
|
||||
if !arguments.NamedChild(0).IsStringy() {
|
||||
return nil
|
||||
}
|
||||
|
||||
init := newObject(arguments.NamedChild(1), source)
|
||||
|
||||
return &URL{
|
||||
URL: cleanURL(arguments.NamedChild(0), source),
|
||||
URL: arguments.NamedChild(0).CollapsedString(),
|
||||
Method: init.getString("method", "GET"),
|
||||
Headers: init.getObject("headers").asMap(),
|
||||
ContentType: init.getObject("headers").getStringI("content-type", ""),
|
||||
Type: "fetch",
|
||||
Source: content(n, source),
|
||||
Source: n.Content(),
|
||||
}
|
||||
return nil
|
||||
}},
|
||||
|
||||
// string literals
|
||||
{"string", func(n *sitter.Node, source []byte) *URL {
|
||||
trimmed := dequote(content(n, source))
|
||||
{"string", func(n *Node, source []byte) *URL {
|
||||
trimmed := n.RawString()
|
||||
|
||||
if !MaybeURL(trimmed) {
|
||||
return nil
|
||||
@@ -276,7 +274,7 @@ func AllURLMatchers() []URLMatcher {
|
||||
return &URL{
|
||||
URL: trimmed,
|
||||
Type: "stringLiteral",
|
||||
Source: content(n, source),
|
||||
Source: n.Content(),
|
||||
}
|
||||
}},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user