diff --git a/README.mkd b/README.mkd index cc4fbc0..7fafc5a 100644 --- a/README.mkd +++ b/README.mkd @@ -1,7 +1,6 @@ # jsluice -A Go package and tool for extracting URLs, secrets, and other interesting data from JavaScript files. -Uses [go-tree-sitter](https://github.com/smacker/go-tree-sitter) for parsing. +A Go package for extracting URLs, secrets, and other interesting data from JavaScript. ## Extracting URLs @@ -9,7 +8,7 @@ Uses [go-tree-sitter](https://github.com/smacker/go-tree-sitter) for parsing. Rather than using regular expressions alone, `jsluice` uses `go-tree-sitter` to look for places that URLs are known to be used, such as being assigned to `document.location`, passed to `window.open()`, or passed to `fetch()` etc. -A simple example program is provided [here](/bishopfoxmss/jsluice/blob/main/examples/basic/main.go): +A simple example program is provided [here](/examples/basic/main.go): ```go package main diff --git a/analyzer.go b/analyzer.go index 4b1897b..ba5362f 100644 --- a/analyzer.go +++ b/analyzer.go @@ -12,7 +12,7 @@ type Analyzer struct { source []byte parser *sitter.Parser urlMatchers []URLMatcher - rootNode *sitter.Node + rootNode *Node } // NewAnalyzer accepts a slice of bytes representing some JavaScript @@ -26,6 +26,14 @@ func NewAnalyzer(source []byte) *Analyzer { source: source, parser: parser, urlMatchers: AllURLMatchers(), - rootNode: tree.RootNode(), + rootNode: NewNode(tree.RootNode(), source), } } + +// Query performs a tree-sitter query on the JavaScript being analyzed. +// The provided function is called for every node that matches the query. +// See https://tree-sitter.github.io/tree-sitter/using-parsers#query-syntax +// for details on query syntax. 
+func (a *Analyzer) Query(q string, fn func(*Node)) { + a.rootNode.Query(q, fn) +} diff --git a/cmd/jsecrets/.gitignore b/cmd/jsecrets/.gitignore new file mode 100644 index 0000000..129fa8a --- /dev/null +++ b/cmd/jsecrets/.gitignore @@ -0,0 +1 @@ +jsecrets diff --git a/objects.go b/objects.go index 11ba408..853a58b 100644 --- a/objects.go +++ b/objects.go @@ -2,16 +2,14 @@ package jsluice import ( "strings" - - sitter "github.com/smacker/go-tree-sitter" ) type object struct { - node *sitter.Node + node *Node source []byte } -func newObject(n *sitter.Node, source []byte) object { +func newObject(n *Node, source []byte) object { return object{ node: n, source: source, @@ -34,7 +32,7 @@ func (o object) hasValidNode() bool { return o.node != nil && o.node.Type() == "object" } -func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node { +func (o object) getNodeFunc(fn func(key string) bool) *Node { if !o.hasValidNode() { return nil } @@ -48,7 +46,7 @@ func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node { continue } - if !fn(dequote(content(pair.ChildByFieldName("key"), o.source))) { + if !fn(pair.ChildByFieldName("key").RawString()) { continue } @@ -57,13 +55,13 @@ func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node { return nil } -func (o object) getNode(key string) *sitter.Node { +func (o object) getNode(key string) *Node { return o.getNodeFunc(func(candidate string) bool { return key == candidate }) } -func (o object) getNodeI(key string) *sitter.Node { +func (o object) getNodeI(key string) *Node { key = strings.ToLower(key) return o.getNodeFunc(func(candidate string) bool { return key == strings.ToLower(candidate) @@ -85,7 +83,7 @@ func (o object) getKeys() []string { continue } - key := dequote(content(pair.ChildByFieldName("key"), o.source)) + key := pair.ChildByFieldName("key").RawString() out = append(out, key) } return out @@ -100,7 +98,7 @@ func (o object) getString(key, defaultVal string) string { if value == nil || 
value.Type() != "string" { return defaultVal } - return dequote(content(value, o.source)) + return value.RawString() } func (o object) getStringI(key, defaultVal string) string { @@ -108,5 +106,5 @@ func (o object) getStringI(key, defaultVal string) string { if value == nil || value.Type() != "string" { return defaultVal } - return dequote(content(value, o.source)) + return value.RawString() } diff --git a/secret-matchers.go b/secret-matchers.go index 418999f..6f82178 100644 --- a/secret-matchers.go +++ b/secret-matchers.go @@ -2,8 +2,6 @@ package jsluice import ( "strings" - - sitter "github.com/smacker/go-tree-sitter" ) // A Secret represents any bit of secret or otherwise interesting @@ -21,14 +19,14 @@ func (a *Analyzer) GetSecrets() []*Secret { out := make([]*Secret, 0) // we only want to run each query once so let's cache them - nodeCache := make(map[string][]*sitter.Node) + nodeCache := make(map[string][]*Node) matchers := AllSecretMatchers() for _, m := range matchers { if _, exists := nodeCache[m.Query]; !exists { - nodes := make([]*sitter.Node, 0) - query(a.rootNode, m.Query, func(n *sitter.Node) { + nodes := make([]*Node, 0) + a.Query(m.Query, func(n *Node) { nodes = append(nodes, n) }) nodeCache[m.Query] = nodes @@ -53,15 +51,15 @@ func (a *Analyzer) GetSecrets() []*Secret { // returning any Secret that is found. type SecretMatcher struct { Query string - Fn func(*sitter.Node, []byte) *Secret + Fn func(*Node, []byte) *Secret } // AllSecretMatchers returns the default list of SecretMatchers func AllSecretMatchers() []SecretMatcher { return []SecretMatcher{ // AWS Keys - {"(string) @matches", func(n *sitter.Node, source []byte) *Secret { - str := dequote(content(n, source)) + {"(string) @matches", func(n *Node, source []byte) *Secret { + str := n.RawString() // https://docs.aws.amazon.com/STS/latest/APIReference/API_Credentials.html if len(str) < 16 || len(str) > 128 { @@ -124,7 +122,7 @@ func AllSecretMatchers() []SecretMatcher { }}, // REACT_APP_... 
containing objects - {"(object) @matches", func(n *sitter.Node, source []byte) *Secret { + {"(object) @matches", func(n *Node, source []byte) *Secret { return nil o := newObject(n, source) @@ -148,7 +146,7 @@ func AllSecretMatchers() []SecretMatcher { }}, // Firebase objects - {"(object) @matches", func(n *sitter.Node, source []byte) *Secret { + {"(object) @matches", func(n *Node, source []byte) *Secret { o := newObject(n, source) mustHave := map[string]bool{ diff --git a/tree.go b/tree.go index 200bd85..0ae2490 100644 --- a/tree.go +++ b/tree.go @@ -8,20 +8,112 @@ import ( "github.com/smacker/go-tree-sitter/javascript" ) -// nil-safe wrapper around calling node.Content(source) -func content(n *sitter.Node, source []byte) string { - if n == nil { - return "" - } - return n.Content(source) +type Node struct { + node *sitter.Node + source []byte } -func isStringy(n *sitter.Node, source []byte) bool { +func NewNode(n *sitter.Node, source []byte) *Node { + return &Node{ + node: n, + source: source, + } +} + +func (n *Node) Content() string { + if n.node == nil { + return "" + } + return n.node.Content(n.source) +} + +func (n *Node) Type() string { + if n.node == nil { + return "" + } + return n.node.Type() +} + +func (n *Node) ChildByFieldName(name string) *Node { + return NewNode(n.node.ChildByFieldName(name), n.source) +} + +func (n *Node) NamedChild(index int) *Node { + return NewNode(n.node.NamedChild(index), n.source) +} + +func (n *Node) NamedChildCount() int { + return int(n.node.NamedChildCount()) +} + +// CollapsedString takes a node representing a URL and attempts to make it +// at least somewhat easily parseable. 
It's common to build URLs out +// of variables and function calls so we want to turn something like: +// +// './upload.php?profile='+res.id+'&show='+$('.participate_modal_container').attr('data-val') +// +// Into something more like: +// +// ./upload.php?profile=EXPR&show=EXPR +// +func (n *Node) CollapsedString() string { + if n == nil || n.node == nil { + return "" + } + switch n.Type() { + case "binary_expression": + return fmt.Sprintf( + "%s%s", + n.ChildByFieldName("left").CollapsedString(), + n.ChildByFieldName("right").CollapsedString(), + ) + case "string": + return n.RawString() + default: + return "EXPR" + } +} + +func (n *Node) RawString() string { + return dequote(n.Content()) +} + +func (n *Node) Parent() *Node { + return NewNode(n.node.Parent(), n.source) +} + +func (n *Node) Query(query string, fn func(*Node)) { + q, err := sitter.NewQuery( + []byte(query), + javascript.GetLanguage(), + ) + if err != nil { + return + } + + qc := sitter.NewQueryCursor() + defer qc.Close() + + qc.Exec(q, n.node) + + for { + match, exists := qc.NextMatch() + if !exists || match == nil { + break + } + + for _, capture := range match.Captures { + fn(NewNode(capture.Node, n.source)) + } + } +} + +func (n *Node) IsStringy() bool { if n.Type() == "string" { return true } - c := content(n, source) + c := n.Content() if len(c) == 0 { return false } @@ -34,115 +126,15 @@ func isStringy(n *sitter.Node, source []byte) bool { } } -func hasDescendantOfType(n *sitter.Node, t string) bool { - if n == nil { - return false - } - - // node is provided type exactly - if n.Type() == t { - return true - } - - hasType := false - enter := func(n *sitter.Node) { - if n.Type() == t { - hasType = true - } - } - - walk(n, enter) - return hasType -} - -// cleanURL takes a node representing a URL and attempts to make it -// at least somewhat easily parseable. 
It's common to build URLs out -// of variables and function calls so we want to turn something like: -// -// './upload.php?profile='+res.id+'&show='+$('.participate_modal_container').attr('data-val') -// -// Into something more like: -// -// ./upload.php?profile=EXPR&show=EXPR -// -func cleanURL(n *sitter.Node, source []byte) string { - if n == nil { - return "" - } - switch n.Type() { - case "binary_expression": - return fmt.Sprintf( - "%s%s", - cleanURL(n.ChildByFieldName("left"), source), - cleanURL(n.ChildByFieldName("right"), source), - ) - case "string": - return dequote(content(n, source)) - default: - return "EXPR" - } -} - func dequote(in string) string { return strings.Trim(in, "'\"`") } -func query(n *sitter.Node, query string, enter func(*sitter.Node)) { - q, err := sitter.NewQuery( - []byte(query), - javascript.GetLanguage(), - ) - if err != nil { - return +func content(n *sitter.Node, source []byte) string { + if n == nil { + return "" } - - qc := sitter.NewQueryCursor() - defer qc.Close() - - qc.Exec(q, n) - - for { - match, exists := qc.NextMatch() - if !exists || match == nil { - break - } - - for _, capture := range match.Captures { - enter(capture.Node) - } - } -} - -func walk(n *sitter.Node, enter func(*sitter.Node)) { - - c := sitter.NewTreeCursor(n) - defer c.Close() - - // walkies - recurse := true - for { - // descend into the tree - if recurse && c.GoToFirstChild() { - recurse = true - enter(c.CurrentNode()) - continue - } - - // move sideways - if c.GoToNextSibling() { - recurse = true - enter(c.CurrentNode()) - continue - } - - // climb back up the tree, but make sure we don't descend right back to where we were - if c.GoToParent() { - recurse = false - continue - } - break - } - + return n.Content(source) } func PrintTree(source []byte) { diff --git a/tree_test.go b/tree_test.go index 31c73b9..60b4cfd 100644 --- a/tree_test.go +++ b/tree_test.go @@ -8,7 +8,7 @@ import ( "github.com/smacker/go-tree-sitter/javascript" ) -func 
TestCleanURL(t *testing.T) { +func TestCollapsedString(t *testing.T) { cases := []struct { JS []byte Expected string @@ -24,7 +24,7 @@ func TestCleanURL(t *testing.T) { for i, c := range cases { t.Run(strconv.Itoa(i), func(t *testing.T) { tree := parser.Parse(nil, c.JS) - root := tree.RootNode() + root := NewNode(tree.RootNode(), c.JS) // Example tree: // program @@ -33,12 +33,12 @@ func TestCleanURL(t *testing.T) { // left: string ("./login.php?redirect=") // right: identifier (url) // - // We want the binary_expression to pass to cleanURL, which is + // We want the binary_expression to pass to CollapsedString, which is // the first Named Child of the first Named Child of the root node. - actual := cleanURL(root.NamedChild(0).NamedChild(0), c.JS) + actual := root.NamedChild(0).NamedChild(0).CollapsedString() if actual != c.Expected { - t.Errorf("want %s for cleanURL(%s), have: %s", c.Expected, c.JS, actual) + t.Errorf("want %s for CollapsedString(%s), have: %s", c.Expected, c.JS, actual) } }) } diff --git a/url-match-jquery.go b/url-match-jquery.go index 290f3a3..d87e2e4 100644 --- a/url-match-jquery.go +++ b/url-match-jquery.go @@ -3,14 +3,13 @@ package jsluice import ( "strings" - sitter "github.com/smacker/go-tree-sitter" "golang.org/x/exp/slices" ) func matchJQuery() URLMatcher { - return URLMatcher{"call_expression", func(n *sitter.Node, source []byte) *URL { - callName := content(n.ChildByFieldName("function"), source) + return URLMatcher{"call_expression", func(n *Node, source []byte) *URL { + callName := n.ChildByFieldName("function").Content() if !slices.Contains( []string{ @@ -48,7 +47,7 @@ func matchJQuery() URLMatcher { m := &URL{ Type: callName, - Source: content(n, source), + Source: n.Content(), } // Infer the method for .post and .get calls @@ -58,11 +57,11 @@ func matchJQuery() URLMatcher { m.Method = "GET" } - var settingsNode *sitter.Node + var settingsNode *Node - if isStringy(firstArg, source) { + if firstArg.IsStringy() { // first argument is 
the URL - m.URL = cleanURL(firstArg, source) + m.URL = firstArg.CollapsedString() // If the first arg is a URL, the second arg is a // settings object for $.ajax, or a data object for @@ -93,7 +92,7 @@ func matchJQuery() URLMatcher { settings := newObject(settingsNode, source) if m.URL == "" { - m.URL = cleanURL(settings.getNode("url"), source) + m.URL = settings.getNode("url").CollapsedString() } m.Headers = settings.getObject("headers").asMap() diff --git a/url-match-xhr.go b/url-match-xhr.go index 22b52f8..a7b7cee 100644 --- a/url-match-xhr.go +++ b/url-match-xhr.go @@ -4,28 +4,27 @@ import ( "strings" "sync" - sitter "github.com/smacker/go-tree-sitter" "golang.org/x/exp/slices" ) type nodeCache struct { sync.RWMutex - data map[*sitter.Node][]*sitter.Node + data map[*Node][]*Node } func newNodeCache() *nodeCache { return &nodeCache{ - data: make(map[*sitter.Node][]*sitter.Node), + data: make(map[*Node][]*Node), } } -func (c *nodeCache) set(k *sitter.Node, v []*sitter.Node) { +func (c *nodeCache) set(k *Node, v []*Node) { c.Lock() c.data[k] = v c.Unlock() } -func (c *nodeCache) get(k *sitter.Node) ([]*sitter.Node, bool) { +func (c *nodeCache) get(k *Node) ([]*Node, bool) { c.RLock() v, exists := c.data[k] c.RUnlock() @@ -35,8 +34,8 @@ func (c *nodeCache) get(k *sitter.Node) ([]*sitter.Node, bool) { func matchXHR() URLMatcher { cache := newNodeCache() - return URLMatcher{"call_expression", func(n *sitter.Node, source []byte) *URL { - callName := content(n.ChildByFieldName("function"), source) + return URLMatcher{"call_expression", func(n *Node, source []byte) *URL { + callName := n.ChildByFieldName("function").Content() // We don't know what the XMLHttpRequest object will be called, // so we have to focus on just the .open bit @@ -50,7 +49,7 @@ func matchXHR() URLMatcher { // This will miss cases where the method is a variable. 
arguments := n.ChildByFieldName("arguments") - method := dequote(content(arguments.NamedChild(0), source)) + method := arguments.NamedChild(0).RawString() if !slices.Contains( []string{"GET", "HEAD", "OPTIONS", "POST", "PUT", "PATCH", "DELETE"}, @@ -60,15 +59,15 @@ func matchXHR() URLMatcher { } urlArg := arguments.NamedChild(1) - if !isStringy(urlArg, source) { + if !urlArg.IsStringy() { return nil } match := &URL{ - URL: cleanURL(urlArg, source), + URL: urlArg.CollapsedString(), Method: method, Type: "XMLHttpRequest.open", - Source: content(n, source), + Source: n.Content(), } // to find headers we need to look for calls to setRequestHeader() on @@ -103,7 +102,7 @@ func matchXHR() URLMatcher { // Look for call_expressions under the same parent as our .open call. // It's common to end up querying the exact same parent over and over // again, so we cache the results on a per-parent node basis. - nodes := make([]*sitter.Node, 0) + nodes := make([]*Node, 0) if v, exists := cache.get(parent); exists { nodes = v } else { @@ -116,7 +115,7 @@ func matchXHR() URLMatcher { arguments: (arguments (string)) ) @matches ` - query(parent, q, func(sibling *sitter.Node) { + parent.Query(q, func(sibling *Node) { nodes = append(nodes, sibling) }) cache.set(parent, nodes) @@ -131,7 +130,7 @@ func matchXHR() URLMatcher { // it's possible for the .send to be wrapped in a conditional so that might // cause us to miss some values. 
for _, sibling := range nodes { - name := content(sibling.ChildByFieldName("function"), source) + name := sibling.ChildByFieldName("function").Content() if !strings.HasSuffix(name, ".setRequestHeader") { continue } @@ -146,7 +145,7 @@ func matchXHR() URLMatcher { continue } - header := dequote(content(headerNode, source)) + header := headerNode.RawString() if _, exists := headers[header]; exists { continue } @@ -154,7 +153,7 @@ func matchXHR() URLMatcher { var value string valueNode := args.NamedChild(1) if valueNode != nil && valueNode.Type() == "string" { - value = dequote(content(valueNode, source)) + value = valueNode.RawString() } headers[header] = value diff --git a/url-matchers.go b/url-matchers.go index b9be5b2..98dddcb 100644 --- a/url-matchers.go +++ b/url-matchers.go @@ -4,8 +4,6 @@ import ( "net/url" "regexp" "strings" - - sitter "github.com/smacker/go-tree-sitter" ) // A URL is any URL found in the source code with accompanying details @@ -36,7 +34,7 @@ func (a *Analyzer) GetURLs() []*URL { re := regexp.MustCompile("[^A-Z-a-z]") // function to run on entry to each node in the tree - enter := func(n *sitter.Node) { + enter := func(n *Node) { for _, matcher := range a.urlMatchers { if matcher.Type != n.Type() { @@ -98,7 +96,7 @@ func (a *Analyzer) GetURLs() []*URL { } // find the nodes we need in the the tree and run the enter function for every node - query(a.rootNode, "[(assignment_expression) (call_expression) (string)] @matches", enter) + a.Query("[(assignment_expression) (call_expression) (string)] @matches", enter) return matches } @@ -121,7 +119,7 @@ func unique[T comparable](items []T) []T { // and a function to actually do the matching and producing of the *URL type URLMatcher struct { Type string - Fn func(*sitter.Node, []byte) *URL + Fn func(*Node, []byte) *URL } // AllURLMatchers returns the detault list of URLMatchers @@ -158,11 +156,11 @@ func AllURLMatchers() []URLMatcher { matchJQuery(), // location assignment - {"assignment_expression", 
func(n *sitter.Node, source []byte) *URL { + {"assignment_expression", func(n *Node, source []byte) *URL { left := n.ChildByFieldName("left") right := n.ChildByFieldName("right") - if !isInterestingAssignment(content(left, source)) { + if !isInterestingAssignment(left.Content()) { return nil } @@ -177,7 +175,7 @@ func AllURLMatchers() []URLMatcher { // // So while we might miss out on some things this way, they probably wouldn't // have been super useful to anything automated anyway. - rightContent := content(right, source) + rightContent := right.Content() if len(rightContent) < 2 { return nil } @@ -187,16 +185,16 @@ func AllURLMatchers() []URLMatcher { } return &URL{ - URL: cleanURL(right, source), + URL: right.CollapsedString(), Method: "GET", Type: "locationAssignment", - Source: content(n, source), + Source: n.Content(), } }}, // location replacement - {"call_expression", func(n *sitter.Node, source []byte) *URL { - callName := content(n.ChildByFieldName("function"), source) + {"call_expression", func(n *Node, source []byte) *URL { + callName := n.ChildByFieldName("function").Content() if !strings.HasSuffix(callName, "location.replace") { return nil @@ -205,69 +203,69 @@ func AllURLMatchers() []URLMatcher { arguments := n.ChildByFieldName("arguments") // check the argument contains at least one string literal - if !hasDescendantOfType(arguments.NamedChild(0), "string") { + if !arguments.NamedChild(0).IsStringy() { return nil } return &URL{ - URL: cleanURL(arguments.NamedChild(0), source), + URL: arguments.NamedChild(0).CollapsedString(), Method: "GET", Type: "locationReplacement", - Source: content(n, source), + Source: n.Content(), } }}, // window.open(url) - {"call_expression", func(n *sitter.Node, source []byte) *URL { - callName := content(n.ChildByFieldName("function"), source) + {"call_expression", func(n *Node, source []byte) *URL { + callName := n.ChildByFieldName("function").Content() if callName != "window.open" && callName != "open" { return nil } 
arguments := n.ChildByFieldName("arguments") // check the argument contains at least one string literal - if !hasDescendantOfType(arguments.NamedChild(0), "string") { + if !arguments.NamedChild(0).IsStringy() { return nil } return &URL{ - URL: cleanURL(arguments.NamedChild(0), source), + URL: arguments.NamedChild(0).CollapsedString(), Method: "GET", Type: "window.open", - Source: content(n, source), + Source: n.Content(), } return nil }}, // fetch(url, [init]) - {"call_expression", func(n *sitter.Node, source []byte) *URL { - callName := content(n.ChildByFieldName("function"), source) + {"call_expression", func(n *Node, source []byte) *URL { + callName := n.ChildByFieldName("function").Content() if callName != "fetch" { return nil } arguments := n.ChildByFieldName("arguments") // check the argument contains at least one string literal - if !hasDescendantOfType(arguments.NamedChild(0), "string") { + if !arguments.NamedChild(0).IsStringy() { return nil } init := newObject(arguments.NamedChild(1), source) return &URL{ - URL: cleanURL(arguments.NamedChild(0), source), + URL: arguments.NamedChild(0).CollapsedString(), Method: init.getString("method", "GET"), Headers: init.getObject("headers").asMap(), ContentType: init.getObject("headers").getStringI("content-type", ""), Type: "fetch", - Source: content(n, source), + Source: n.Content(), } return nil }}, // string literals - {"string", func(n *sitter.Node, source []byte) *URL { - trimmed := dequote(content(n, source)) + {"string", func(n *Node, source []byte) *URL { + trimmed := n.RawString() if !MaybeURL(trimmed) { return nil @@ -276,7 +274,7 @@ func AllURLMatchers() []URLMatcher { return &URL{ URL: trimmed, Type: "stringLiteral", - Source: content(n, source), + Source: n.Content(), } }}, }