Refactors to wrap *sitter.Node with *Node

This commit is contained in:
Tom Hudson
2022-09-09 16:45:50 +01:00
parent df1e68fac8
commit 2dc14cb4e7
10 changed files with 187 additions and 195 deletions

View File

@@ -1,7 +1,6 @@
# jsluice
A Go package and tool for extracting URLs, secrets, and other interesting data from JavaScript files.
Uses [go-tree-sitter](https://github.com/smacker/go-tree-sitter) for parsing.
A Go package for extracting URLs, secrets, and other interesting data from JavaScript.
## Extracting URLs
@@ -9,7 +8,7 @@ Uses [go-tree-sitter](https://github.com/smacker/go-tree-sitter) for parsing.
Rather than using regular expressions alone, `jsluice` uses `go-tree-sitter` to look for places that URLs are known to be used,
such as being assigned to `document.location`, passed to `window.open()`, or passed to `fetch()` etc.
A simple example program is provided [here](/bishopfoxmss/jsluice/blob/main/examples/basic/main.go):
A simple example program is provided [here](/examples/basic/main.go):
```go
package main

View File

@@ -12,7 +12,7 @@ type Analyzer struct {
source []byte
parser *sitter.Parser
urlMatchers []URLMatcher
rootNode *sitter.Node
rootNode *Node
}
// NewAnalyzer accepts a slice of bytes representing some JavaScript
@@ -26,6 +26,14 @@ func NewAnalyzer(source []byte) *Analyzer {
source: source,
parser: parser,
urlMatchers: AllURLMatchers(),
rootNode: tree.RootNode(),
rootNode: NewNode(tree.RootNode(), source),
}
}
// Query runs a tree-sitter query against the JavaScript being analyzed,
// starting at the analyzer's root node. fn is invoked for every node
// captured by the query.
// See https://tree-sitter.github.io/tree-sitter/using-parsers#query-syntax
// for details on query syntax.
func (a *Analyzer) Query(q string, fn func(*Node)) {
	root := a.rootNode
	root.Query(q, fn)
}

1
cmd/jsecrets/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
jsecrets

View File

@@ -2,16 +2,14 @@ package jsluice
import (
"strings"
sitter "github.com/smacker/go-tree-sitter"
)
type object struct {
node *sitter.Node
node *Node
source []byte
}
func newObject(n *sitter.Node, source []byte) object {
func newObject(n *Node, source []byte) object {
return object{
node: n,
source: source,
@@ -34,7 +32,7 @@ func (o object) hasValidNode() bool {
return o.node != nil && o.node.Type() == "object"
}
func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node {
func (o object) getNodeFunc(fn func(key string) bool) *Node {
if !o.hasValidNode() {
return nil
}
@@ -48,7 +46,7 @@ func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node {
continue
}
if !fn(dequote(content(pair.ChildByFieldName("key"), o.source))) {
if !fn(pair.ChildByFieldName("key").RawString()) {
continue
}
@@ -57,13 +55,13 @@ func (o object) getNodeFunc(fn func(key string) bool) *sitter.Node {
return nil
}
func (o object) getNode(key string) *sitter.Node {
func (o object) getNode(key string) *Node {
return o.getNodeFunc(func(candidate string) bool {
return key == candidate
})
}
func (o object) getNodeI(key string) *sitter.Node {
func (o object) getNodeI(key string) *Node {
key = strings.ToLower(key)
return o.getNodeFunc(func(candidate string) bool {
return key == strings.ToLower(candidate)
@@ -85,7 +83,7 @@ func (o object) getKeys() []string {
continue
}
key := dequote(content(pair.ChildByFieldName("key"), o.source))
key := pair.ChildByFieldName("key").RawString()
out = append(out, key)
}
return out
@@ -100,7 +98,7 @@ func (o object) getString(key, defaultVal string) string {
if value == nil || value.Type() != "string" {
return defaultVal
}
return dequote(content(value, o.source))
return value.RawString()
}
func (o object) getStringI(key, defaultVal string) string {
@@ -108,5 +106,5 @@ func (o object) getStringI(key, defaultVal string) string {
if value == nil || value.Type() != "string" {
return defaultVal
}
return dequote(content(value, o.source))
return value.RawString()
}

View File

@@ -2,8 +2,6 @@ package jsluice
import (
"strings"
sitter "github.com/smacker/go-tree-sitter"
)
// A Secret represents any bit of secret or otherwise interesting
@@ -21,14 +19,14 @@ func (a *Analyzer) GetSecrets() []*Secret {
out := make([]*Secret, 0)
// we only want to run each query once so let's cache them
nodeCache := make(map[string][]*sitter.Node)
nodeCache := make(map[string][]*Node)
matchers := AllSecretMatchers()
for _, m := range matchers {
if _, exists := nodeCache[m.Query]; !exists {
nodes := make([]*sitter.Node, 0)
query(a.rootNode, m.Query, func(n *sitter.Node) {
nodes := make([]*Node, 0)
a.Query(m.Query, func(n *Node) {
nodes = append(nodes, n)
})
nodeCache[m.Query] = nodes
@@ -53,15 +51,15 @@ func (a *Analyzer) GetSecrets() []*Secret {
// returning any Secret that is found.
type SecretMatcher struct {
Query string
Fn func(*sitter.Node, []byte) *Secret
Fn func(*Node, []byte) *Secret
}
// AllSecretMatchers returns the default list of SecretMatchers
func AllSecretMatchers() []SecretMatcher {
return []SecretMatcher{
// AWS Keys
{"(string) @matches", func(n *sitter.Node, source []byte) *Secret {
str := dequote(content(n, source))
{"(string) @matches", func(n *Node, source []byte) *Secret {
str := n.RawString()
// https://docs.aws.amazon.com/STS/latest/APIReference/API_Credentials.html
if len(str) < 16 || len(str) > 128 {
@@ -124,7 +122,7 @@ func AllSecretMatchers() []SecretMatcher {
}},
// REACT_APP_... containing objects
{"(object) @matches", func(n *sitter.Node, source []byte) *Secret {
{"(object) @matches", func(n *Node, source []byte) *Secret {
return nil
o := newObject(n, source)
@@ -148,7 +146,7 @@ func AllSecretMatchers() []SecretMatcher {
}},
// Firebase objects
{"(object) @matches", func(n *sitter.Node, source []byte) *Secret {
{"(object) @matches", func(n *Node, source []byte) *Secret {
o := newObject(n, source)
mustHave := map[string]bool{

216
tree.go
View File

@@ -8,20 +8,112 @@ import (
"github.com/smacker/go-tree-sitter/javascript"
)
// nil-safe wrapper around calling node.Content(source)
func content(n *sitter.Node, source []byte) string {
if n == nil {
return ""
}
return n.Content(source)
// Node pairs a tree-sitter node with the source bytes used to read its
// content, so callers do not have to pass the source to every helper.
// The wrapped node may be nil; Content and Type return "" in that case.
type Node struct {
node *sitter.Node
source []byte
}
func isStringy(n *sitter.Node, source []byte) bool {
// NewNode wraps the tree-sitter node n together with source, the bytes
// that are later handed to n when its content is read. n may be nil.
func NewNode(n *sitter.Node, source []byte) *Node {
	wrapped := new(Node)
	wrapped.node = n
	wrapped.source = source
	return wrapped
}
// Content returns the wrapped node's content as read from the stored
// source, or an empty string when no tree-sitter node is wrapped.
func (n *Node) Content() string {
	if inner := n.node; inner != nil {
		return inner.Content(n.source)
	}
	return ""
}
// Type returns the tree-sitter type name of the wrapped node, or an
// empty string when no tree-sitter node is wrapped.
func (n *Node) Type() string {
	if inner := n.node; inner != nil {
		return inner.Type()
	}
	return ""
}
// ChildByFieldName returns the child stored under the given field name,
// wrapped with the same source. The result is never nil: when the receiver
// is nil, wraps no tree-sitter node, or has no such child, the returned
// Node wraps a nil tree-sitter node, so Content and Type yield "".
func (n *Node) ChildByFieldName(name string) *Node {
	if n == nil {
		return NewNode(nil, nil)
	}
	if n.node == nil {
		// Calling through a nil *sitter.Node would panic; keep the chain
		// alive with an empty wrapper instead, matching Content and Type.
		return NewNode(nil, n.source)
	}
	return NewNode(n.node.ChildByFieldName(name), n.source)
}
// NamedChild returns the named child at the given zero-based index,
// wrapped with the same source. The result is never nil: when the receiver
// is nil, wraps no tree-sitter node, or the index does not refer to a
// child, the returned Node wraps a nil tree-sitter node.
func (n *Node) NamedChild(index int) *Node {
	if n == nil {
		return NewNode(nil, nil)
	}
	if n.node == nil || index < 0 {
		return NewNode(nil, n.source)
	}
	// Use the requested index; previously child 0 was always returned
	// regardless of the argument.
	return NewNode(n.node.NamedChild(index), n.source)
}
// NamedChildCount returns the number of named children of the wrapped
// node, or 0 when the receiver is nil or wraps no tree-sitter node.
func (n *Node) NamedChildCount() int {
	if n == nil || n.node == nil {
		return 0
	}
	return int(n.node.NamedChildCount())
}
// CollapsedString takes a node representing a URL and attempts to make it
// at least somewhat easily parseable. It's common to build URLs out
// of variables and function calls so we want to turn something like:
//
//	'./upload.php?profile='+res.id+'&show='+$('.participate_modal_container').attr('data-val')
//
// Into something more like:
//
//	./upload.php?profile=EXPR&show=EXPR
//
// Binary expressions are collapsed recursively (left then right), string
// nodes contribute their dequoted content, and every other node type is
// replaced by the placeholder "EXPR". A nil receiver, or a Node wrapping
// no tree-sitter node, yields "".
func (n *Node) CollapsedString() string {
	// Lookups such as object.getNode can hand back a nil *Node, so the
	// receiver itself must be checked before touching its fields.
	if n == nil || n.node == nil {
		return ""
	}

	switch n.Type() {
	case "binary_expression":
		return n.ChildByFieldName("left").CollapsedString() +
			n.ChildByFieldName("right").CollapsedString()
	case "string":
		return n.RawString()
	default:
		return "EXPR"
	}
}
// RawString returns the node's content after passing it through dequote.
func (n *Node) RawString() string {
	raw := n.Content()
	return dequote(raw)
}
// Parent returns the parent of the wrapped node, wrapped with the same
// source. The result is never nil: when the receiver is nil, wraps no
// tree-sitter node, or has no parent, the returned Node wraps a nil
// tree-sitter node.
func (n *Node) Parent() *Node {
	if n == nil {
		return NewNode(nil, nil)
	}
	if n.node == nil {
		// Calling through a nil *sitter.Node would panic.
		return NewNode(nil, n.source)
	}
	return NewNode(n.node.Parent(), n.source)
}
// Query runs the tree-sitter query against the subtree rooted at this
// node and calls fn once for every captured node, wrapped with the same
// source. If the query fails to compile, or the receiver is nil or wraps
// no tree-sitter node, fn is never called and Query returns silently.
func (n *Node) Query(query string, fn func(*Node)) {
	// Executing a cursor against a nil node would panic.
	if n == nil || n.node == nil {
		return
	}

	q, err := sitter.NewQuery(
		[]byte(query),
		javascript.GetLanguage(),
	)
	if err != nil {
		// The signature has no way to report the error; treat an
		// invalid query as matching nothing.
		return
	}
	// Release the compiled query when done rather than leaving it to
	// the garbage collector. Deferred first so it runs after qc.Close.
	defer q.Close()

	qc := sitter.NewQueryCursor()
	defer qc.Close()

	qc.Exec(q, n.node)

	for {
		match, exists := qc.NextMatch()
		if !exists || match == nil {
			return
		}

		for _, capture := range match.Captures {
			fn(NewNode(capture.Node, n.source))
		}
	}
}
func (n *Node) IsStringy() bool {
if n.Type() == "string" {
return true
}
c := content(n, source)
c := n.Content()
if len(c) == 0 {
return false
}
@@ -34,115 +126,15 @@ func isStringy(n *sitter.Node, source []byte) bool {
}
}
func hasDescendantOfType(n *sitter.Node, t string) bool {
if n == nil {
return false
}
// node is provided type exactly
if n.Type() == t {
return true
}
hasType := false
enter := func(n *sitter.Node) {
if n.Type() == t {
hasType = true
}
}
walk(n, enter)
return hasType
}
// cleanURL takes a node representing a URL and attempts to make it
// at least somewhat easily parseable. It's common to build URLs out
// of variables and function calls so we want to turn something like:
//
// './upload.php?profile='+res.id+'&show='+$('.participate_modal_container').attr('data-val')
//
// Into something more like:
//
// ./upload.php?profile=EXPR&show=EXPR
//
func cleanURL(n *sitter.Node, source []byte) string {
if n == nil {
return ""
}
switch n.Type() {
case "binary_expression":
return fmt.Sprintf(
"%s%s",
cleanURL(n.ChildByFieldName("left"), source),
cleanURL(n.ChildByFieldName("right"), source),
)
case "string":
return dequote(content(n, source))
default:
return "EXPR"
}
}
// dequote removes the quotes surrounding a string literal. When the input
// starts and ends with the same quote character (', " or `), exactly that
// one pair is removed so that quote characters belonging to the string's
// content survive, e.g. "say 'hi'" becomes say 'hi'. Input that is not
// wrapped in a matching pair falls back to trimming any leading and
// trailing quote characters.
func dequote(in string) string {
	const quotes = "'\"`"

	if len(in) >= 2 {
		first, last := in[0], in[len(in)-1]
		if first == last && strings.IndexByte(quotes, first) >= 0 {
			return in[1 : len(in)-1]
		}
	}

	return strings.Trim(in, quotes)
}
func query(n *sitter.Node, query string, enter func(*sitter.Node)) {
q, err := sitter.NewQuery(
[]byte(query),
javascript.GetLanguage(),
)
if err != nil {
return
func content(n *sitter.Node, source []byte) string {
if n == nil {
return ""
}
qc := sitter.NewQueryCursor()
defer qc.Close()
qc.Exec(q, n)
for {
match, exists := qc.NextMatch()
if !exists || match == nil {
break
}
for _, capture := range match.Captures {
enter(capture.Node)
}
}
}
func walk(n *sitter.Node, enter func(*sitter.Node)) {
c := sitter.NewTreeCursor(n)
defer c.Close()
// walkies
recurse := true
for {
// descend into the tree
if recurse && c.GoToFirstChild() {
recurse = true
enter(c.CurrentNode())
continue
}
// move sideways
if c.GoToNextSibling() {
recurse = true
enter(c.CurrentNode())
continue
}
// climb back up the tree, but make sure we don't descend right back to where we were
if c.GoToParent() {
recurse = false
continue
}
break
}
return n.Content(source)
}
func PrintTree(source []byte) {

View File

@@ -8,7 +8,7 @@ import (
"github.com/smacker/go-tree-sitter/javascript"
)
func TestCleanURL(t *testing.T) {
func TestCollapsedString(t *testing.T) {
cases := []struct {
JS []byte
Expected string
@@ -24,7 +24,7 @@ func TestCleanURL(t *testing.T) {
for i, c := range cases {
t.Run(strconv.Itoa(i), func(t *testing.T) {
tree := parser.Parse(nil, c.JS)
root := tree.RootNode()
root := NewNode(tree.RootNode(), c.JS)
// Example tree:
// program
@@ -33,12 +33,12 @@ func TestCleanURL(t *testing.T) {
// left: string ("./login.php?redirect=")
// right: identifier (url)
//
// We want the binary_expression to pass to cleanURL, which is
// We want the binary_expression to pass to CollapsedString, which is
// the first Named Child of the first Named Child of the root node.
actual := cleanURL(root.NamedChild(0).NamedChild(0), c.JS)
actual := root.NamedChild(0).NamedChild(0).CollapsedString()
if actual != c.Expected {
t.Errorf("want %s for cleanURL(%s), have: %s", c.Expected, c.JS, actual)
t.Errorf("want %s for CollapsedString(%s), have: %s", c.Expected, c.JS, actual)
}
})
}

View File

@@ -3,14 +3,13 @@ package jsluice
import (
"strings"
sitter "github.com/smacker/go-tree-sitter"
"golang.org/x/exp/slices"
)
func matchJQuery() URLMatcher {
return URLMatcher{"call_expression", func(n *sitter.Node, source []byte) *URL {
callName := content(n.ChildByFieldName("function"), source)
return URLMatcher{"call_expression", func(n *Node, source []byte) *URL {
callName := n.ChildByFieldName("function").Content()
if !slices.Contains(
[]string{
@@ -48,7 +47,7 @@ func matchJQuery() URLMatcher {
m := &URL{
Type: callName,
Source: content(n, source),
Source: n.Content(),
}
// Infer the method for .post and .get calls
@@ -58,11 +57,11 @@ func matchJQuery() URLMatcher {
m.Method = "GET"
}
var settingsNode *sitter.Node
var settingsNode *Node
if isStringy(firstArg, source) {
if firstArg.IsStringy() {
// first argument is the URL
m.URL = cleanURL(firstArg, source)
m.URL = firstArg.CollapsedString()
// If the first arg is a URL, the second arg is a
// settings object for $.ajax, or a data object for
@@ -93,7 +92,7 @@ func matchJQuery() URLMatcher {
settings := newObject(settingsNode, source)
if m.URL == "" {
m.URL = cleanURL(settings.getNode("url"), source)
m.URL = settings.getNode("url").CollapsedString()
}
m.Headers = settings.getObject("headers").asMap()

View File

@@ -4,28 +4,27 @@ import (
"strings"
"sync"
sitter "github.com/smacker/go-tree-sitter"
"golang.org/x/exp/slices"
)
type nodeCache struct {
sync.RWMutex
data map[*sitter.Node][]*sitter.Node
data map[*Node][]*Node
}
func newNodeCache() *nodeCache {
return &nodeCache{
data: make(map[*sitter.Node][]*sitter.Node),
data: make(map[*Node][]*Node),
}
}
func (c *nodeCache) set(k *sitter.Node, v []*sitter.Node) {
func (c *nodeCache) set(k *Node, v []*Node) {
c.Lock()
c.data[k] = v
c.Unlock()
}
func (c *nodeCache) get(k *sitter.Node) ([]*sitter.Node, bool) {
func (c *nodeCache) get(k *Node) ([]*Node, bool) {
c.RLock()
v, exists := c.data[k]
c.RUnlock()
@@ -35,8 +34,8 @@ func (c *nodeCache) get(k *sitter.Node) ([]*sitter.Node, bool) {
func matchXHR() URLMatcher {
cache := newNodeCache()
return URLMatcher{"call_expression", func(n *sitter.Node, source []byte) *URL {
callName := content(n.ChildByFieldName("function"), source)
return URLMatcher{"call_expression", func(n *Node, source []byte) *URL {
callName := n.ChildByFieldName("function").Content()
// We don't know what the XMLHttpRequest object will be called,
// so we have to focus on just the .open bit
@@ -50,7 +49,7 @@ func matchXHR() URLMatcher {
// This will miss cases where the method is a variable.
arguments := n.ChildByFieldName("arguments")
method := dequote(content(arguments.NamedChild(0), source))
method := arguments.NamedChild(0).RawString()
if !slices.Contains(
[]string{"GET", "HEAD", "OPTIONS", "POST", "PUT", "PATCH", "DELETE"},
@@ -60,15 +59,15 @@ func matchXHR() URLMatcher {
}
urlArg := arguments.NamedChild(1)
if !isStringy(urlArg, source) {
if !urlArg.IsStringy() {
return nil
}
match := &URL{
URL: cleanURL(urlArg, source),
URL: urlArg.CollapsedString(),
Method: method,
Type: "XMLHttpRequest.open",
Source: content(n, source),
Source: n.Content(),
}
// to find headers we need to look for calls to setRequestHeader() on
@@ -103,7 +102,7 @@ func matchXHR() URLMatcher {
// Look for call_expressions under the same parent as our .open call.
// It's common to end up querying the exact same parent over and over
// again, so we cache the results on a per-parent node basis.
nodes := make([]*sitter.Node, 0)
nodes := make([]*Node, 0)
if v, exists := cache.get(parent); exists {
nodes = v
} else {
@@ -116,7 +115,7 @@ func matchXHR() URLMatcher {
arguments: (arguments (string))
) @matches
`
query(parent, q, func(sibling *sitter.Node) {
parent.Query(q, func(sibling *Node) {
nodes = append(nodes, sibling)
})
cache.set(parent, nodes)
@@ -131,7 +130,7 @@ func matchXHR() URLMatcher {
// it's possible for the .send to be wrapped in a conditional so that might
// cause us to miss some values.
for _, sibling := range nodes {
name := content(sibling.ChildByFieldName("function"), source)
name := sibling.ChildByFieldName("function").Content()
if !strings.HasSuffix(name, ".setRequestHeader") {
continue
}
@@ -146,7 +145,7 @@ func matchXHR() URLMatcher {
continue
}
header := dequote(content(headerNode, source))
header := headerNode.RawString()
if _, exists := headers[header]; exists {
continue
}
@@ -154,7 +153,7 @@ func matchXHR() URLMatcher {
var value string
valueNode := args.NamedChild(1)
if valueNode != nil && valueNode.Type() == "string" {
value = dequote(content(valueNode, source))
value = valueNode.RawString()
}
headers[header] = value

View File

@@ -4,8 +4,6 @@ import (
"net/url"
"regexp"
"strings"
sitter "github.com/smacker/go-tree-sitter"
)
// A URL is any URL found in the source code with accompanying details
@@ -36,7 +34,7 @@ func (a *Analyzer) GetURLs() []*URL {
re := regexp.MustCompile("[^A-Z-a-z]")
// function to run on entry to each node in the tree
enter := func(n *sitter.Node) {
enter := func(n *Node) {
for _, matcher := range a.urlMatchers {
if matcher.Type != n.Type() {
@@ -98,7 +96,7 @@ func (a *Analyzer) GetURLs() []*URL {
}
// find the nodes we need in the tree and run the enter function for every node
query(a.rootNode, "[(assignment_expression) (call_expression) (string)] @matches", enter)
a.Query("[(assignment_expression) (call_expression) (string)] @matches", enter)
return matches
}
@@ -121,7 +119,7 @@ func unique[T comparable](items []T) []T {
// and a function to actually do the matching and producing of the *URL
type URLMatcher struct {
Type string
Fn func(*sitter.Node, []byte) *URL
Fn func(*Node, []byte) *URL
}
// AllURLMatchers returns the default list of URLMatchers
@@ -158,11 +156,11 @@ func AllURLMatchers() []URLMatcher {
matchJQuery(),
// location assignment
{"assignment_expression", func(n *sitter.Node, source []byte) *URL {
{"assignment_expression", func(n *Node, source []byte) *URL {
left := n.ChildByFieldName("left")
right := n.ChildByFieldName("right")
if !isInterestingAssignment(content(left, source)) {
if !isInterestingAssignment(left.Content()) {
return nil
}
@@ -177,7 +175,7 @@ func AllURLMatchers() []URLMatcher {
//
// So while we might miss out on some things this way, they probably wouldn't
// have been super useful to anything automated anyway.
rightContent := content(right, source)
rightContent := right.Content()
if len(rightContent) < 2 {
return nil
}
@@ -187,16 +185,16 @@ func AllURLMatchers() []URLMatcher {
}
return &URL{
URL: cleanURL(right, source),
URL: right.CollapsedString(),
Method: "GET",
Type: "locationAssignment",
Source: content(n, source),
Source: n.Content(),
}
}},
// location replacement
{"call_expression", func(n *sitter.Node, source []byte) *URL {
callName := content(n.ChildByFieldName("function"), source)
{"call_expression", func(n *Node, source []byte) *URL {
callName := n.ChildByFieldName("function").Content()
if !strings.HasSuffix(callName, "location.replace") {
return nil
@@ -205,69 +203,69 @@ func AllURLMatchers() []URLMatcher {
arguments := n.ChildByFieldName("arguments")
// check the argument contains at least one string literal
if !hasDescendantOfType(arguments.NamedChild(0), "string") {
if !arguments.NamedChild(0).IsStringy() {
return nil
}
return &URL{
URL: cleanURL(arguments.NamedChild(0), source),
URL: arguments.NamedChild(0).CollapsedString(),
Method: "GET",
Type: "locationReplacement",
Source: content(n, source),
Source: n.Content(),
}
}},
// window.open(url)
{"call_expression", func(n *sitter.Node, source []byte) *URL {
callName := content(n.ChildByFieldName("function"), source)
{"call_expression", func(n *Node, source []byte) *URL {
callName := n.ChildByFieldName("function").Content()
if callName != "window.open" && callName != "open" {
return nil
}
arguments := n.ChildByFieldName("arguments")
// check the argument contains at least one string literal
if !hasDescendantOfType(arguments.NamedChild(0), "string") {
if !arguments.NamedChild(0).IsStringy() {
return nil
}
return &URL{
URL: cleanURL(arguments.NamedChild(0), source),
URL: arguments.NamedChild(0).CollapsedString(),
Method: "GET",
Type: "window.open",
Source: content(n, source),
Source: n.Content(),
}
return nil
}},
// fetch(url, [init])
{"call_expression", func(n *sitter.Node, source []byte) *URL {
callName := content(n.ChildByFieldName("function"), source)
{"call_expression", func(n *Node, source []byte) *URL {
callName := n.ChildByFieldName("function").Content()
if callName != "fetch" {
return nil
}
arguments := n.ChildByFieldName("arguments")
// check the argument contains at least one string literal
if !hasDescendantOfType(arguments.NamedChild(0), "string") {
if !arguments.NamedChild(0).IsStringy() {
return nil
}
init := newObject(arguments.NamedChild(1), source)
return &URL{
URL: cleanURL(arguments.NamedChild(0), source),
URL: arguments.NamedChild(0).CollapsedString(),
Method: init.getString("method", "GET"),
Headers: init.getObject("headers").asMap(),
ContentType: init.getObject("headers").getStringI("content-type", ""),
Type: "fetch",
Source: content(n, source),
Source: n.Content(),
}
return nil
}},
// string literals
{"string", func(n *sitter.Node, source []byte) *URL {
trimmed := dequote(content(n, source))
{"string", func(n *Node, source []byte) *URL {
trimmed := n.RawString()
if !MaybeURL(trimmed) {
return nil
@@ -276,7 +274,7 @@ func AllURLMatchers() []URLMatcher {
return &URL{
URL: trimmed,
Type: "stringLiteral",
Source: content(n, source),
Source: n.Content(),
}
}},
}