Merge pull request #34 from mjsalmi/main

Add ability to exclude files and directories
This commit is contained in:
yunzheng
2021-12-17 21:33:48 +01:00
committed by GitHub
2 changed files with 24 additions and 5 deletions

View File

@@ -148,6 +148,11 @@ Or multiple directories and or files:
$ python3 log4j-finder.py /path/to/dir1 /path/to/dir2 /path/to/jarfile.jar
```
Exclude files or directories:
```bash
$ python3 log4j-finder.py / --exclude "/*/.dontgohere" --exclude "/home/user/*.war"
```
Note that on Windows it only scans the root `c:\` drive if you don't give any extra arguments.
We recommend specifying the drives you need to scan on the command line, for example (drives that don't exist are skipped):

View File

@@ -15,6 +15,9 @@
# Or multiple directories:
# $ python3 log4j-finder.py /path/to/dir1 /path/to/dir2
#
# Exclude files or directories:
# $ python3 log4j-finder.py / --exclude "/*/.dontgohere" --exclude "/home/user/*.war"
#
import os
import io
import sys
@@ -28,6 +31,7 @@ import datetime
import functools
import itertools
import collections
import fnmatch
from pathlib import Path
@@ -96,7 +100,7 @@ def md5_digest(fobj):
return d.hexdigest()
def iter_scandir(path, stats=None):
def iter_scandir(path, stats=None, exclude=None):
"""
Yields all files matching JAR_EXTENSIONS or FILENAMES recursively in path
"""
@@ -106,7 +110,7 @@ def iter_scandir(path, stats=None):
stats["files"] += 1
yield p
try:
for entry in scantree(path, stats=stats):
for entry in scantree(path, stats=stats, exclude=exclude):
if entry.is_symlink():
continue
elif entry.is_file():
@@ -119,15 +123,18 @@ def iter_scandir(path, stats=None):
log.debug(e)
def scantree(path, stats=None):
def scantree(path, stats=None, exclude=None):
"""Recursively yield DirEntry objects for given directory."""
exclude = exclude or []
try:
with os.scandir(path) as it:
for entry in it:
if any(fnmatch.fnmatch(entry.path, exclusion) for exclusion in exclude):
continue
if entry.is_dir(follow_symlinks=False):
if stats:
stats["directories"] += 1
yield from scantree(entry.path, stats=stats)
yield from scantree(entry.path, stats=stats, exclude=exclude)
else:
if stats:
stats["files"] += 1
@@ -277,6 +284,13 @@ def main():
parser.add_argument(
"-V", "--version", action="version", version=f"%(prog)s {__version__}"
)
parser.add_argument(
"-e",
"--exclude",
action='append',
help="exclude files/directories by pattern (can be used multiple times)",
metavar='PATTERN'
)
args = parser.parse_args()
logging.basicConfig(
format="%(asctime)s %(levelname)s %(message)s",
@@ -302,7 +316,7 @@ def main():
now = datetime.datetime.utcnow().replace(microsecond=0)
if not args.quiet:
print(f"[{now}] {hostname} Scanning: {directory}")
for p in iter_scandir(directory, stats=stats):
for p in iter_scandir(directory, stats=stats, exclude=args.exclude):
if p.name.lower() in FILENAMES:
stats["scanned"] += 1
log.info(f"Found file: {p}")