# This script scans multiple TrueNAS datasets, collects detailed metadata about every file,
# categorizes each file by type, and saves the results to a JSON file (glacier_files_inventory.json).
# This is useful for inventory management, organization, and analysis of files across your storage.
#
|
|
import hashlib
|
|
import json
|
|
import os
|
|
from datetime import datetime
|
|
|
|
# ---------------------------------------------
# CONFIGURATION
# ---------------------------------------------
|
|
|
|
# Datasets to scan: label -> local path where the TrueNAS dataset is NFS-mounted.
# Add every mounted dataset here.
DATASETS = {
    "GlacierBucket1": "/mnt/bucket1",
    "GlacierArchives": "/mnt/archives",
    "GlacierBackups": "/mnt/backups",
    "GlacierVault": "/mnt/vault_bucket",
}

# Name of the JSON file the inventory is written to.
OUTPUT = "glacier_files_inventory.json"

# Set to True to hash file contents as well (slow on remote mounts).
ENABLE_HASH = False
|
|
|
|
|
|
# ---------------------------------------------
# HELPERS
# ---------------------------------------------
|
|
|
|
|
|
def sha1(path, enabled=None):
    """Return the SHA-1 hex digest of the file at *path*, or None.

    Args:
        path: Path of the file to hash.
        enabled: Optional override for the module-level ENABLE_HASH switch.
            ``None`` (the default) defers to ENABLE_HASH, so existing
            single-argument callers behave as before.

    Returns:
        The hex digest string, or None when hashing is disabled or the file
        cannot be opened or read.
    """
    if enabled is None:
        enabled = ENABLE_HASH
    if not enabled:
        return None

    digest = hashlib.sha1()
    try:
        with open(path, "rb") as f:
            # Read in fixed-size chunks so a large file is never held in
            # memory all at once.
            for chunk in iter(lambda: f.read(8192), b""):
                digest.update(chunk)
    except OSError:
        # Best effort: an unreadable file simply gets no hash. Catching only
        # OSError (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long scan can still be interrupted.
        return None
    return digest.hexdigest()
|
|
|
|
|
|
# DJI drone media: consulted only when the filename starts with "dji".
_DJI_CATEGORY_BY_EXT = {
    ".jpg": "dji_photo",
    ".jpeg": "dji_photo",
    ".dng": "dji_photo",
    ".rw2": "dji_photo",
    ".mp4": "dji_video",
    ".mov": "dji_video",
    ".srt": "dji_sidecar",
    ".lrv": "dji_sidecar",
}

# General categories and the extensions that belong to each of them.
_EXTENSIONS_BY_CATEGORY = (
    (
        "tax",
        (
            # TurboTax (US)
            ".tax", ".tax2018", ".tax2019", ".tax2020", ".tax2021",
            ".tax2022", ".tax2023", ".tax2024", ".tax2025", ".tax2026",
            # UFile (Canada)
            ".u12", ".u13", ".u14", ".u15", ".u16", ".u17", ".u18", ".u19",
            ".u20", ".u21", ".u22", ".u23", ".u24", ".u25", ".u26",
            # TaxTron (Canada)
            ".tt18", ".tt19", ".tt20", ".tt21", ".tt22", ".tt23", ".tt24",
            ".tt25", ".tt26",
            # Profile (Canada)
            ".pr0", ".pr1", ".pr2", ".pr3", ".pr4", ".pr5", ".pr6", ".pr7",
            ".pr8", ".pr9", ".p20", ".p21", ".p22", ".p23",
            ".fx0", ".fx1", ".fx2",
            # Other finance formats
            ".qif", ".ofx", ".qfx", ".money", ".t1", ".t2",
        ),
    ),
    ("video", (".mp4", ".mkv", ".avi", ".mov")),
    ("audio", (".mp3", ".wav", ".flac", ".aac")),
    (
        "document",
        (
            ".doc", ".docx", ".dot", ".dotx",
            ".xls", ".xlsx", ".xlsm", ".xlsb", ".xlt", ".xltx",
            ".ppt", ".pptx", ".pptm", ".pot", ".potx",
            ".odt", ".ods", ".odp", ".odg",
            ".pdf", ".rtf", ".txt", ".md", ".csv", ".tsv", ".epub",
        ),
    ),
    ("image", (".jpg", ".jpeg", ".png", ".gif", ".raw", ".tiff")),
    ("iso", (".iso", ".img")),
    (
        "archive",
        (
            ".zip", ".tar", ".gz", ".tgz", ".tar.gz",
            ".bz2", ".tbz", ".tar.bz2",
            ".xz", ".txz", ".tar.xz",
            ".7z", ".rar", ".lz", ".lzma", ".zst",
        ),
    ),
    (
        "source",
        (
            ".c", ".h", ".cpp", ".hpp", ".cc",
            ".sh", ".bash", ".zsh",
            ".py", ".js", ".ts", ".go", ".rs", ".java", ".php", ".rb",
            ".swift", ".cs", ".sql",
            ".yaml", ".yml", ".json", ".xml", ".ini", ".cfg", ".toml",
            ".make", ".mk",
        ),
    ),
)

# Flattened extension -> category lookup; no extension is listed twice above.
_CATEGORY_BY_EXT = {
    extension: category
    for category, extensions in _EXTENSIONS_BY_CATEGORY
    for extension in extensions
}


def categorize(ext, filename):
    """Return the category label for a file, given its extension and name.

    Both arguments are compared case-insensitively. *ext* is expected to
    include its leading dot (for example ".pdf"). Multi-part entries such as
    ".tar.gz" match only when passed verbatim as *ext*.

    Files whose name starts with "dji" are checked against the DJI media
    extensions first and yield "dji_photo", "dji_video" or "dji_sidecar";
    a "dji" file with any other extension is categorized like every other
    file. Extensions that belong to no category yield "other".
    """
    key = ext.lower()

    if filename.lower().startswith("dji"):
        dji_category = _DJI_CATEGORY_BY_EXT.get(key)
        if dji_category is not None:
            return dji_category

    return _CATEGORY_BY_EXT.get(key, "other")
|
|
|
|
|
|
# ---------------------------------------------
# MAIN SCAN LOGIC
# ---------------------------------------------
|
|
|
|
# One dict per indexed file, accumulated across every dataset in DATASETS.
inventory = []


def _report_walk_error(err):
    """Print a directory-listing failure raised inside os.walk."""
    print(f"Error reading directory {err.filename}: {err}")


for dataset_name, root in DATASETS.items():
    print(f"Scanning dataset: {dataset_name} → {root}")

    # Skip datasets whose mount point is missing instead of aborting the run.
    if not os.path.exists(root):
        print(f"⚠ WARNING: Path does not exist: {root}")
        continue

    # os.walk ignores directory-listing errors unless onerror is supplied;
    # report them so an unreadable directory is not dropped from the
    # inventory without any trace.
    for dirpath, _, files in os.walk(root, onerror=_report_walk_error):
        for f in files:
            full = os.path.join(dirpath, f)
            try:
                stat = os.stat(full)
                # Only the last suffix is kept, e.g. ".gz" for "a.tar.gz".
                ext = os.path.splitext(f)[1]

                inventory.append(
                    {
                        "dataset": dataset_name,
                        "path": full,
                        "filename": f,
                        "extension": ext,
                        "size_bytes": stat.st_size,
                        "modified": stat.st_mtime,
                        # NOTE: on Unix st_ctime is the time of the last
                        # metadata change, not the creation time; the key
                        # name is kept so existing consumers keep working.
                        "created": stat.st_ctime,
                        "hash_sha1": sha1(full),
                        "category": categorize(ext, f),
                    }
                )
            except Exception as e:
                # Best effort: report the file and carry on with the rest.
                print(f"Error reading file {full}: {e}")
|
|
|
|
|
|
# ---------------------------------------------
# SAVE OUTPUT
# ---------------------------------------------
|
|
|
|
# Write the whole inventory as indented JSON, replacing any previous file.
with open(OUTPUT, mode="w") as out:
    json.dump(inventory, fp=out, indent=2)

# Summarise the run on stdout.
print(f"\nInventory complete. {len(inventory)} files indexed.")
print(f"Output saved to: {OUTPUT}")
|