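"""Summarize a GlacierEdge file inventory as a Markdown report.

Reads inventory records from ``glacier_files_inventory.json`` and writes
the summary to ``glacier_files_inventory_summary.md``.
"""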
import json
|
|
from collections import Counter, defaultdict
|
|
|
|
# Path of the inventory JSON that main() loads.
INPUT_FILE = "glacier_files_inventory.json"
# Path of the Markdown report that main() writes.
OUTPUT_FILE = "glacier_files_inventory_summary.md"
|
|
|
|
def summarize(records):
    """Build a Markdown summary of a collection of inventory records.

    Args:
        records: A sized collection (e.g. a list) of dict-like records.
            The keys read are ``dataset``, ``category``, ``extension``
            and ``filename``; each one is optional.

    Returns:
        The report as a single string: a title, the total record count,
        then "Datasets", "Categories", "Top Extensions" (at most 20
        entries) and "Notable Patterns" sections, joined with newlines.
    """
    datasets = Counter()
    categories = Counter()
    extensions = Counter()
    # Only the number of matches per label is reported, so count hits
    # instead of accumulating every filename in memory.
    notable = Counter()

    for r in records:
        dataset = r.get("dataset", "unknown")
        category = r.get("category", "unknown")
        # `or ""` also covers keys that are present but null, which would
        # otherwise raise AttributeError on .lower().
        ext = (r.get("extension") or "").lower()
        fname = (r.get("filename") or "").lower()

        datasets[dataset] += 1
        categories[category] += 1
        extensions[ext] += 1

        # Detect notable patterns. The checks are independent: one record
        # may count towards several labels.
        if fname.startswith("dji"):
            notable["DJI media"] += 1
        if category == "tax":
            notable["Tax files"] += 1
        if category == "archive":
            notable["Archives"] += 1
        if category == "source":
            notable["Source code"] += 1

    summary = [
        "# GlacierEdge Inventory Summary\n",
        f"Total files indexed: **{len(records)}**\n",
    ]

    summary.append("## Datasets")
    summary.extend(
        f"- **{ds}**: {count} files" for ds, count in datasets.most_common()
    )
    summary.append("")

    summary.append("## Categories")
    summary.extend(
        f"- **{cat}**: {count}" for cat, count in categories.most_common()
    )
    summary.append("")

    summary.append("## Top Extensions")
    summary.extend(
        f"- `{ext}`: {count}" for ext, count in extensions.most_common(20)
    )
    summary.append("")

    summary.append("## Notable Patterns")
    if not notable:
        summary.append("- None detected")
    else:
        # Labels appear in the order they were first encountered.
        summary.extend(
            f"- **{label}**: {count} files" for label, count in notable.items()
        )
    summary.append("")

    return "\n".join(summary)
|
|
|
|
def main():
    """Load the inventory, summarize it, and write the Markdown report.

    Reads JSON from ``INPUT_FILE``, passes the parsed records to
    ``summarize``, writes the resulting text to ``OUTPUT_FILE`` and prints
    a confirmation message.
    """
    # Pin UTF-8 explicitly: the platform default encoding may not be UTF-8
    # (e.g. on Windows), which can break reading or writing non-ASCII text.
    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        records = json.load(f)

    summary_text = summarize(records)

    with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        out.write(summary_text)

    print(f"\n✅ Summary saved to: {OUTPUT_FILE}")
|
|
|
|
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|