"""
|
|
Data analysis module for Unlighthouse reports.
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
|
|
def load_ci_result(ci_result_path: Path) -> dict:
    """Read and parse the top-level Unlighthouse CI result JSON file."""
    return json.loads(ci_result_path.read_text(encoding="utf-8"))


def load_all_audits(reports_dir: Path, site_url: str = ""):
|
|
"""
|
|
Load ALL detailed audit data from lighthouse.json files.
|
|
|
|
Returns:
|
|
tuple: (all_audits, resource_issues, element_issues, page_count)
|
|
"""
|
|
all_audits = defaultdict(lambda: {
|
|
"count": 0, "pages": [], "score_sum": 0, "title": "", "description": "",
|
|
"total_wasted_bytes": 0, "display_values": []
|
|
})
|
|
|
|
resource_issues = defaultdict(lambda: {
|
|
"count": 0, "wasted_bytes": 0, "total_bytes": 0, "pages": []
|
|
})
|
|
|
|
element_issues = defaultdict(lambda: {
|
|
"count": 0, "audit": "", "pages": []
|
|
})
|
|
|
|
page_count = 0
|
|
site_domain = site_url.replace("https://", "").replace("http://", "").split("/")[0]
|
|
|
|
for report_dir in reports_dir.iterdir():
|
|
if not report_dir.is_dir():
|
|
continue
|
|
|
|
lighthouse_json = report_dir / "lighthouse.json"
|
|
if not lighthouse_json.exists():
|
|
continue
|
|
|
|
try:
|
|
with open(lighthouse_json, "r", encoding="utf-8") as f:
|
|
report = json.load(f)
|
|
|
|
page_path = report.get("finalDisplayedUrl", report_dir.name)
|
|
page_path_short = page_path.replace(site_url, "") or "/"
|
|
audits = report.get("audits", {})
|
|
page_count += 1
|
|
|
|
for audit_id, audit_data in audits.items():
|
|
score = audit_data.get("score")
|
|
if score is None:
|
|
continue
|
|
|
|
if score < 1:
|
|
all_audits[audit_id]["count"] += 1
|
|
all_audits[audit_id]["title"] = audit_data.get("title", audit_id)
|
|
all_audits[audit_id]["description"] = audit_data.get("description", "")
|
|
all_audits[audit_id]["score_sum"] += score
|
|
|
|
if len(all_audits[audit_id]["pages"]) < 20:
|
|
all_audits[audit_id]["pages"].append(page_path_short)
|
|
|
|
display_val = audit_data.get("displayValue", "")
|
|
if display_val:
|
|
all_audits[audit_id]["display_values"].append(display_val)
|
|
|
|
details = audit_data.get("details", {})
|
|
items = details.get("items", [])
|
|
|
|
for item in items:
|
|
if not isinstance(item, dict):
|
|
continue
|
|
|
|
url = item.get("url", "")
|
|
if url and site_domain and site_domain in url:
|
|
url_short = url.replace(site_url, "")
|
|
key = (audit_id, url_short)
|
|
resource_issues[key]["count"] += 1
|
|
resource_issues[key]["wasted_bytes"] += item.get("wastedBytes", 0)
|
|
resource_issues[key]["total_bytes"] += item.get("totalBytes", 0)
|
|
if len(resource_issues[key]["pages"]) < 5:
|
|
resource_issues[key]["pages"].append(page_path_short)
|
|
|
|
node = item.get("node", {})
|
|
snippet = node.get("snippet", "") if isinstance(node, dict) else str(node) if node else ""
|
|
|
|
if snippet:
|
|
key = (audit_id, snippet[:80])
|
|
element_issues[key]["count"] += 1
|
|
element_issues[key]["audit"] = audit_id
|
|
if len(element_issues[key]["pages"]) < 5:
|
|
element_issues[key]["pages"].append(page_path_short)
|
|
|
|
wasted = item.get("wastedBytes", 0)
|
|
if wasted:
|
|
all_audits[audit_id]["total_wasted_bytes"] += wasted
|
|
|
|
except Exception:
|
|
continue
|
|
|
|
return dict(all_audits), dict(resource_issues), dict(element_issues), page_count


def format_bytes(bytes_val: int) -> str:
    """Render a byte count as a short human-readable string (B, KB or MB)."""
    kib = 1024
    mib = kib * kib
    if bytes_val >= mib:
        return f"{bytes_val / mib:.1f} MB"
    if bytes_val >= kib:
        return f"{bytes_val / kib:.0f} KB"
    return f"{bytes_val} B"