Improve code analyzer for independent packages, CI: only run release-check on push to main

This commit is contained in:
quotentiroler 2026-02-09 19:57:13 -08:00
parent 0c7bc303c9
commit 5c62e4d51b
2 changed files with 294 additions and 161 deletions

View File

@ -145,10 +145,10 @@ jobs:
path: dist/
retention-days: 1
# Validate npm pack contents after build.
# Validate npm pack contents after build (only on push to main, not PRs).
release-check:
needs: [docs-scope, build-artifacts]
if: needs.docs-scope.outputs.docs_only != 'true'
if: github.event_name == 'push' && needs.docs-scope.outputs.docs_only != 'true'
runs-on: blacksmith-4vcpu-ubuntu-2404
steps:
- name: Checkout

View File

@ -21,27 +21,47 @@ from collections import defaultdict
# File extensions to consider as code files
CODE_EXTENSIONS = {
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', # TypeScript/JavaScript
'.swift', # macOS/iOS
'.kt', '.java', # Android
'.py', '.sh', # Scripts
".ts",
".tsx",
".js",
".jsx",
".mjs",
".cjs", # TypeScript/JavaScript
".swift", # macOS/iOS
".kt",
".java", # Android
".py",
".sh", # Scripts
}
# Directories to skip
SKIP_DIRS = {
'node_modules', '.git', 'dist', 'build', 'coverage',
'__pycache__', '.turbo', 'out', '.worktrees', 'vendor',
'Pods', 'DerivedData', '.gradle', '.idea',
'Swabble', # Separate Swift package
'skills', # Standalone skill scripts
'.pi', # Pi editor extensions
"node_modules",
".git",
"dist",
"build",
"coverage",
"__pycache__",
".turbo",
"out",
".worktrees",
"vendor",
"Pods",
"DerivedData",
".gradle",
".idea",
"Swabble", # Separate Swift package
"skills", # Standalone skill scripts
".pi", # Pi editor extensions
}
# Filename patterns to skip in short-file warnings (barrel exports, stubs)
SKIP_SHORT_PATTERNS = {
'index.js', 'index.ts', 'postinstall.js',
"index.js",
"index.ts",
"postinstall.js",
}
SKIP_SHORT_SUFFIXES = ('-cli.ts',)
SKIP_SHORT_SUFFIXES = ("-cli.ts",)
# Function names to skip in duplicate detection.
# Only list names so generic they're expected to appear independently in many modules.
@ -49,20 +69,56 @@ SKIP_SHORT_SUFFIXES = ('-cli.ts',)
# stripPrefix, parseConfig are specific enough to flag).
SKIP_DUPLICATE_FUNCTIONS = {
# Lifecycle / framework plumbing
'main', 'init', 'setup', 'teardown', 'cleanup', 'dispose', 'destroy',
'open', 'close', 'connect', 'disconnect', 'execute', 'run', 'start', 'stop',
'render', 'update', 'refresh', 'reset', 'clear', 'flush',
"main",
"init",
"setup",
"teardown",
"cleanup",
"dispose",
"destroy",
"open",
"close",
"connect",
"disconnect",
"execute",
"run",
"start",
"stop",
"render",
"update",
"refresh",
"reset",
"clear",
"flush",
# Too-short / too-generic identifiers
'text', 'json', 'pad', 'mask', 'digest', 'confirm', 'intro', 'outro',
'exists', 'send', 'receive', 'listen', 'log', 'warn', 'error', 'info',
'help', 'version', 'config', 'configure', 'describe', 'test', 'action',
"text",
"json",
"pad",
"mask",
"digest",
"confirm",
"intro",
"outro",
"exists",
"send",
"receive",
"listen",
"log",
"warn",
"error",
"info",
"help",
"version",
"config",
"configure",
"describe",
"test",
"action",
}
SKIP_DUPLICATE_FILE_PATTERNS = ('.test.ts', '.test.tsx', '.spec.ts')
SKIP_DUPLICATE_FILE_PATTERNS = (".test.ts", ".test.tsx", ".spec.ts")
# Known packages in the monorepo
PACKAGES = {
'src', 'apps', 'extensions', 'packages', 'scripts', 'ui', 'test', 'docs'
}
PACKAGES = {"src", "apps", "extensions", "packages", "scripts", "ui", "test", "docs"}
def get_package(file_path: Path, root_dir: Path) -> str:
@ -72,15 +128,15 @@ def get_package(file_path: Path, root_dir: Path) -> str:
parts = relative.parts
if len(parts) > 0 and parts[0] in PACKAGES:
return parts[0]
return 'root'
return "root"
except ValueError:
return 'root'
return "root"
def count_lines(file_path: Path) -> int:
"""Count the number of lines in a file."""
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
return sum(1 for _ in f)
except Exception:
return 0
@ -89,81 +145,100 @@ def count_lines(file_path: Path) -> int:
def find_code_files(root_dir: Path) -> List[Tuple[Path, int]]:
"""Find all code files and their line counts."""
files_with_counts = []
for dirpath, dirnames, filenames in os.walk(root_dir):
# Remove skip directories from dirnames to prevent walking into them
dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
for filename in filenames:
file_path = Path(dirpath) / filename
if file_path.suffix.lower() in CODE_EXTENSIONS:
line_count = count_lines(file_path)
files_with_counts.append((file_path, line_count))
return files_with_counts
# Regex patterns for TypeScript functions (exported and internal)
TS_FUNCTION_PATTERNS = [
# export function name(...) or function name(...)
re.compile(r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)', re.MULTILINE),
re.compile(r"^(?:export\s+)?(?:async\s+)?function\s+(\w+)", re.MULTILINE),
# export const name = or const name =
re.compile(r'^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>', re.MULTILINE),
re.compile(
r"^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>", re.MULTILINE
),
]
def extract_functions(file_path: Path) -> Set[str]:
"""Extract function names from a TypeScript file."""
if file_path.suffix.lower() not in {'.ts', '.tsx'}:
if file_path.suffix.lower() not in {".ts", ".tsx"}:
return set()
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
except Exception:
return set()
return extract_functions_from_content(content)
def find_duplicate_functions(files: List[Tuple[Path, int]], root_dir: Path) -> Dict[str, List[Path]]:
def find_duplicate_functions(
files: List[Tuple[Path, int]], root_dir: Path
) -> Dict[str, List[Path]]:
"""Find function names that appear in multiple files."""
function_locations: Dict[str, List[Path]] = defaultdict(list)
for file_path, _ in files:
# Skip test files for duplicate detection
if any(file_path.name.endswith(pat) for pat in SKIP_DUPLICATE_FILE_PATTERNS):
continue
functions = extract_functions(file_path)
for func in functions:
# Skip known common function names
if func in SKIP_DUPLICATE_FUNCTIONS:
continue
function_locations[func].append(file_path)
# Filter to only duplicates, ignoring cross-extension duplicates.
# Extensions are independent packages — the same function name in
# extensions/telegram and extensions/discord is expected, not duplication.
# Filter to only duplicates, ignoring cross-package duplicates.
# Independent packages (extensions/*, apps/*, ui/) are treated like separate codebases —
# the same function name in extensions/telegram and extensions/discord,
# or in apps/ios and apps/macos, is expected, not duplication.
result: Dict[str, List[Path]] = {}
for name, paths in function_locations.items():
if len(paths) < 2:
continue
# If ALL instances are in different extensions, skip
ext_dirs = set()
non_ext = False
for p in paths:
# Identify which independent package each path belongs to (if any)
# Returns a unique package key or None if it's core code
def get_independent_package(p: Path) -> Optional[str]:
try:
rel = p.relative_to(root_dir)
parts = rel.parts
if len(parts) >= 2 and parts[0] == 'extensions':
ext_dirs.add(parts[1])
else:
non_ext = True
if len(parts) >= 2:
# extensions/<name>, apps/<name> are each independent
if parts[0] in ("extensions", "apps"):
return f"{parts[0]}/{parts[1]}"
# ui/ is a single independent package (browser frontend)
if len(parts) >= 1 and parts[0] == "ui":
return "ui"
return None
except ValueError:
non_ext = True
# Skip if every instance lives in a different extension (no core overlap)
if not non_ext and len(ext_dirs) == len(paths):
return None
package_keys = set()
has_core = False
for p in paths:
pkg = get_independent_package(p)
if pkg:
package_keys.add(pkg)
else:
has_core = True
# Skip if ALL instances are in different independent packages (no core overlap)
if not has_core and len(package_keys) == len(paths):
continue
result[name] = paths
return result
@ -173,10 +248,10 @@ def validate_git_ref(root_dir: Path, ref: str) -> bool:
"""Validate that a git ref exists. Exits with error if not."""
try:
result = subprocess.run(
['git', 'rev-parse', '--verify', ref],
["git", "rev-parse", "--verify", ref],
capture_output=True,
cwd=root_dir,
encoding='utf-8',
encoding="utf-8",
)
return result.returncode == 0
except Exception:
@ -188,18 +263,18 @@ def get_file_content_at_ref(file_path: Path, root_dir: Path, ref: str) -> Option
try:
relative_path = file_path.relative_to(root_dir)
# Use forward slashes for git paths
git_path = str(relative_path).replace('\\', '/')
git_path = str(relative_path).replace("\\", "/")
result = subprocess.run(
['git', 'show', f'{ref}:{git_path}'],
["git", "show", f"{ref}:{git_path}"],
capture_output=True,
cwd=root_dir,
encoding='utf-8',
errors='ignore',
encoding="utf-8",
errors="ignore",
)
if result.returncode != 0:
stderr = result.stderr.strip()
# "does not exist" or "exists on disk, but not in" = file missing at ref (OK)
if 'does not exist' in stderr or 'exists on disk' in stderr:
if "does not exist" in stderr or "exists on disk" in stderr:
return None
# Other errors (bad ref, git broken) = genuine failure
if stderr:
@ -232,11 +307,11 @@ def get_changed_files(root_dir: Path, compare_ref: str) -> Set[str]:
"""Get set of files changed between compare_ref and HEAD (relative paths with forward slashes)."""
try:
result = subprocess.run(
['git', 'diff', '--name-only', compare_ref, 'HEAD'],
["git", "diff", "--name-only", compare_ref, "HEAD"],
capture_output=True,
cwd=root_dir,
encoding='utf-8',
errors='ignore',
encoding="utf-8",
errors="ignore",
)
if result.returncode != 0:
return set()
@ -270,7 +345,7 @@ def find_duplicate_regressions(
relevant_dupes: Dict[str, List[Path]] = {}
for func_name, paths in current_dupes.items():
involves_changed = any(
str(p.relative_to(root_dir)).replace('\\', '/') in changed_files
str(p.relative_to(root_dir)).replace("\\", "/") in changed_files
for p in paths
)
if involves_changed:
@ -287,7 +362,7 @@ def find_duplicate_regressions(
base_function_locations: Dict[str, List[Path]] = defaultdict(list)
for file_path in files_to_check:
if file_path.suffix.lower() not in {'.ts', '.tsx'}:
if file_path.suffix.lower() not in {".ts", ".tsx"}:
continue
content = get_file_content_at_ref(file_path, root_dir, compare_ref)
if content is None:
@ -298,10 +373,14 @@ def find_duplicate_regressions(
continue
base_function_locations[func].append(file_path)
base_dupes = {name for name, paths in base_function_locations.items() if len(paths) > 1}
base_dupes = {
name for name, paths in base_function_locations.items() if len(paths) > 1
}
# Return only new duplicates
return {name: paths for name, paths in relevant_dupes.items() if name not in base_dupes}
return {
name: paths for name, paths in relevant_dupes.items() if name not in base_dupes
}
def find_threshold_regressions(
@ -318,20 +397,20 @@ def find_threshold_regressions(
"""
crossed = []
grew = []
for file_path, current_lines in files:
if current_lines < threshold:
continue # Not over threshold now, skip
base_lines = get_line_count_at_ref(file_path, root_dir, compare_ref)
if base_lines is None or base_lines < threshold:
# New file or crossed the threshold
crossed.append((file_path, current_lines, base_lines))
elif current_lines > base_lines:
# Already over threshold and grew larger
grew.append((file_path, current_lines, base_lines))
return crossed, grew
@ -350,13 +429,17 @@ def _write_github_summary(
lines.append("> ⚠️ **DO NOT trash the code base!** The goal is maintainability.\n")
if crossed:
lines.append(f"### {len(crossed)} file(s) crossed the {threshold}-line threshold\n")
lines.append(
f"### {len(crossed)} file(s) crossed the {threshold}-line threshold\n"
)
lines.append("| File | Before | After | Delta |")
lines.append("|------|-------:|------:|------:|")
for file_path, current, base in crossed:
rel = str(file_path.relative_to(root_dir)).replace('\\', '/')
rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
before = f"{base:,}" if base is not None else "new"
lines.append(f"| `{rel}` | {before} | {current:,} | +{current - (base or 0):,} |")
lines.append(
f"| `{rel}` | {before} | {current:,} | +{current - (base or 0):,} |"
)
lines.append("")
if grew:
@ -364,7 +447,7 @@ def _write_github_summary(
lines.append("| File | Before | After | Delta |")
lines.append("|------|-------:|------:|------:|")
for file_path, current, base in grew:
rel = str(file_path.relative_to(root_dir)).replace('\\', '/')
rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
lines.append(f"| `{rel}` | {base:,} | {current:,} | +{current - base:,} |")
lines.append("")
@ -374,7 +457,9 @@ def _write_github_summary(
lines.append("|----------|-------|")
for func_name in sorted(new_dupes.keys()):
paths = new_dupes[func_name]
file_list = ", ".join(f"`{str(p.relative_to(root_dir)).replace(chr(92), '/')}`" for p in paths)
file_list = ", ".join(
f"`{str(p.relative_to(root_dir)).replace(chr(92), '/')}`" for p in paths
)
lines.append(f"| `{func_name}` | {file_list} |")
lines.append("")
@ -383,67 +468,73 @@ def _write_github_summary(
lines.append("- Extract helpers, types, or constants into separate files")
lines.append("- See `AGENTS.md` for guidelines (~500-700 LOC target)")
lines.append(f"- This check compares your PR against `{compare_ref}`")
lines.append(f"- Only code files are checked: {', '.join(f'`{e}`' for e in sorted(CODE_EXTENSIONS))}")
lines.append(
f"- Only code files are checked: {', '.join(f'`{e}`' for e in sorted(CODE_EXTENSIONS))}"
)
lines.append("- Docs, test names, and config files are **not** affected")
lines.append("\n</details>")
try:
with open(summary_path, 'a', encoding='utf-8') as f:
f.write('\n'.join(lines) + '\n')
with open(summary_path, "a", encoding="utf-8") as f:
f.write("\n".join(lines) + "\n")
except Exception as e:
print(f"⚠️ Failed to write job summary: {e}", file=sys.stderr)
def main():
parser = argparse.ArgumentParser(
description='Analyze code files: list longest/shortest files, find duplicate function names'
description="Analyze code files: list longest/shortest files, find duplicate function names"
)
parser.add_argument(
'-t', '--threshold',
"-t",
"--threshold",
type=int,
default=1000,
help='Warn about files longer than this many lines (default: 1000)'
help="Warn about files longer than this many lines (default: 1000)",
)
parser.add_argument(
'--min-threshold',
"--min-threshold",
type=int,
default=10,
help='Warn about files shorter than this many lines (default: 10)'
help="Warn about files shorter than this many lines (default: 10)",
)
parser.add_argument(
'-n', '--top',
"-n",
"--top",
type=int,
default=20,
help='Show top N longest files (default: 20)'
help="Show top N longest files (default: 20)",
)
parser.add_argument(
'-b', '--bottom',
"-b",
"--bottom",
type=int,
default=10,
help='Show bottom N shortest files (default: 10)'
help="Show bottom N shortest files (default: 10)",
)
parser.add_argument(
'-d', '--directory',
"-d",
"--directory",
type=str,
default='.',
help='Directory to scan (default: current directory)'
default=".",
help="Directory to scan (default: current directory)",
)
parser.add_argument(
'--compare-to',
"--compare-to",
type=str,
default=None,
help='Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.'
help="Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.",
)
parser.add_argument(
'--strict',
action='store_true',
help='Exit with non-zero status if any violations found (for CI)'
"--strict",
action="store_true",
help="Exit with non-zero status if any violations found (for CI)",
)
args = parser.parse_args()
root_dir = Path(args.directory).resolve()
# CI delta mode: only show regressions
if args.compare_to:
print(f"\n📂 Scanning: {root_dir}")
@ -451,23 +542,32 @@ def main():
if not validate_git_ref(root_dir, args.compare_to):
print(f"❌ Invalid git ref: {args.compare_to}", file=sys.stderr)
print(" Make sure the ref exists (e.g. run 'git fetch origin <branch>')", file=sys.stderr)
print(
" Make sure the ref exists (e.g. run 'git fetch origin <branch>')",
file=sys.stderr,
)
sys.exit(2)
files = find_code_files(root_dir)
violations = False
# Check file length regressions
crossed, grew = find_threshold_regressions(files, root_dir, args.compare_to, args.threshold)
crossed, grew = find_threshold_regressions(
files, root_dir, args.compare_to, args.threshold
)
if crossed:
print(f"⚠️ {len(crossed)} file(s) crossed {args.threshold} line threshold:\n")
print(
f"⚠️ {len(crossed)} file(s) crossed {args.threshold} line threshold:\n"
)
for file_path, current, base in crossed:
relative_path = file_path.relative_to(root_dir)
if base is None:
print(f" {relative_path}: {current:,} lines (new file)")
else:
print(f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})")
print(
f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})"
)
print()
violations = True
else:
@ -477,7 +577,9 @@ def main():
print(f"⚠️ {len(grew)} already-large file(s) grew larger:\n")
for file_path, current, base in grew:
relative_path = file_path.relative_to(root_dir)
print(f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})")
print(
f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})"
)
print()
violations = True
else:
@ -501,26 +603,42 @@ def main():
print()
if args.strict and violations:
# Emit GitHub Actions file annotations so violations appear inline in the PR diff
in_gha = os.environ.get('GITHUB_ACTIONS') == 'true'
in_gha = os.environ.get("GITHUB_ACTIONS") == "true"
if in_gha:
for file_path, current, base in crossed:
rel = str(file_path.relative_to(root_dir)).replace('\\', '/')
rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
if base is None:
print(f"::error file={rel},title=File over {args.threshold} lines::{rel} is {current:,} lines (new file). Split into smaller modules.")
print(
f"::error file={rel},title=File over {args.threshold} lines::{rel} is {current:,} lines (new file). Split into smaller modules."
)
else:
print(f"::error file={rel},title=File crossed {args.threshold} lines::{rel} grew from {base:,} to {current:,} lines (+{current - base:,}). Split into smaller modules.")
print(
f"::error file={rel},title=File crossed {args.threshold} lines::{rel} grew from {base:,} to {current:,} lines (+{current - base:,}). Split into smaller modules."
)
for file_path, current, base in grew:
rel = str(file_path.relative_to(root_dir)).replace('\\', '/')
print(f"::error file={rel},title=Large file grew larger::{rel} is already {base:,} lines and grew to {current:,} (+{current - base:,}). Consider refactoring.")
rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
print(
f"::error file={rel},title=Large file grew larger::{rel} is already {base:,} lines and grew to {current:,} (+{current - base:,}). Consider refactoring."
)
for func_name in sorted(new_dupes.keys()):
for p in new_dupes[func_name]:
rel = str(p.relative_to(root_dir)).replace('\\', '/')
print(f"::error file={rel},title=Duplicate function '{func_name}'::Function '{func_name}' appears in multiple files. Centralize or rename.")
rel = str(p.relative_to(root_dir)).replace("\\", "/")
print(
f"::error file={rel},title=Duplicate function '{func_name}'::Function '{func_name}' appears in multiple files. Centralize or rename."
)
# Write GitHub Actions job summary (visible in the Actions check details)
summary_path = os.environ.get('GITHUB_STEP_SUMMARY')
summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if summary_path:
_write_github_summary(summary_path, crossed, grew, new_dupes, root_dir, args.threshold, args.compare_to)
_write_github_summary(
summary_path,
crossed,
grew,
new_dupes,
root_dir,
args.threshold,
args.compare_to,
)
# Print actionable summary so contributors know what to do
print("═" * 60)
@ -528,9 +646,13 @@ def main():
print(" ⚠️ DO NOT just trash the code base!")
print(" The goal is maintainability.\n")
if crossed:
print(f" {len(crossed)} file(s) grew past the {args.threshold}-line limit.")
print(
f" {len(crossed)} file(s) grew past the {args.threshold}-line limit."
)
if grew:
print(f" {len(grew)} file(s) already over {args.threshold} lines got larger.")
print(
f" {len(grew)} file(s) already over {args.threshold} lines got larger."
)
print()
print(" How to fix:")
print(" • Split large files into smaller, focused modules")
@ -538,7 +660,9 @@ def main():
print(" • See AGENTS.md for guidelines (~500-700 LOC target)")
print()
print(f" This check compares your PR against {args.compare_to}.")
print(f" Only code files are checked ({', '.join(sorted(e for e in CODE_EXTENSIONS))}).")
print(
f" Only code files are checked ({', '.join(sorted(e for e in CODE_EXTENSIONS))})."
)
print(" Docs, test names, and config files are not affected.")
print("═" * 60)
sys.exit(1)
@ -546,113 +670,122 @@ def main():
print("═" * 60)
print("✅ Code size check passed — no files exceed thresholds.")
print("═" * 60)
return
print(f"\n📂 Scanning: {root_dir}\n")
# Find and sort files by line count
files = find_code_files(root_dir)
files_desc = sorted(files, key=lambda x: x[1], reverse=True)
files_asc = sorted(files, key=lambda x: x[1])
# Show top N longest files
top_files = files_desc[:args.top]
top_files = files_desc[: args.top]
print(f"📊 Top {min(args.top, len(top_files))} longest code files:\n")
print(f"{'Lines':>8} {'File'}")
print("-" * 60)
long_warnings = []
for file_path, line_count in top_files:
relative_path = file_path.relative_to(root_dir)
# Check if over threshold
if line_count >= args.threshold:
marker = " ⚠️"
long_warnings.append((relative_path, line_count))
else:
marker = ""
print(f"{line_count:>8} {relative_path}{marker}")
# Show bottom N shortest files
bottom_files = files_asc[:args.bottom]
bottom_files = files_asc[: args.bottom]
print(f"\n📉 Bottom {min(args.bottom, len(bottom_files))} shortest code files:\n")
print(f"{'Lines':>8} {'File'}")
print("-" * 60)
short_warnings = []
for file_path, line_count in bottom_files:
relative_path = file_path.relative_to(root_dir)
filename = file_path.name
# Skip known barrel exports and stubs
is_expected_short = (
filename in SKIP_SHORT_PATTERNS or
any(filename.endswith(suffix) for suffix in SKIP_SHORT_SUFFIXES)
is_expected_short = filename in SKIP_SHORT_PATTERNS or any(
filename.endswith(suffix) for suffix in SKIP_SHORT_SUFFIXES
)
# Check if under threshold
if line_count <= args.min_threshold and not is_expected_short:
marker = " ⚠️"
short_warnings.append((relative_path, line_count))
else:
marker = ""
print(f"{line_count:>8} {relative_path}{marker}")
# Summary
total_files = len(files)
total_lines = sum(count for _, count in files)
print("-" * 60)
print(f"\n📈 Summary:")
print(f" Total code files: {total_files:,}")
print(f" Total lines: {total_lines:,}")
print(f" Average lines/file: {total_lines // total_files if total_files else 0:,}")
print(
f" Average lines/file: {total_lines // total_files if total_files else 0:,}"
)
# Per-package breakdown
package_stats: dict[str, dict] = {}
for file_path, line_count in files:
pkg = get_package(file_path, root_dir)
if pkg not in package_stats:
package_stats[pkg] = {'files': 0, 'lines': 0}
package_stats[pkg]['files'] += 1
package_stats[pkg]['lines'] += line_count
package_stats[pkg] = {"files": 0, "lines": 0}
package_stats[pkg]["files"] += 1
package_stats[pkg]["lines"] += line_count
print(f"\n📦 Per-package breakdown:\n")
print(f"{'Package':<15} {'Files':>8} {'Lines':>10} {'Avg':>8}")
print("-" * 45)
for pkg in sorted(package_stats.keys(), key=lambda p: package_stats[p]['lines'], reverse=True):
for pkg in sorted(
package_stats.keys(), key=lambda p: package_stats[p]["lines"], reverse=True
):
stats = package_stats[pkg]
avg = stats['lines'] // stats['files'] if stats['files'] else 0
avg = stats["lines"] // stats["files"] if stats["files"] else 0
print(f"{pkg:<15} {stats['files']:>8,} {stats['lines']:>10,} {avg:>8,}")
# Long file warnings
if long_warnings:
print(f"\n⚠️ Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):")
print(
f"\n⚠️ Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):"
)
for path, count in long_warnings:
print(f" - {path} ({count:,} lines)")
else:
print(f"\n✅ No files exceed {args.threshold} lines")
# Short file warnings
if short_warnings:
print(f"\n⚠️ Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):")
print(
f"\n⚠️ Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):"
)
for path, count in short_warnings:
print(f" - {path} ({count} lines)")
else:
print(f"\n✅ No files are {args.min_threshold} lines or less")
# Duplicate function names
duplicates = find_duplicate_functions(files, root_dir)
if duplicates:
print(f"\n⚠️ Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):")
print(
f"\n⚠️ Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):"
)
for func_name in sorted(duplicates.keys()):
paths = duplicates[func_name]
print(f" - {func_name}:")
@ -660,13 +793,13 @@ def main():
print(f" {path.relative_to(root_dir)}")
else:
print(f"\n✅ No duplicate function names")
print()
# Exit with error if --strict and there are violations
if args.strict and long_warnings:
sys.exit(1)
if __name__ == '__main__':
if __name__ == "__main__":
main()