Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/scripts/npm_analyze_duplicates.py
1706 views
1
#!/usr/bin/env python3
2
"""
3
Analyze pnpm-lock.yaml for duplicate packages with different versions.
4
"""
5
6
import re
7
import sys
8
from collections import defaultdict
9
from packaging import version
10
import yaml
11
12
def parse_pnpm_lock(file_path):
    """Parse pnpm-lock.yaml and extract package versions.

    Args:
        file_path: Path of the pnpm lockfile to read.

    Returns:
        A dict mapping each package name to the list of version strings found
        for it among the keys of the lockfile's ``packages`` section. The dict
        is empty when the document has no usable ``packages`` section.
        Returns ``None`` (after printing an error message) when the file does
        not exist or cannot be parsed as YAML.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found")
        return None
    except yaml.YAMLError as e:
        print(f"Error parsing YAML: {e}")
        return None

    packages = {}

    # An empty file loads as None, and a scalar or list document is not a
    # mapping; in both cases there is no 'packages' section to scan.
    if not isinstance(data, dict):
        return packages

    # A bare "packages:" key loads as None, so fall back to an empty mapping.
    entries = data.get('packages') or {}

    for pkg_spec in entries:
        # Split a spec such as "name@1.2.3" or "@scope/name@1.2.3" at its
        # last "@": the version part may not contain "@" itself.
        match = re.match(r'^(.+?)@([^@]+)$', pkg_spec)
        if match:
            name, ver = match.groups()
            packages.setdefault(name, []).append(ver)

    return packages
38
39
def find_duplicates(packages):
    """Find packages with multiple versions.

    Args:
        packages: Dict mapping a package name to a list of version strings.

    Returns:
        A dict containing only the packages that have more than one distinct
        version string. Each value is a dict with:
          'versions'     - the distinct version strings in ascending order,
          'count'        - the number of version entries, repeats included,
          'unique_count' - the number of distinct version strings.
    """
    def sort_key(ver_str):
        # Versions that cannot be parsed are ordered as if they were 0.0.0.
        # The raw string is a tie-breaker so that entries with equal parsed
        # versions come out in a stable order instead of set iteration order.
        parsed = version.parse(ver_str if is_valid_version(ver_str) else "0.0.0")
        return (parsed, ver_str)

    duplicates = {}

    for name, versions in packages.items():
        distinct = set(versions)
        if len(distinct) > 1:  # More than one unique version
            unique_versions = sorted(distinct, key=sort_key)
            duplicates[name] = {
                'versions': unique_versions,
                'count': len(versions),
                'unique_count': len(unique_versions),
            }

    return duplicates
53
54
def is_valid_version(ver_str):
    """Report whether ``version.parse`` accepts the given version string.

    Returns:
        True when parsing succeeds, False when it raises
        ``version.InvalidVersion``.
    """
    try:
        version.parse(ver_str)
    except version.InvalidVersion:
        return False
    else:
        return True
61
62
def analyze_version_differences(versions):
    """Classify how far apart the given version strings are.

    Args:
        versions: Sequence of version strings belonging to one package.

    Returns:
        "single"     - fewer than two version strings were given,
        "invalid"    - fewer than two of them could be parsed,
        "patch_diff" - all parsed versions share the same major and minor,
        "minor_diff" - all parsed versions share the same major only,
        "major_diff" - the parsed versions span more than one major,
        "unknown"    - an unexpected error occurred while classifying.
    """
    if len(versions) < 2:
        return "single"

    try:
        parsed = [version.parse(v) for v in versions if is_valid_version(v)]
        if len(parsed) < 2:
            return "invalid"

        # Collapse the versions to their distinct (major, minor) pairs and
        # distinct majors; a single distinct value means they all agree.
        major_minor_pairs = {(p.major, p.minor) for p in parsed}
        majors = {p.major for p in parsed}

        if len(major_minor_pairs) == 1:
            return "patch_diff"
        if len(majors) == 1:
            return "minor_diff"
        return "major_diff"
    except Exception:
        return "unknown"
91
92
def main():
    """Print a report of packages that occur with several versions.

    Reads "packages/pnpm-lock.yaml" (relative to the current working
    directory), groups every package that has more than one version by how
    far apart its versions are, and prints one section per group followed by
    a summary. Exits with status 1 when the lockfile cannot be loaded.
    """
    lock_file = "packages/pnpm-lock.yaml"

    print("Analyzing pnpm-lock.yaml for duplicate packages...")

    packages = parse_pnpm_lock(lock_file)
    if packages is None:
        sys.exit(1)

    duplicates = find_duplicates(packages)

    if not duplicates:
        print("No duplicate packages found!")
        return

    print(f"\nFound {len(duplicates)} packages with multiple versions:\n")

    # Group by difference type
    by_diff_type = defaultdict(list)
    for name, info in duplicates.items():
        diff_type = analyze_version_differences(info['versions'])
        by_diff_type[diff_type].append((name, info))

    # One row per report section, patch differences first (most concerning):
    # (difference type, heading, underline width, max entries or None for all)
    sections = [
        ('patch_diff', "🔴 PATCH VERSION DIFFERENCES (most concerning):", 50, None),
        ('minor_diff', "🟡 MINOR VERSION DIFFERENCES:", 30, None),
        ('major_diff', "🟠 MAJOR VERSION DIFFERENCES (expected for breaking changes):", 60, 10),
    ]

    for diff_type, heading, rule_width, limit in sections:
        if diff_type not in by_diff_type:
            continue
        # Package names are unique, so ordering by name alone is sufficient.
        entries = sorted(by_diff_type[diff_type], key=lambda entry: entry[0])
        print(heading)
        print("=" * rule_width)
        for name, info in entries[:limit]:
            versions_str = " vs ".join(info['versions'])
            print(f" {name}: {versions_str} ({info['count']} total installations)")
        if limit is not None and len(entries) > limit:
            print(f" ... and {len(entries) - limit} more")
        print()

    # Summary
    total_patch = len(by_diff_type.get('patch_diff', ()))
    total_minor = len(by_diff_type.get('minor_diff', ()))
    total_major = len(by_diff_type.get('major_diff', ()))
    # Packages whose versions could not be classified (any other difference
    # type) are part of the "Found N" total above, so account for them here.
    total_other = len(duplicates) - total_patch - total_minor - total_major

    print("SUMMARY:")
    print(f" 🔴 Patch differences: {total_patch} (should be unified)")
    print(f" 🟡 Minor differences: {total_minor} (may need review)")
    print(f" 🟠 Major differences: {total_major} (usually expected)")
    if total_other:
        print(f" ⚪ Unclassified: {total_other} (versions could not be compared)")


if __name__ == "__main__":
    main()
154