Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
225 views
ubuntu2404
1
#!/usr/bin/env python3
2
"""
3
Sample data generation for computational chemistry template
4
Provides realistic molecular data for testing and demonstration
5
"""
6
7
import numpy as np
8
import matplotlib.pyplot as plt
9
10
def generate_molecular_coordinates(molecule_name="benzene"):
11
"""Generate sample molecular coordinates for common molecules"""
12
13
coordinates = {
14
"benzene": {
15
"atoms": ["C", "C", "C", "C", "C", "C", "H", "H", "H", "H", "H", "H"],
16
"coords": np.array([
17
[0.000, 1.396, 0.000], # C1
18
[1.209, 0.698, 0.000], # C2
19
[1.209, -0.698, 0.000], # C3
20
[0.000, -1.396, 0.000], # C4
21
[-1.209, -0.698, 0.000], # C5
22
[-1.209, 0.698, 0.000], # C6
23
[0.000, 2.479, 0.000], # H1
24
[2.147, 1.240, 0.000], # H2
25
[2.147, -1.240, 0.000], # H3
26
[0.000, -2.479, 0.000], # H4
27
[-2.147, -1.240, 0.000], # H5
28
[-2.147, 1.240, 0.000] # H6
29
])
30
},
31
"water": {
32
"atoms": ["O", "H", "H"],
33
"coords": np.array([
34
[0.000, 0.000, 0.119], # O
35
[0.000, 0.757, -0.477], # H1
36
[0.000, -0.757, -0.477] # H2
37
])
38
},
39
"methane": {
40
"atoms": ["C", "H", "H", "H", "H"],
41
"coords": np.array([
42
[0.000, 0.000, 0.000], # C
43
[0.629, 0.629, 0.629], # H1
44
[-0.629, -0.629, 0.629], # H2
45
[-0.629, 0.629, -0.629], # H3
46
[0.629, -0.629, -0.629] # H4
47
])
48
}
49
}
50
51
return coordinates.get(molecule_name, coordinates["benzene"])
52
53
def generate_energy_profile(npoints=50, barrier_height=25.0, reaction_energy=-15.0):
54
"""Generate a realistic reaction energy profile"""
55
56
x = np.linspace(0, 6, npoints)
57
58
# Create a double-well potential with transition states
59
energy = np.zeros_like(x)
60
61
# Initial reactant state
62
energy[x <= 1] = 0
63
64
# First transition state
65
mask1 = (x > 1) & (x <= 2)
66
energy[mask1] = barrier_height * np.sin(np.pi * (x[mask1] - 1))**2
67
68
# Intermediate state
69
mask2 = (x > 2) & (x <= 3)
70
energy[mask2] = reaction_energy * 0.6
71
72
# Second transition state
73
mask3 = (x > 3) & (x <= 4)
74
energy[mask3] = reaction_energy * 0.6 + (barrier_height * 0.8) * np.sin(np.pi * (x[mask3] - 3))**2
75
76
# Second intermediate
77
mask4 = (x > 4) & (x <= 5)
78
energy[mask4] = reaction_energy * 0.3
79
80
# Final products
81
energy[x > 5] = reaction_energy
82
83
return x, energy
84
85
def generate_md_trajectory(nframes=1000, nresidues=50):
86
"""Generate sample molecular dynamics trajectory data"""
87
88
time = np.linspace(0, 100, nframes) # 100 ns simulation
89
90
# RMSD with realistic behavior
91
rmsd_backbone = 1.0 + 0.5 * (1 - np.exp(-time/10)) + 0.2 * np.random.normal(0, 1, nframes)
92
rmsd_backbone = np.maximum(rmsd_backbone, 0.5) # Ensure positive RMSD
93
94
# RMSF per residue
95
rmsf_values = np.random.gamma(1.5, 0.8, nresidues)
96
97
# Temperature and pressure
98
temperature = 300 + 5 * np.sin(time/5) + 2 * np.random.normal(0, 1, nframes)
99
pressure = 1.0 + 0.1 * np.sin(time/3) + 0.05 * np.random.normal(0, 1, nframes)
100
101
return {
102
"time": time,
103
"rmsd_backbone": rmsd_backbone,
104
"rmsf_values": rmsf_values,
105
"temperature": temperature,
106
"pressure": pressure
107
}
108
109
def generate_docking_results(ncompounds=1000):
110
"""Generate virtual screening/docking results"""
111
112
np.random.seed(42) # For reproducibility
113
114
# Binding affinities with realistic distribution
115
binding_scores = np.random.gamma(2, 2, ncompounds) * -1 - 4
116
binding_scores = np.clip(binding_scores, -12, -4)
117
118
# ADMET properties
119
molecular_weight = np.random.normal(350, 80, ncompounds)
120
lipophilicity = np.random.normal(3.5, 1.2, ncompounds)
121
polar_surface_area = np.random.normal(70, 25, ncompounds)
122
123
# Filter for hits
124
hits_mask = binding_scores <= -8.0
125
126
return {
127
"binding_scores": binding_scores,
128
"molecular_weight": molecular_weight,
129
"lipophilicity": lipophilicity,
130
"polar_surface_area": polar_surface_area,
131
"hits_mask": hits_mask,
132
"n_hits": np.sum(hits_mask)
133
}
134
135
if __name__ == "__main__":
136
# Generate and save sample datasets
137
print("Generating sample computational chemistry datasets...")
138
139
# Molecular coordinates
140
benzene = generate_molecular_coordinates("benzene")
141
print(f"Benzene molecule: {len(benzene['atoms'])} atoms")
142
143
# Energy profile
144
x, energy = generate_energy_profile()
145
print(f"Energy profile: {len(x)} points, barrier = {max(energy):.1f} kcal/mol")
146
147
# MD trajectory
148
md_data = generate_md_trajectory()
149
print(f"MD trajectory: {len(md_data['time'])} frames")
150
151
# Docking results
152
docking_data = generate_docking_results()
153
print(f"Docking results: {len(docking_data['binding_scores'])} compounds, {docking_data['n_hits']} hits")
154
155
print("Sample data generation complete!")
156