ubuntu2404
#!/usr/bin/env python31"""2Sample data generation for computational chemistry template3Provides realistic molecular data for testing and demonstration4"""56import numpy as np7import matplotlib.pyplot as plt89def generate_molecular_coordinates(molecule_name="benzene"):10"""Generate sample molecular coordinates for common molecules"""1112coordinates = {13"benzene": {14"atoms": ["C", "C", "C", "C", "C", "C", "H", "H", "H", "H", "H", "H"],15"coords": np.array([16[0.000, 1.396, 0.000], # C117[1.209, 0.698, 0.000], # C218[1.209, -0.698, 0.000], # C319[0.000, -1.396, 0.000], # C420[-1.209, -0.698, 0.000], # C521[-1.209, 0.698, 0.000], # C622[0.000, 2.479, 0.000], # H123[2.147, 1.240, 0.000], # H224[2.147, -1.240, 0.000], # H325[0.000, -2.479, 0.000], # H426[-2.147, -1.240, 0.000], # H527[-2.147, 1.240, 0.000] # H628])29},30"water": {31"atoms": ["O", "H", "H"],32"coords": np.array([33[0.000, 0.000, 0.119], # O34[0.000, 0.757, -0.477], # H135[0.000, -0.757, -0.477] # H236])37},38"methane": {39"atoms": ["C", "H", "H", "H", "H"],40"coords": np.array([41[0.000, 0.000, 0.000], # C42[0.629, 0.629, 0.629], # H143[-0.629, -0.629, 0.629], # H244[-0.629, 0.629, -0.629], # H345[0.629, -0.629, -0.629] # H446])47}48}4950return coordinates.get(molecule_name, coordinates["benzene"])5152def generate_energy_profile(npoints=50, barrier_height=25.0, reaction_energy=-15.0):53"""Generate a realistic reaction energy profile"""5455x = np.linspace(0, 6, npoints)5657# Create a double-well potential with transition states58energy = np.zeros_like(x)5960# Initial reactant state61energy[x <= 1] = 06263# First transition state64mask1 = (x > 1) & (x <= 2)65energy[mask1] = barrier_height * np.sin(np.pi * (x[mask1] - 1))**26667# Intermediate state68mask2 = (x > 2) & (x <= 3)69energy[mask2] = reaction_energy * 0.67071# Second transition state72mask3 = (x > 3) & (x <= 4)73energy[mask3] = reaction_energy * 0.6 + (barrier_height * 0.8) * np.sin(np.pi * (x[mask3] - 3))**27475# Second intermediate76mask4 = (x > 4) & (x <= 5)77energy[mask4] = reaction_energy * 0.37879# Final products80energy[x > 5] = reaction_energy8182return x, energy8384def generate_md_trajectory(nframes=1000, nresidues=50):85"""Generate sample molecular dynamics trajectory data"""8687time = np.linspace(0, 100, nframes) # 100 ns simulation8889# RMSD with realistic behavior90rmsd_backbone = 1.0 + 0.5 * (1 - np.exp(-time/10)) + 0.2 * np.random.normal(0, 1, nframes)91rmsd_backbone = np.maximum(rmsd_backbone, 0.5) # Ensure positive RMSD9293# RMSF per residue94rmsf_values = np.random.gamma(1.5, 0.8, nresidues)9596# Temperature and pressure97temperature = 300 + 5 * np.sin(time/5) + 2 * np.random.normal(0, 1, nframes)98pressure = 1.0 + 0.1 * np.sin(time/3) + 0.05 * np.random.normal(0, 1, nframes)99100return {101"time": time,102"rmsd_backbone": rmsd_backbone,103"rmsf_values": rmsf_values,104"temperature": temperature,105"pressure": pressure106}107108def generate_docking_results(ncompounds=1000):109"""Generate virtual screening/docking results"""110111np.random.seed(42) # For reproducibility112113# Binding affinities with realistic distribution114binding_scores = np.random.gamma(2, 2, ncompounds) * -1 - 4115binding_scores = np.clip(binding_scores, -12, -4)116117# ADMET properties118molecular_weight = np.random.normal(350, 80, ncompounds)119lipophilicity = np.random.normal(3.5, 1.2, ncompounds)120polar_surface_area = np.random.normal(70, 25, ncompounds)121122# Filter for hits123hits_mask = binding_scores <= -8.0124125return {126"binding_scores": binding_scores,127"molecular_weight": molecular_weight,128"lipophilicity": lipophilicity,129"polar_surface_area": polar_surface_area,130"hits_mask": hits_mask,131"n_hits": np.sum(hits_mask)132}133134if __name__ == "__main__":135# Generate and save sample datasets136print("Generating sample computational chemistry datasets...")137138# Molecular coordinates139benzene = generate_molecular_coordinates("benzene")140print(f"Benzene molecule: {len(benzene['atoms'])} atoms")141142# Energy profile143x, energy = generate_energy_profile()144print(f"Energy profile: {len(x)} points, barrier = {max(energy):.1f} kcal/mol")145146# MD trajectory147md_data = generate_md_trajectory()148print(f"MD trajectory: {len(md_data['time'])} frames")149150# Docking results151docking_data = generate_docking_results()152print(f"Docking results: {len(docking_data['binding_scores'])} compounds, {docking_data['n_hits']} hits")153154print("Sample data generation complete!")155156