CoCalc -- uwg_preparation.ipynb

JFM-2024-1227

JFM-Notebooks / Experimental_Dataset / with_flow / 5cm_water_depth / uwg_preparation.ipynb

³⁴⁸² views

unlisted

ubuntu2204

Kernel: Python 3 (ipykernel)

In [1]:

import numpy as np
import matplotlib.pyplot as plt

In [2]:

data_dir = "/nfs/stak/users/harrys/depot/Sam/Rivermouth_Tsunami/With_Flow_Experiments/Data/h50_12gpm_wavegauges"
prep_dir = "/nfs/stak/users/harrys/depot/Sam/Rivermouth_Tsunami/With_Flow_Experiments/Prepared_Data/wavegauges"

filenames = [
    [f"sol3rd_h50_a05_12gpm_loc0_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a10_12gpm_loc0_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a15_12gpm_loc0_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a20_12gpm_loc0_trial{n+1}.csv" for n in range(3)],
    [f"undular_h50_a05_12gpm_loc0_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a05_12gpm_loc3_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a10_12gpm_loc3_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a15_12gpm_loc3_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a20_12gpm_loc3_trial{n+1}.csv" for n in range(3)],
    [f"undular_h50_a05_12gpm_loc3_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a05_12gpm_loc6_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a10_12gpm_loc6_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a15_12gpm_loc6_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a20_12gpm_loc6_trial{n+1}.csv" for n in range(3)],
    [f"undular_h50_a05_12gpm_loc6_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a05_12gpm_loc9_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a10_12gpm_loc9_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a15_12gpm_loc9_trial{n+1}.csv" for n in range(3)],
    [f"sol3rd_h50_a20_12gpm_loc9_trial{n+1}.csv" for n in range(3)],
    [f"undular_h50_a05_12gpm_loc9_trial{n+1}.csv" for n in range(3)],
]

casenames = [
    'sol3rd_h50_a05_12gpm_loc0',
    'sol3rd_h50_a10_12gpm_loc0',
    'sol3rd_h50_a15_12gpm_loc0',
    'sol3rd_h50_a20_12gpm_loc0',
    'undular_h50_a05_12gpm_loc0',
    'sol3rd_h50_a05_12gpm_loc3',
    'sol3rd_h50_a10_12gpm_loc3',
    'sol3rd_h50_a15_12gpm_loc3',
    'sol3rd_h50_a20_12gpm_loc3',
    'undular_h50_a05_12gpm_loc3',
    'sol3rd_h50_a05_12gpm_loc6',
    'sol3rd_h50_a10_12gpm_loc6',
    'sol3rd_h50_a15_12gpm_loc6',
    'sol3rd_h50_a20_12gpm_loc6',
    'undular_h50_a05_12gpm_loc6',
    'sol3rd_h50_a05_12gpm_loc9',
    'sol3rd_h50_a10_12gpm_loc9',
    'sol3rd_h50_a15_12gpm_loc9',
    'sol3rd_h50_a20_12gpm_loc9',
    'undular_h50_a05_12gpm_loc9',
]

data = {}

for case in filenames:
    for name in case:
        data[name] = {}
        data[name]['raw_data'] = np.loadtxt(f"{data_dir}/{name}",usecols=(1,2,3,4),delimiter=',')

In [3]:

for case, casename in zip(filenames,casenames):
    # each should be the same number of data points but sometimes there is a short one
    # so find the smallest number of data points and truncate the rest to match
    # assumes that all 4 gauges are used
    # assumes that each name in case is a trial
    shapes = [ data[name]['raw_data'].shape for name in case ]
    shape_0 = sorted([ shape[0] for shape in shapes ])[0]
    shape_1 = sorted([ shape[1] for shape in shapes ])[0]
    shape = (shape_0,shape_1,len(case))
    
    xpos = np.empty((shape_1,len(case)),dtype=int)
    ypos = np.empty((shape_1,len(case)),dtype=int)
    eta  = np.empty(shape) #allocate an array to fill
    for idx, name in enumerate(case):
        # determine location data based on name
        if 'loc0' in name:
            x = [-2161, -1551,  -980,  -370]
            y = [    0,     0,     0,     0]
        elif 'loc3' in name:
            x = [-2161, -1551,  -980,  -370]
            y = [  300,   300,   300,   300]
        elif 'loc6' in name:
            x = [-2161, -1551,  -980,  -370]
            y = [  600,   600,   600,   600]
        elif 'loc9' in name:
            x = [-2161, -1551,  -980,  -370]
            y = [  900,   900,   900,   900]
        else:
            raise ValueError(f'No known location positions for {casename}')
        
        # write position on per name basis
        data[name]['x'] = np.array(x,dtype=int)
        data[name]['y'] = np.array(y,dtype=int)
        # write position on per name-trial basis
        xpos[:,idx]  = data[name]['x']
        ypos[:,idx]  = data[name]['y']
        # write raw data to large array
        eta[:,:,idx] = data[name]['raw_data'][:shape_0,:shape_1]
    
    data[casename] = {}
    data[casename]['eta_raw'] = eta

    # check location data
    assert xpos.all(axis=1).all(), f"x data not all the same for case {casename}, something has gone wrong"
    assert xpos.all(axis=1).all(), f"y data not all the same for case {casename}, something has gone wrong"
    
    # verified that they're all the same - take the zeroth
    data[casename]['x'] = xpos[:,0]
    data[casename]['y'] = ypos[:,0]

In [4]:

for case, casename in zip(filenames,casenames):
    xpos = np.repeat(data[casename]['x'].reshape(-1,1),3,axis=1)
    ypos = np.repeat(data[casename]['y'].reshape(-1,1),3,axis=1)
    eta_raw = data[casename]['eta_raw']
    
    # because of the way we repeat xpos, the ravel ends up like [x1 x1 x1 x2 x2 x2 x3 x3 x3 x4 x4 x4]
    xpos = xpos.ravel()
    ypos = ypos.ravel()
    
    # want to reshape eta to have columns like [g1_t1 g1_t2 g1_t3 g2_t1 g2_t2 g2_t3 ...]
    s1, s2, s3 = eta_raw.shape
    
    eta_raw = eta_raw.reshape(s1,s2*s3)
    plt.plot(eta_raw[:,:3])
    plt.show()
    
    header = f"# This data is for {casename} where each row is a point in time with dt={1/50:.4f} seconds where each x location (millimeters) in the first and each y position (millimters) in the second." \
       "\n" + "# There are four gauges and three trials at each location so reshape from (L,12) to (L,4,3) when using numpy arrays"
    x_locations = " ".join([ str(x) for x in xpos ])
    y_locations = " ".join([ str(y) for y in ypos ])
    
    header = '\n'.join([header,x_locations,y_locations])
    
    np.savetxt(f"{prep_dir}/{casename}_rawdata.txt",eta_raw,header=header,comments='')

Out[4]:

In [5]:

for case, casename in zip(filenames,casenames):
    # row 0 and 1 are comments
    xpos = np.loadtxt(f"{prep_dir}/{casename}_rawdata.txt",skiprows=2,max_rows=1,dtype=int)
    ypos = np.loadtxt(f"{prep_dir}/{casename}_rawdata.txt",skiprows=3,max_rows=1,dtype=int)
    eta_raw = np.loadtxt(f"{prep_dir}/{casename}_rawdata.txt",skiprows=4)
    
    xpos = xpos.reshape(4,3)[:,0]
    ypos = ypos.reshape(4,3)[:,0]
    eta_raw = eta_raw.reshape(-1,4,3)
    
    for gauge in range(eta_raw.shape[1]):
        plt.plot(eta_raw[:,gauge,:])
        plt.title(f'{casename}: x={xpos[gauge]}mm,y={ypos[gauge]}mm')
        plt.show()

Out[5]:

In [6]:

for case, casename in zip(filenames,casenames):
    eta_raw = data[casename]['eta_raw']
    eta_raw -= eta_raw[0,:,:]
    
    eta_mean = np.mean(eta_raw,axis=-1)
    data[casename]['eta_ensemble_mean'] = eta_mean
    
    eta_median = np.median(eta_raw,axis=-1)
    data[casename]['eta_ensemble_median'] = eta_median
    
    header = f"# This data is for {casename} where each row is a point in time with dt={1/50:.4f} seconds where each x location (millimeters) in the first and each y position (millimters) in the second."
    x_locations = " ".join([ str(x) for x in data[casename]['x'] ])
    y_locations = " ".join([ str(y) for y in data[casename]['y'] ])
    
    header = '\n'.join([header,x_locations,y_locations])
    
    np.savetxt(f"{prep_dir}/{casename}_ensemble_mean.txt",eta_mean,header=header,comments='')
    np.savetxt(f"{prep_dir}/{casename}_ensemble_median.txt",eta_mean,header=header,comments='')

In [ ]:

Product

Resources

Company