CoCalc -- prepare_uwg

JFM-2024-1227

JFM-Notebooks / Experimental_Dataset / with_flow / 8cm_water_depth / prepare_uwg_data.ipynb

3482 views

unlisted

ubuntu2204

Kernel: Python 3 (ipykernel)

In [1]:

import os
import numpy as np
import matplotlib.pyplot as plt

import scipy.signal as signal

In [2]:

# 'thesis' is a custom style sheet that has to be installed in the user's
# matplotlib configuration; it is not shipped with matplotlib. The figures in
# this notebook are only diagnostics, so fall back to the default style
# instead of aborting the whole run when the style sheet is missing.
try:
    plt.style.use('thesis')
except OSError:
    print("matplotlib style 'thesis' not found; using the default style")

In [3]:

# Write filenames.txt: the regular *.csv files located directly inside
# ultrasonic_wave_gauge/raw_data (no sub-directories), sorted, with every path
# containing "stats" dropped, each resolved to a canonical absolute path by
# readlink -f. The search path is relative, so the kernel's working directory
# must be the folder that contains ultrasonic_wave_gauge/.
# NOTE: xargs splits its input on whitespace, so paths containing spaces
# would be broken apart.
!find "ultrasonic_wave_gauge/raw_data" -maxdepth 1 -type f -name "*.csv" | sort | grep -v "stats" | xargs readlink -f > "filenames.txt"

In [4]:

# Read back the list of raw-data paths, one path per line.
with open("filenames.txt", "r") as listing:
    filenames = listing.read().splitlines()

# The bare file names carry the experiment metadata that is parsed later.
basenames = list(map(os.path.basename, filenames))

In [5]:

# Gauge positions for every location code found in the file names, stored as
# (x positions, y positions) with one entry per gauge column that is loaded.
gauge_layout = {
    0: ([  600,    0, -600,-1200], [    0,    0,    0,    0]),
    1: ([ -296, -896,-1496,-2096], [    0,    0,    0,    0]),
    2: ([ -296, -896,-1496,-2096], [  183,  183,  183,  183]),
    3: ([ -296, -896,-1496,-2096], [  366,  366,  366,  366]),
    4: ([ 2100, 1500,  900,  300], [    0,    0,    0,    0]),
}

# data[<wavetype>_a<amp>][loc<loc>] holds one "trial<n>" array per file plus
# the "x_data" / "y_data" gauge positions of that location.
data = {}

for name, path in zip(basenames, filenames):
    # file names consist of six underscore-separated fields:
    # <wavetype>_h<depth>_<rate>gpm_a<amp>_loc<loc>_trial<trial>.csv
    wavetype, depth, rate, amp, loc, trial = name.split('_')

    # strip the text around each number; int() raises if a field is malformed
    depth = int(depth[1:])      # drop leading 'h'
    rate  = int(rate[:-3])      # drop trailing 'gpm'
    amp   = int(amp[1:])        # drop leading 'a'
    loc   = int(loc[3:])        # drop leading 'loc'
    trial = int(trial[5:-4])    # drop leading 'trial' and trailing '.csv'
    # NOTE: depth and rate are parsed but not used in the dictionary keys below.

    if loc not in gauge_layout:
        print(f"No location data entred for loc {loc}")
        raise ValueError(f"No location data entred for loc {loc}")
    x_data, y_data = gauge_layout[loc]

    # create the nested dictionaries on first use
    case_record = data.setdefault(f"{wavetype}_a{amp:02d}", {})
    loc_record  = case_record.setdefault(f"loc{loc}", {})

    # columns 1-4 of each file are loaded (one column per gauge position)
    loc_record[f"trial{trial}"] = np.loadtxt(path,delimiter=',',usecols=(1,2,3,4))
    loc_record["x_data"] = np.array(x_data,dtype=int)
    loc_record["y_data"] = np.array(y_data,dtype=int)

In [6]:

for wavecase, case_record in data.items():
    for loc, loc_record in case_record.items():
        # gather the per-trial arrays; the key filter skips "x_data", "y_data"
        # and any results stored by an earlier run of this cell
        all_trials = [loc_record[key] for key in loc_record if "trial" in key]

        # trials may differ in length, so trim all of them to the smallest
        # common (rows, columns) and stack them along a new last axis
        p = min(d.shape[0] for d in all_trials)
        q = min(d.shape[1] for d in all_trials)
        r = len(all_trials)
        loc_data = np.empty((p,q,r))
        for idx, dat in enumerate(all_trials):
            loc_data[...,idx] = dat[:p,:q]

        loc_record["loc_data"] = loc_data

        # the absolute reading drifts with the time of day (temperature), so
        # keep only the change relative to the median of the first 100 samples
        # of every gauge and trial; done in place, so the stored array changes too
        loc_data -= np.median(loc_data[:100],axis=0)

        # ensemble statistics across trials (last axis), scaled by 1000
        # to convert to mm
        ensemble_avg = 1000 * np.mean(loc_data,axis=-1)
        ensemble_med = 1000 * np.median(loc_data,axis=-1)

        loc_record["ensemble_avg"] = ensemble_avg
        loc_record["ensemble_med"] = ensemble_med

        # overlay the ensemble averages of every location of this wave case
        plt.plot(ensemble_avg)

    # one figure per wave case
    plt.title(f"{wavecase}")
    plt.show()
    plt.close()

Out[6]:

In [7]:

# Directory that receives the prepared files. It is given relative to the
# notebook directory, like the raw-data folder searched earlier, so the cell
# does not depend on one machine's absolute NFS mount point. It is created on
# first use.
filepath = os.path.join("ultrasonic_wave_gauge", "prepared")
os.makedirs(filepath, exist_ok=True)

for wavecase in data.keys():
    # case keys look like "<wavetype>_a<amp>", so amp keeps its "a" prefix
    wavetype, amp = wavecase.split('_')

    for loc in data[wavecase].keys():
        loc_record = data[wavecase][loc]
        x_data = loc_record["x_data"]
        y_data = loc_record["y_data"]

        # NOTE: depth (h80) and flow rate (g20) are fixed text here; they are
        # not taken from the values parsed out of the raw file names.
        case_id = f"{wavetype}_h80_g20_{amp}_{loc}"

        # Header: two '#' comment lines, then one space-separated row of gauge
        # x positions and one row of gauge y positions.
        l1 = f"# This data is for {case_id} where each row after the second is a point in time with time step 0.02 seconds (i.e. 50 Hz sampling)."
        l2 = "# The first row is x location (millimeter) and the second row is y position (millimters)."
        l3 = ' '.join([f"{x:d}" for x in x_data])
        l4 = ' '.join([f"{y:d}" for y in y_data])
        header = '\n'.join([l1,l2,l3,l4])

        # the same layout is written once for the ensemble average and once
        # for the ensemble median
        for label, values in (("average", loc_record["ensemble_avg"]),
                              ("median",  loc_record["ensemble_med"])):
            ffp = os.path.join(filepath, f"{case_id}_ensemble_{label}.csv")
            # comments='' because the custom header already carries its own
            # comment markers and the position rows must stay unprefixed
            np.savetxt(ffp,values,header=header,comments='',delimiter=',')

In [8]:

# wave cases grouped by the amplitude tag contained in their key
a06_case = [wavecase for wavecase in data.keys() if 'a06' in wavecase]
a07_case = [wavecase for wavecase in data.keys() if 'a07' in wavecase]
a08_case = [wavecase for wavecase in data.keys() if 'a08' in wavecase]
a16_case = [wavecase for wavecase in data.keys() if 'a16' in wavecase]
a24_case = [wavecase for wavecase in data.keys() if 'a24' in wavecase]
a32_case = [wavecase for wavecase in data.keys() if 'a32' in wavecase]

# x position of the gauge whose individual trials are inspected
probe_x = 900

# Diagnostic only: overlay the individual trials of one gauge. Everything is
# kept in local variables; nothing is written back into `data`, so the
# ensemble results stored there (median baseline, converted to mm) stay
# valid if the export cell is run again after this one.
for wavecase in a07_case:
    for loc in ['loc4']:
        loc_record = data[wavecase][loc]

        # per-trial arrays only; other keys hold positions and derived results
        trial_keys = [key for key in loc_record if "trial" in key]
        all_trials = [loc_record[key] for key in trial_keys]

        # trim all trials to the smallest common (rows, columns) and stack
        # them along a new last axis
        p = min(d.shape[0] for d in all_trials)
        q = min(d.shape[1] for d in all_trials)
        r = len(all_trials)
        loc_data = np.empty((p,q,r))
        for idx, dat in enumerate(all_trials):
            loc_data[...,idx] = dat[:p,:q]

        # remove each trial's offset using the mean of its first 100 samples
        # NOTE(review): the ensemble cell uses the median here - confirm the
        # difference is intended
        loc_data -= np.mean(loc_data[:100],axis=0)

        # column of the gauge located at probe_x
        gauge_cols = np.flatnonzero(loc_record["x_data"] == probe_x)
        if gauge_cols.size == 0:
            print(f"No gauge at x = {probe_x} for {wavecase} {loc}")
            continue
        gauge = gauge_cols[0]

        # label every curve with its trial key so it can be matched to a file
        for idx, key in enumerate(trial_keys):
            plt.plot(loc_data[:,gauge,idx],label=key)

    plt.title(f"{wavecase}")
    plt.legend()
    plt.show()
    plt.close()

Out[8]:

In [ ]:

Product

Resources

Company