CoCalc -- e1_time_series_data

GitHub Repository: RWTH-EBC/ebcpy
Path: blob/master/examples/e1_time_series_data_example.py
⁵⁰⁵ views
1
"""
2
Goals of this part of the examples:
3
1. Learn how to use `load_time_series_data` and the `.tsd` accessor
4
2. Understand why we use this approach
5
3. Get to know the different processing functions
6
4. See how this compares to the legacy `TimeSeriesData` class
7
"""
8
# Start by importing all relevant packages
9
import pathlib
10
import numpy as np
11
import matplotlib.pyplot as plt
12
# Imports from ebcpy
13
from ebcpy import load_time_series_data
14

15
# For backwards compatibility example
16
from ebcpy import TimeSeriesData  # Will show a DeprecationWarning
17

18

19
def main(with_plot=True):
    """
    Walk through loading and processing time series data with ebcpy.

    The example loads the tutorial data with ``load_time_series_data``,
    switches the index between float and datetime, applies a low-pass
    filter and a moving average, re-samples the data and finally repeats
    the processing with the legacy ``TimeSeriesData`` class.

    Arguments of this example:

    :param bool with_plot:
        Show the plot at the end of the script. Default is True.
        If False, the figures created by this example are closed
        instead of being shown.
    """
    # First get the path with relevant input files:
    basepath = pathlib.Path(__file__).parents[1].joinpath("tutorial", "data")
    # Note: We often use pathlib. If you're not familiar and want to learn more,
    # just search for any of the many tutorials available online.

    # ######################### Loading Time Series Data ##########################
    # First we open a simulation result file (.mat)
    df_mat = load_time_series_data(basepath.joinpath('simulatedData.mat'))
    print(df_mat)
    # Now a .csv. .xlsx works as well (with sheet_name parameter).
    df_csv = load_time_series_data(basepath.joinpath('excelData.csv'))
    print(df_csv)
    # Or construct like any pandas DataFrame
    df_random = load_time_series_data({"A": np.random.rand(100), "B": np.random.rand(100)})
    print(df_random)

    # ######################### Why do we use this approach? ##########################
    # Unlike the old TimeSeriesData which inherited from DataFrame,
    # our new approach uses standard pandas DataFrames with a custom accessor.
    # This makes it fully compatible with pandas ecosystem and tools like PyCharm's DataFrame viewer.
    # Moreover, the old MultiColumn approach using variable names and tags was useful when processing
    # variables with multiple stages, but made data handling much harder for everyone else.
    # Obviously, you can still create a multicolumn pd.DataFrame and use the old tag system,
    # it is just not the default anymore.
    print("The loaded object is a standard", type(df_csv).__name__)
    print("Time series functionality is available through the .tsd accessor")

    # ######################### Processing Time Series Data ##########################
    # Index changing. The return values of the conversion calls are not
    # captured; the example relies on them updating `df_csv` itself, which
    # is why the index is printed before and after each call.
    print(df_csv.index)
    df_csv.tsd.to_datetime_index(unit_of_index="s")
    print(df_csv.index)
    df_csv.tsd.to_float_index(offset=0)
    print(df_csv.index)

    # Some filter options
    # Apply filters and create new columns with results
    df_csv["outputs.TRoom_lowPass2"] = df_csv.tsd.low_pass_filter(
        crit_freq=0.1, filter_order=2, variable="outputs.TRoom")
    print(df_csv)

    # Moving average
    df_csv["outputs.TRoom_MovingAverage"] = df_csv.tsd.moving_average(
        window=50, variable="outputs.TRoom")
    print(df_csv)

    # Plot the different processed signals.
    # Keep a handle to the figure so it can be closed if it is never shown.
    fig_filters = plt.figure()
    plt.plot(df_csv.index, df_csv["outputs.TRoom"], label="Raw")
    plt.plot(df_csv.index, df_csv["outputs.TRoom_lowPass2"], label="Low-pass (order 2)")
    plt.plot(df_csv.index, df_csv["outputs.TRoom_MovingAverage"], label="Moving Average")
    plt.legend()

    # How-to re-sample your data:
    # The desired frequency is passed as a string, e.g. "60s" for 60 seconds.
    # Play around with this value to see what happens.
    # First convert to DateTimeIndex (required for this function)
    df_csv.tsd.to_datetime_index(unit_of_index="s")
    # Create a copy to later reference the change.
    df_csv_ref = df_csv.copy()
    df_csv.tsd.clean_and_space_equally(desired_freq="60s")
    fig_resampled = plt.figure()
    plt.plot(df_csv_ref.index, df_csv_ref["outputs.TRoom"], label="Reference", color="blue")
    plt.plot(df_csv.index, df_csv["outputs.TRoom"], label="Resampled", color="red")
    plt.legend()

    # ######################### Legacy TimeSeriesData Example ##########################
    # For reference, here's how the same operations would be done with the legacy class
    # Note: This will display a DeprecationWarning
    print("\n--- Legacy TimeSeriesData Example (Deprecated) ---")
    tsd_legacy = TimeSeriesData(basepath.joinpath('excelData.csv'), use_multicolumn=True)
    tsd_legacy.to_datetime_index(unit_of_index="s")
    tsd_legacy.low_pass_filter(crit_freq=0.1, filter_order=2,
                               variable="outputs.TRoom", new_tag="lowPass2")
    tsd_legacy.moving_average(window=50, variable="outputs.TRoom",
                              tag="raw", new_tag="MovingAverage")
    print("Legacy TimeSeriesData object with tags:", tsd_legacy.get_tags(variable="outputs.TRoom"))
    print(tsd_legacy)

    if with_plot:
        plt.show()
    else:
        # Figures created through pyplot stay registered until they are
        # closed explicitly. Without this, every call with with_plot=False
        # would leave two more figures open. Only the figures created here
        # are closed, so figures owned by the caller are left untouched.
        plt.close(fig_filters)
        plt.close(fig_resampled)
107

108

109
if __name__ == "__main__":
    # Imported here so that merely importing this module does not pull in
    # the reproduction utilities; they are only needed for a script run.
    from ebcpy.utils import reproduction

    # Run the example with its defaults, then store a reproduction archive
    # of this run.
    main()
    reproduction.save_reproduction_archive(title="log-testing")
113

114
Product

Resources

Company