Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Views: 42
1
import numpy as np
2
import sys
3
import inspect
4
import os
5
import pandas
6
import matplotlib.pyplot as plt
7
from sklearn.svm import SVR
8
from sklearn import model_selection, tree, neighbors, ensemble, utils
9
from sklearn.metrics import mean_squared_error
10
from sklearn.kernel_ridge import KernelRidge
11
12
13
def error_mean_square_distance(x, y):
    """Return the mean squared error between ``x`` and ``y``.

    Thin wrapper: both arguments are forwarded, in the same order, to
    ``mean_squared_error`` imported from ``sklearn.metrics``.
    """
    mse = mean_squared_error(x, y)
    return mse
15
16
17
def test_regression(x_train, y_train, x_test, y_test, model_fit=None):
    """Fit bagged regressors over a small parameter grid and keep the best one.

    A ``BaggingRegressor`` is trained for every combination of
    ``max_samples``, ``max_features``, ``bootstrap`` and
    ``bootstrap_features``.  The candidate with the lowest mean squared error
    on ``(x_test, y_test)`` wins; its summary is printed and its predictions
    are returned.

    NOTE(review): the winning configuration is selected on the same data that
    is used to report the score, so the printed numbers are optimistic.

    :param x_train: training features.
    :param y_train: training targets.
    :param x_test: features used to evaluate every candidate.
    :param y_test: targets used to evaluate every candidate.
    :param model_fit: ``"SVR"``, ``"KNR"``, ``"DTR"`` or ``"KRR"`` selects the
        estimator that is bagged; any other value passes ``None`` so that
        ``BaggingRegressor`` falls back to its own default.
    :return: predictions of the best candidate on ``x_test``.
    """
    rng = utils.check_random_state(0)
    msg_info = ("type: %s;\n"
                "mse: %f;\n"
                "Parameters: %s with a score of %0.2f;")
    # Regularisation candidates; widen with e.g. np.linspace(0.1, 2000.0, num=3).
    c = [0.1]
    gamma = np.linspace(0.0, 5.0, num=3)
    grid = model_selection.ParameterGrid({"max_samples": [0.5, 1.0],
                                          "max_features": [0.5, 1.0],
                                          "bootstrap": [True, False],
                                          "bootstrap_features": [True, False]})
    if model_fit == "SVR":
        grid_svr = [{'kernel': ['linear'], 'C': c},
                    {'kernel': ['rbf'], 'C': c, 'gamma': gamma},
                    {'kernel': ['sigmoid'], 'C': c, 'gamma': gamma}]
        base_estimator = model_selection.GridSearchCV(SVR(), cv=5, param_grid=grid_svr)
    elif model_fit == "KNR":
        grid_knr = [{'weights': ['uniform', 'distance'], 'algorithm': ['auto'], 'p': [2]}]
        base_estimator = model_selection.GridSearchCV(neighbors.KNeighborsRegressor(), param_grid=grid_knr)
    elif model_fit == "DTR":
        base_estimator = tree.DecisionTreeRegressor()
    elif model_fit == "KRR":
        # alpha = 1 / (2C).  Computed element-wise: ``c`` is a plain list, and
        # ``2. * c`` on a list raises TypeError.
        alpha = [1.0 / (2.0 * c_value) for c_value in c]
        grid_krr = [{'kernel': ['linear'], 'alpha': alpha},
                    {'kernel': ['rbf'], 'alpha': alpha, 'gamma': gamma},
                    {'kernel': ['sigmoid'], 'alpha': alpha, "gamma": gamma}]
        base_estimator = model_selection.GridSearchCV(KernelRidge(), cv=5, param_grid=grid_krr)
    else:
        base_estimator = None

    mod_pred = None
    y_pred = None
    error_pred = None
    for params in grid:
        # NOTE(review): newer scikit-learn releases appear to have renamed
        # ``base_estimator`` to ``estimator`` - confirm against the installed
        # version before upgrading.
        candidate = ensemble.BaggingRegressor(base_estimator=base_estimator, random_state=rng, **params)
        candidate.fit(x_train, y_train)
        candidate_pred = candidate.predict(x_test)
        candidate_error = error_mean_square_distance(y_test, candidate_pred)
        # The first candidate always wins; later ones must be strictly better.
        if error_pred is None or candidate_error < error_pred:
            error_pred = candidate_error
            y_pred = candidate_pred
            mod_pred = candidate
    if mod_pred is not None:
        print(msg_info % (model_fit, error_pred, mod_pred.get_params(), mod_pred.score(x_test, y_test)))
    return y_pred
65
66
67
def get_file_list_from_root_dir(root_dir, extension=".xlsx"):
    """List the entries of ``root_dir`` whose name ends with ``extension``.

    :param root_dir: directory to scan (not recursive).
    :param extension: required file-name suffix.
    :return: list of ``root_dir``-joined paths, in ``os.listdir`` order.
    """
    return [os.path.join(root_dir, entry)
            for entry in os.listdir(root_dir)
            if entry.endswith(extension)]
73
74
75
def collect_data_from_file_list(file_list, sheet_name):
    """Load one sheet from every workbook in ``file_list``.

    Each workbook is read with its first column as the index and converted to
    a ``{index_value: {column: value}}`` dictionary.  Loading is best-effort:
    a workbook that cannot be read is reported on stdout and skipped.

    :param file_list: paths of the workbooks to read.
    :param sheet_name: sheet to load from every workbook.
    :return: dict mapping each file's base name (without extension) to the
        sheet contents.
    """
    data = {}
    for path in file_list:
        base_name_without_ext = os.path.splitext(os.path.basename(path))[0]
        try:
            # ``with`` closes the handle even when parsing fails.
            with open(path, 'rb') as workbook:
                # The keyword is ``sheet_name``; the old ``sheetname`` spelling
                # is rejected by current pandas, which previously made every
                # file fall into the except branch below.
                frame = pandas.read_excel(workbook, index_col=0, sheet_name=sheet_name)
            data[base_name_without_ext] = frame.to_dict(orient='index')
        except Exception as e:
            # Deliberate best-effort: report the failure and continue.
            print(e, base_name_without_ext)
    return data
87
88
89
def extract_vectors_of_specific_data(data, st_name_t_stamps, mark):
    """Collect the ``mark`` values for every (stock name, timestamps) pair.

    :param data: mapping of stock name to that stock's per-timestamp records.
    :param st_name_t_stamps: sized iterable of ``(stock_name, t_stamps)`` pairs.
    :param mark: field to read from every record.
    :return: a list with one vector per pair; when exactly one pair is given,
        that single vector itself (not wrapped in a list).
    """
    if len(st_name_t_stamps) == 1:
        # Single stock: hand back the flat vector.
        stock_name, t_stamp = next(iter(st_name_t_stamps))
        return extract_vectors_of_specific_data_1d(data[stock_name], t_stamp, mark)
    return [extract_vectors_of_specific_data_1d(data[stock_name], t_stamp, mark)
            for stock_name, t_stamp in st_name_t_stamps]
99
100
101
def extract_vectors_of_specific_data_1d(data, st_name_t_stamps, mark):
    """Return ``data[t][mark]`` for every ``t`` in ``st_name_t_stamps``.

    :param data: mapping of timestamp to a record supporting ``record[mark]``.
    :param st_name_t_stamps: iterable of timestamps to look up, in order.
    :param mark: field to read from every record.
    :return: list of the selected values.
    """
    return [data[t_stamp][mark] for t_stamp in st_name_t_stamps]
106
107
108
def plot_data(x, y, title="", xlabel="", ylabel=""):
    """Draw ``y`` against ``x`` on a new 20x5 figure and show it.

    :param x: values for the horizontal axis.
    :param y: values for the vertical axis.
    :param title: figure title.
    :param xlabel: label of the horizontal axis.
    :param ylabel: label of the vertical axis.
    """
    plt.figure(figsize=(20, 5))
    labelling = ((plt.title, title), (plt.xlabel, xlabel), (plt.ylabel, ylabel))
    for apply_text, text in labelling:
        apply_text(text)
    plt.plot(x, y)
    plt.show()
115
116
117
def period_error(period, length):
    """Abort the program when ``period`` is unusable for ``length`` samples.

    A period is accepted when it is greater than 0 and not larger than
    ``length``.  Otherwise a diagnostic naming the calling function is printed
    and the interpreter exits with status -1.

    :param period: look-back period requested by the caller.
    :param length: number of samples available.
    :raises SystemExit: when the period is rejected.
    """
    # ``period == 0`` is rejected as well: the message below demands a period
    # greater than 0, and the indicator functions divide by the period.
    if length < period or period <= 0:
        frame = inspect.currentframe()
        # currentframe() may return None on interpreters without frame support.
        if frame is not None and frame.f_back is not None:
            caller_name = frame.f_back.f_code.co_name
        else:
            caller_name = "<unknown>"
        error_massage = '\n'.join((
            "Function name: " + caller_name,
            "Length of data is: %d" % length,
            "Period is: %d" % period,
            "We need period to be grather then 0 and less then length of data"
        ))
        print("Error occured: " + error_massage + "\nEXIT")
        sys.exit(-1)
131
132
133
def relative_strength_index(close_prices_vecs, period):
    """
    Relative Strength Index (RSI): a momentum indicator comparing the
    magnitude of recent gains with recent losses.

    RSI = 100 - 100 / (1 + RS), where RS is the average of up closes divided
    by the average of down closes over the period.

    Accepts either several price series or a single one: every element with a
    non-empty ``np.shape`` is treated as its own series, while a scalar
    element makes the whole input be processed as one series.

    :return: list of RSI vectors, or one RSI vector for a single series.
    """
    per_series = []
    for prices_vec in close_prices_vecs:
        if not np.shape(prices_vec):
            # Scalar element: the input itself is one flat price series.
            return relative_strength_index_1d(close_prices_vecs, period)
        per_series.append(relative_strength_index_1d(prices_vec, period))
    return per_series
150
151
152
def relative_strength_index_1d(close_prices_vec, period):
    """Compute the RSI of one price series.

    The average gain and loss are seeded from the first ``period`` price
    changes and then smoothed with ``avg = (avg * (period - 1) + x) / period``.

    :param close_prices_vec: closing prices of one instrument.
    :param period: look-back period, validated by ``period_error``.
    :return: list of ``len(close_prices_vec) - period`` RSI values.
    """
    n_prices = len(close_prices_vec)
    period_error(period, n_prices)
    changes = np.diff(close_prices_vec)
    warmup = changes[:period]
    avg_gain = warmup[warmup >= 0].sum() / period
    avg_loss = -warmup[warmup < 0].sum() / period
    values = []
    for offset in range(n_prices - period):
        change = changes[offset + period - 1]
        gain = change if change >= 0 else 0
        loss = -change if change < 0 else 0
        avg_gain = (avg_gain * (period - 1) + gain) / period
        avg_loss = (avg_loss * (period - 1) + loss) / period
        if avg_loss != 0:
            strength = avg_gain / avg_loss
        else:
            # No losses in the window: the ratio is capped at 100.
            strength = 100
        values.append(100. - 100. / (1. + strength))
    return values
167
168
169
def money_flow_index(typc_prices_vecs, volume_data_vecs, period):
    """
    Money Flow Index (MFI): measures the strength of money flowing in and out
    of a security.

    Money Flow (MF) = Typical Price * Volume.
    Money Ratio (MR) = Positive MF / Negative MF.
    MFI = 100 - 100 / (1 + MR).

    Accepts either several (price, volume) series or a single pair: every
    price element with a non-empty ``np.shape`` is treated as its own series,
    while a scalar element makes the whole input be processed as one series.

    :return: list of MFI vectors, or one MFI vector for a single series.
    """
    per_series = []
    for prices, volumes in zip(typc_prices_vecs, volume_data_vecs):
        if not np.shape(prices):
            # Scalar element: the inputs themselves are flat series.
            return money_flow_index_1d(typc_prices_vecs, volume_data_vecs, period)
        per_series.append(money_flow_index_1d(prices, volumes, period))
    return per_series
185
186
187
def money_flow_index_1d(typical_prices_vec, volume_data_vec, period):
    """Compute the Money Flow Index of one series.

    For every window of ``period`` consecutive price changes, the money flow
    (typical price * volume) of rising days is compared with that of falling
    days: ``MFI = 100 - 100 / (1 + positive / negative)``.

    :param typical_prices_vec: typical prices of one instrument.
    :param volume_data_vec: traded volumes; either one entry per price, or one
        entry per price change (i.e. already missing the first day).
    :param period: look-back period, validated by ``period_error``.
    :return: list of ``len(typical_prices_vec) - period`` MFI values.
    """
    len_ty_prices_vec = len(typical_prices_vec)
    period_error(period, len_ty_prices_vec)
    deltas = np.diff(typical_prices_vec)
    # deltas[j] belongs to day j + 1, so the money flow must use that day's
    # price AND that day's volume.  Pairing prices[1:] with volumes[0:] would
    # multiply each price by the previous day's volume.
    if len(volume_data_vec) == len_ty_prices_vec:
        aligned_volumes = volume_data_vec[1:]
    else:
        aligned_volumes = volume_data_vec
    raw_money_flow = np.array([typ * vol for typ, vol in zip(typical_prices_vec[1:], aligned_volumes)])
    mfi = []
    for start in range(len_ty_prices_vec - period):
        window = slice(start, start + period)
        seed = deltas[window]
        flows = raw_money_flow[window]
        # Plain sums suffice: a common 1/period factor cancels in the ratio.
        positive_flow = flows[seed >= 0].sum()
        negative_flow = flows[seed < 0].sum()
        # No negative flow in the window: the ratio is capped at 100.
        mr = positive_flow / negative_flow if negative_flow != 0 else 100
        mfi.append(100. - 100. / (1. + mr))
    return mfi
200
201
202
def typical_prices_vecs(price_list_high, price_list_low, price_list_close):
    """Compute typical prices for several series or for a single one.

    Every ``high`` element with a non-empty ``np.shape`` is treated as its own
    series (together with the matching low/close elements); a scalar element
    makes the three inputs be processed as one flat series.

    :return: list of typical-price vectors, or one vector for a single series.
    """
    per_series = []
    for highs, lows, closes in zip(price_list_high, price_list_low, price_list_close):
        if not np.shape(highs):
            # Scalar element: the inputs themselves are flat series.
            return typical_prices_vecs_1d(price_list_high, price_list_low, price_list_close)
        per_series.append(typical_prices_vecs_1d(highs, lows, closes))
    return per_series
211
212
213
def typical_prices_vecs_1d(price_list_high, price_list_low, price_list_close):
    """Return the mean of high, low and close for every position.

    The three inputs are walked in lockstep; the result is as long as the
    shortest of them.
    """
    typical = []
    for high_low_close in zip(price_list_high, price_list_low, price_list_close):
        typical.append(np.mean(high_low_close))
    return typical
216
217
218
def exponential_moving_average(close_price_vecs, period):
    """
    Exponential Moving Average (EMA): the exponentially weighted moving
    average of a field over a given period.

    EMA = [alpha * T close] + [(1 - alpha) * Y close], where T is today's
    close and Y is yesterday's close.

    Accepts either several series or a single one: every element with a
    non-empty ``np.shape`` is treated as its own series, while a scalar
    element makes the whole input be processed as one series.

    :return: list of EMA vectors, or one EMA vector for a single series.
    """
    per_series = []
    for series in close_price_vecs:
        if not np.shape(series):
            # Scalar element: the input itself is one flat series.
            return exponential_moving_average_1d(close_price_vecs, period)
        per_series.append(exponential_moving_average_1d(series, period))
    return per_series
234
235
236
def exponential_moving_average_1d(close_prices_vec, period):
    """Compute an exponentially weighted moving average of one series.

    The window holds ``period`` samples with normalised weights
    ``exp(linspace(-1, 0, period))``; the newest sample in each window
    receives the largest weight.

    :param close_prices_vec: values of one series.
    :param period: window length, validated by ``period_error``.
    :return: numpy array of ``len(close_prices_vec) - period`` values (the
        first ``period`` outputs are dropped).
    """
    len_cl_prices_vec = len(close_prices_vec)
    period_error(period, len_cl_prices_vec)
    close_prices_vec = np.asarray(close_prices_vec)
    weights = np.exp(np.linspace(-1., 0., period))
    weights /= weights.sum()
    # np.convolve flips its second argument.  Passing the ascending weights
    # as-is would give the smallest weight to the newest sample, so the
    # kernel is reversed to keep the emphasis on recent data.
    ema = np.convolve(close_prices_vec, weights[::-1], mode='full')[:len_cl_prices_vec]
    return ema[period:]
245
246
247
def stochastic_oscillator(high_prices_vecs, low_prices_vecs, close_prices_vecs, period):
    """
    Stochastic Oscillator (SO): the difference between the current closing
    price and the lowest low, relative to the range between the highest high
    and the lowest low over a given period.

    %K = [(Close price - Lowest price) / (Highest price - Lowest price)] * 100

    Accepts either several series or a single one: every ``high`` element with
    a non-empty ``np.shape`` is treated as its own series, while a scalar
    element makes the three inputs be processed as one flat series.

    :return: list of %K vectors, or one %K vector for a single series.
    """
    per_series = []
    for highs, lows, closes in zip(high_prices_vecs, low_prices_vecs, close_prices_vecs):
        if not np.shape(highs):
            # Scalar element: the inputs themselves are flat series.
            return stochastic_oscillator_1d(high_prices_vecs, low_prices_vecs, close_prices_vecs, period)
        per_series.append(stochastic_oscillator_1d(highs, lows, closes, period))
    return per_series
264
265
266
def stochastic_oscillator_1d(high_prices_vec, low_prices_vec, close_prices_vec, period):
    """Compute %K of one series.

    Each output uses a window of ``period + 1`` highs/lows ending at the
    close it is compared with.

    :param high_prices_vec: high prices.
    :param low_prices_vec: low prices.
    :param close_prices_vec: closing prices.
    :param period: look-back period, validated by ``period_error``.
    :return: list of ``len(high_prices_vec) - period`` values; 100 wherever
        the highest high equals the lowest low.
    """
    n_points = len(high_prices_vec)
    period_error(period, n_points)
    window_starts = range(0, n_points - period)
    highest = [max(high_prices_vec[start:start + period + 1]) for start in window_starts]
    lowest = [min(low_prices_vec[start:start + period + 1]) for start in window_starts]
    close_above_low = np.subtract(close_prices_vec[period:], lowest)
    trading_range = np.subtract(highest, lowest)
    percent_k = []
    for numerator, denominator in zip(close_above_low, trading_range):
        if denominator != 0:
            percent_k.append((numerator / denominator) * 100)
        else:
            # Flat window: avoid dividing by zero.
            percent_k.append(100)
    return percent_k
275
276
277
def moving_average_convergence_divergence(close_prices_vecs, slow_period=26, fast_period=12):
    """
    Moving Average Convergence/Divergence (MACD): the difference between a
    short-term and a long-term moving average of a field.

    MACD = [fast EMA of closing prices] - [slow EMA of closing prices]

    Accepts either several series or a single one: every element with a
    non-empty ``np.shape`` is treated as its own series, while a scalar
    element makes the whole input be processed as one series.

    :return: list of MACD vectors, or one MACD vector for a single series.
    """
    per_series = []
    for series in close_prices_vecs:
        if not np.shape(series):
            # Scalar element: the input itself is one flat series.
            return moving_average_convergence_divergence_1d(close_prices_vecs, slow_period, fast_period)
        per_series.append(moving_average_convergence_divergence_1d(series, slow_period, fast_period))
    return per_series
292
293
294
def moving_average_convergence_divergence_1d(close_prices_vec, slow_period=26, fast_period=12):
    """Compute MACD (fast EMA minus slow EMA) of one series.

    Both periods are validated by ``period_error``.  The fast EMA is trimmed
    at the front so that it has the same number of values as the slow EMA
    before the two are subtracted.

    :return: numpy array with the element-wise difference.
    """
    n_prices = len(close_prices_vec)
    for span in (slow_period, fast_period):
        period_error(span, n_prices)
    slow_ema = exponential_moving_average(close_prices_vec, slow_period)
    fast_ema = exponential_moving_average(close_prices_vec, fast_period)
    offset = len(fast_ema) - len(slow_ema)
    return np.subtract(fast_ema[offset:], slow_ema)
301
302
303
def signal_line(macd_data_vecs, period=9):
    """
    Signal Line = ``period``-day EMA of MACD.

    Accepts either several MACD series or a single one: every element with a
    non-empty ``np.shape`` is treated as its own series, while a scalar
    element makes the whole input be processed as one series.

    :return: list of signal-line vectors, or one vector for a single series.
    """
    per_series = []
    for series in macd_data_vecs:
        if not np.shape(series):
            # Scalar element: the input itself is one flat series.
            return signal_line_1d(macd_data_vecs, period)
        per_series.append(signal_line_1d(series, period))
    return per_series
315
316
317
def signal_line_1d(macd_data_vec, period=9):
    """Return the ``period``-sample EMA of one MACD series.

    The period is validated by ``period_error`` before the series is handed
    to ``exponential_moving_average``.
    """
    period_error(period, len(macd_data_vec))
    smoothed = exponential_moving_average(macd_data_vec, period)
    return smoothed
321
322