# Pipeline definition: relational feature engineering over the GO sales tables
# (go_1k.csv joined with go_daily_sales.csv, go_retailers.csv, go_products.csv,
# and go_methods.csv), followed by autoai_libs preprocessing, cognito feature
# engineering (TA1/FS1/TA2), and a gradient-boosting regressor.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline, make_union

import autoai_libs.cognito.transforms.textras_methods
import autoai_libs.utils.fc_methods
from autoai_libs.cognito.transforms.transform_utils import FS1, TA1, TA2
from autoai_libs.transformers.exportable import (
    CatEncoder,
    CatImputer,
    CompressStrings,
    FloatStr2Float,
    NumImputer,
    NumpyColumnSelector,
    NumpyPermuteArray,
    NumpyReplaceMissingValues,
    NumpyReplaceUnknownValues,
    OptStandardScaler,
    boolean2float,
    float32_transform,
)

# Note: count, distinct_count, max, mean, min, string_indexer, sum, and
# variance are lale expression builders; max, min, and sum intentionally
# shadow the Python builtins and are only used inside Map/Aggregate columns.
from lale.expressions import (
    count,
    distinct_count,
    it,
    max,
    mean,
    min,
    string_indexer,
    sum,
    variance,
)
from lale.lib.lale import Aggregate, ConcatFeatures, Join, Map, Relational, Scan
from lale.operators import make_pipeline_graph
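# Relational feature engineering: scan each raw table, then derive per-row
# features for go_1k.csv by joining it with go_daily_sales.csv,
# go_retailers.csv, go_products.csv, and go_methods.csv, aggregating the
# daily-sales measures (mean/min/max/sum/variance) and counting order-method
# types, grouped by row_id.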
scan = Scan(table=it["go_1k.csv"])
map = Map(
columns={
"[go_1k.csv]|Product number|identity": it["Product number"],
"[go_1k.csv]|Retailer code|identity": it["Retailer code"],
"Quantity": it["Quantity"],
"prefix_0_id": it["prefix_0_id"],
},
remainder="drop",
)
scan_0 = Scan(table=it["go_daily_sales.csv"])
join = Join(
pred=[
(
it["go_1k.csv"]["Retailer code"]
== it["go_daily_sales.csv"]["Retailer code"]
),
(
it["go_1k.csv"]["Product number"]
== it["go_daily_sales.csv"]["Product number"]
),
],
join_limit=50.0,
)
aggregate = Aggregate(
columns={
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|mean": mean(
it["Unit price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|mean": mean(
it["Quantity"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|sum": sum(
it["Unit price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|sum": sum(
it["Quantity"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|max": max(
it["Quantity"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|min": min(
it["Unit price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|variance": variance(
it["Quantity"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|variance": variance(
it["Order method code"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|mean": mean(
it["Unit sale price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|max": max(
it["Unit price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|max": max(
it["Unit sale price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|sum": sum(
it["Unit sale price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|variance": variance(
it["Unit price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|min": min(
it["Order method code"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|sum": sum(
it["Order method code"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|variance": variance(
it["Unit sale price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|min": min(
it["Quantity"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|min": min(
it["Unit sale price"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|mean": mean(
it["Order method code"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|max": max(
it["Order method code"]
),
},
group_by=it["row_id"],
)
pipeline_4 = make_pipeline(join, aggregate)
scan_1 = Scan(table=it["go_retailers.csv"])
join_0 = Join(
pred=[
(
it["go_1k.csv"]["Retailer code"]
== it["go_retailers.csv"]["Retailer code"]
)
]
)
map_0 = Map(
columns={
"[go_1k.csv](Retailer code)[go_retailers.csv]|Retailer name|identity": it[
"Retailer name"
],
"[go_1k.csv](Retailer code)[go_retailers.csv]|Country|identity": it[
"Country"
],
"[go_1k.csv](Retailer code)[go_retailers.csv]|Type|identity": it[
"Type"
],
},
remainder="drop",
)
pipeline_5 = make_pipeline(join_0, map_0)
scan_2 = Scan(table=it["go_products.csv"])
join_1 = Join(
pred=[
(
it["go_1k.csv"]["Product number"]
== it["go_products.csv"]["Product number"]
)
]
)
map_1 = Map(
columns={
"[go_1k.csv](Product number)[go_products.csv]|Product type|identity": it[
"Product type"
],
"[go_1k.csv](Product number)[go_products.csv]|Product brand|identity": it[
"Product brand"
],
"[go_1k.csv](Product number)[go_products.csv]|Unit cost|identity": it[
"Unit cost"
],
"[go_1k.csv](Product number)[go_products.csv]|Unit price|identity": it[
"Unit price"
],
"[go_1k.csv](Product number)[go_products.csv]|Product line|identity": it[
"Product line"
],
"[go_1k.csv](Product number)[go_products.csv]|Product|identity": it[
"Product"
],
"[go_1k.csv](Product number)[go_products.csv]|Product color|identity": it[
"Product color"
],
},
remainder="drop",
)
pipeline_6 = make_pipeline(join_1, map_1)
scan_3 = Scan(table=it["go_methods.csv"])
join_2 = Join(
pred=[
(
it["go_1k.csv"]["Retailer code"]
== it["go_daily_sales.csv"]["Retailer code"]
),
(
it["go_1k.csv"]["Product number"]
== it["go_daily_sales.csv"]["Product number"]
),
(
it["go_daily_sales.csv"]["Order method code"]
== it["go_methods.csv"]["Order method code"]
),
],
join_limit=50.0,
)
aggregate_0 = Aggregate(
columns={
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|distinct_count": distinct_count(
it["Order method type"]
),
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|count": count(
it["Order method type"]
),
},
group_by=it["row_id"],
)
pipeline_7 = make_pipeline(join_2, aggregate_0)
map_2 = Map(
columns=[
string_indexer(
it[
"[go_1k.csv](Retailer code)[go_retailers.csv]|Retailer name|identity"
]
),
string_indexer(
it["[go_1k.csv](Retailer code)[go_retailers.csv]|Type|identity"]
),
string_indexer(
it[
"[go_1k.csv](Retailer code)[go_retailers.csv]|Country|identity"
]
),
string_indexer(
it[
"[go_1k.csv](Product number)[go_products.csv]|Product line|identity"
]
),
string_indexer(
it[
"[go_1k.csv](Product number)[go_products.csv]|Product brand|identity"
]
),
string_indexer(
it[
"[go_1k.csv](Product number)[go_products.csv]|Product color|identity"
]
),
string_indexer(
it[
"[go_1k.csv](Product number)[go_products.csv]|Product|identity"
]
),
string_indexer(
it[
"[go_1k.csv](Product number)[go_products.csv]|Product type|identity"
]
),
]
)
pipeline_8 = make_pipeline(ConcatFeatures(), map_2)
relational = Relational(
operator=make_pipeline_graph(
steps=[
scan,
map,
scan_0,
pipeline_4,
scan_1,
pipeline_5,
scan_2,
pipeline_6,
scan_3,
pipeline_7,
pipeline_8,
],
edges=[
(scan, map),
(scan, pipeline_4),
(scan, pipeline_5),
(scan, pipeline_6),
(scan, pipeline_7),
(map, pipeline_8),
(scan_0, pipeline_4),
(scan_0, pipeline_7),
(pipeline_4, pipeline_8),
(scan_1, pipeline_5),
(pipeline_5, pipeline_8),
(scan_2, pipeline_6),
(pipeline_6, pipeline_8),
(scan_3, pipeline_7),
(pipeline_7, pipeline_8),
],
)
)
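# Preprocessing branch 1 (columns treated as categorical): select 15 columns,
# hash their string values, map missing values to the sentinel 100001, replace
# values not seen at training time with the same sentinel, impute with the
# most frequent category, and ordinal-encode as float32.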
numpy_column_selector_0 = NumpyColumnSelector(
columns=[0, 3, 9, 11, 18, 23, 24, 27, 28, 33, 34, 35, 36, 37, 39]
)
compress_strings = CompressStrings(
compress_type="hash",
    dtypes_list=["float_int_num"] * 15,  # all 15 selected columns are float/int-like
missing_values_reference_list=["", "-", "?", float("nan")],
    misslist_list=[[] for _ in range(15)],  # no per-column missing-value markers recorded
)
numpy_replace_missing_values_0 = NumpyReplaceMissingValues(
missing_values=[], filling_values=100001
)
numpy_replace_unknown_values = NumpyReplaceUnknownValues(
filling_values=100001,
    filling_values_list=[100001] * 15,  # same sentinel for every selected column
known_values_list=[
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
[1.0, 2.0, 3.0, 4.0, 5.0],
[
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 11.0, 13.0, 14.0,
15.0, 18.0, 23.0,
],
[2.0, 4.0, 5.0, 6.0, 7.0],
[
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0,
39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0,
50.0,
],
[0.0, 1.0, 2.0, 3.0, 4.0, 6.0],
[1.0, 2.0, 3.0, 4.0, 5.0],
[
0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 8.0, 9.0, 11.0, 12.0, 13.0, 14.0,
15.0, 16.0, 17.0, 18.0, 21.0, 22.0, 23.0, 25.0, 26.0, 27.0, 28.0,
43.0, 50.0,
],
[
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 24.0,
26.0, 27.0, 34.0, 36.0, 37.0, 39.0, 48.0,
],
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
[
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
],
[0.0, 1.0, 2.0, 3.0, 4.0],
[
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
],
[
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0,
],
[
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
],
],
missing_values_reference_list=["", "-", "?", float("nan")],
)
cat_imputer = CatImputer(
strategy="most_frequent",
missing_values=100001,
sklearn_version_family="23",
)
cat_encoder = CatEncoder(
encoding="ordinal",
categories="auto",
dtype=np.float64,
handle_unknown="error",
sklearn_version_family="23",
)
pipeline_1 = make_pipeline(
numpy_column_selector_0,
compress_strings,
numpy_replace_missing_values_0,
numpy_replace_unknown_values,
boolean2float(),
cat_imputer,
cat_encoder,
float32_transform(),
)
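# Preprocessing branch 2 (numeric columns): select the remaining 25 columns,
# coerce float-like strings to floats, impute missing values with the median,
# and cast to float32; standard scaling is wired in but disabled
# (use_scaler_flag=False).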
numpy_column_selector_1 = NumpyColumnSelector(
columns=[
1, 2, 4, 5, 6, 7, 8, 10, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 25, 26,
29, 30, 31, 32, 38,
]
)
float_str2_float = FloatStr2Float(
dtypes_list=[
"float_num", "float_num", "float_num", "float_num", "float_num",
"float_num", "float_num", "float_num", "float_int_num", "float_num",
"float_int_num", "float_num", "float_int_num", "float_int_num",
"float_num", "float_num", "float_num", "float_num", "float_num",
"float_int_num", "float_num", "float_int_num", "float_int_num",
"float_int_num", "float_int_num",
],
missing_values_reference_list=[],
)
numpy_replace_missing_values_1 = NumpyReplaceMissingValues(
missing_values=[], filling_values=float("nan")
)
num_imputer = NumImputer(strategy="median", missing_values=float("nan"))
opt_standard_scaler = OptStandardScaler(
num_scaler_copy=None,
num_scaler_with_mean=None,
num_scaler_with_std=None,
use_scaler_flag=False,
)
pipeline_2 = make_pipeline(
numpy_column_selector_1,
float_str2_float,
numpy_replace_missing_values_1,
num_imputer,
opt_standard_scaler,
float32_transform(),
)
union = make_union(pipeline_1, pipeline_2)
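# The union concatenates the categorical branch (15 columns) and the numeric
# branch (25 columns); NumpyPermuteArray then maps the concatenated output
# back to the original 40-column order.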
numpy_permute_array = NumpyPermuteArray(
axis=0,
permutation_indices=[
0, 3, 9, 11, 18, 23, 24, 27, 28, 33, 34, 35, 36, 37, 39, 1, 2, 4, 5, 6,
7, 8, 10, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 25, 26, 29, 30, 31,
32, 38,
],
)
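# Cognito feature engineering: TA1 applies a sigmoid to each non-categorical
# numeric column, FS1 keeps the 40 original columns plus up to 30 of the best
# generated features, TA2 adds pairwise sums of numeric columns, and a second
# FS1 prunes again before the regressor.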
ta1 = TA1(
fun=autoai_libs.cognito.transforms.textras_methods.sigmoid,
name="sigmoid",
datatypes=["numeric"],
feat_constraints=[autoai_libs.utils.fc_methods.is_not_categorical],
col_names=[
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Mail",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|sum",
"[go_1k.csv](Product number)[go_products.csv]|Unit cost|identity",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Sales_visit",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|sum",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|max",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|sum",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|variance",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|sum",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|variance",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Web",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|count",
"[go_1k.csv](Product number)[go_products.csv]|Unit price|identity",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|max",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|variance",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|max",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Special",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|distinct_count",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|variance",
"[go_1k.csv]|Retailer code|identity",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_E-mail",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Telephone",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|max",
"[go_1k.csv]|Product number|identity",
"[go_1k.csv](Retailer code)[go_retailers.csv]|Retailer name|identity",
"[go_1k.csv](Retailer code)[go_retailers.csv]|Type|identity",
"[go_1k.csv](Retailer code)[go_retailers.csv]|Country|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product line|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product brand|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product color|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product type|identity",
],
    col_dtypes=[np.dtype("float32")] * 40,  # one float32 dtype per column name above
)
fs1_0 = FS1(
cols_ids_must_keep=range(0, 40),
additional_col_count_to_keep=30,
ptype="regression",
)
ta2 = TA2(
fun=np.add,
name="sum",
datatypes1=[
"intc", "intp", "int_", "uint8", "uint16", "uint32", "uint64", "int8",
"int16", "int32", "int64", "short", "long", "longlong", "float16",
"float32", "float64",
],
feat_constraints1=[autoai_libs.utils.fc_methods.is_not_categorical],
datatypes2=[
"intc", "intp", "int_", "uint8", "uint16", "uint32", "uint64", "int8",
"int16", "int32", "int64", "short", "long", "longlong", "float16",
"float32", "float64",
],
feat_constraints2=[autoai_libs.utils.fc_methods.is_not_categorical],
col_names=[
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Mail",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|sum",
"[go_1k.csv](Product number)[go_products.csv]|Unit cost|identity",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Sales_visit",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|sum",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|max",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|sum",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|variance",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|sum",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|variance",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Web",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|min",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|count",
"[go_1k.csv](Product number)[go_products.csv]|Unit price|identity",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|max",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|variance",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|max",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Special",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|distinct_count",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|variance",
"[go_1k.csv]|Retailer code|identity",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_E-mail",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Telephone",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|mean",
"[go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|max",
"[go_1k.csv]|Product number|identity",
"[go_1k.csv](Retailer code)[go_retailers.csv]|Retailer name|identity",
"[go_1k.csv](Retailer code)[go_retailers.csv]|Type|identity",
"[go_1k.csv](Retailer code)[go_retailers.csv]|Country|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product line|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product brand|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product color|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product|identity",
"[go_1k.csv](Product number)[go_products.csv]|Product type|identity",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|min)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|mean)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|min)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|mean)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|sum)",
"sigmoid([go_1k.csv](Product number)[go_products.csv]|Unit cost|identity)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|mean)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|sum)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|sum)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|variance)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|sum)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|variance)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Web)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|min)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|count)",
"sigmoid([go_1k.csv](Product number)[go_products.csv]|Unit price|identity)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit price|max)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|variance)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|max)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Order method code|variance)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_E-mail)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv](Order method code)[go_methods.csv]|Order method type|ItemsetCOR_Telephone)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Unit sale price|mean)",
"sigmoid([go_1k.csv](Retailer code Product number)[go_daily_sales.csv]|Quantity|max)",
"sigmoid([go_1k.csv](Retailer code)[go_retailers.csv]|Retailer name|identity)",
"sigmoid([go_1k.csv](Retailer code)[go_retailers.csv]|Country|identity)",
"sigmoid([go_1k.csv](Product number)[go_products.csv]|Product brand|identity)",
"sigmoid([go_1k.csv](Product number)[go_products.csv]|Product color|identity)",
"sigmoid([go_1k.csv](Product number)[go_products.csv]|Product|identity)",
"sigmoid([go_1k.csv](Product number)[go_products.csv]|Product type|identity)",
],
    col_dtypes=[np.dtype("float32")] * 70,  # one float32 dtype per column name above
)
fs1_1 = FS1(
cols_ids_must_keep=range(0, 40),
additional_col_count_to_keep=30,
ptype="regression",
)
gradient_boosting_regressor = GradientBoostingRegressor(
alpha=0.5155667001476331,
loss="huber",
max_depth=4,
max_features=0.33761641026359407,
min_samples_leaf=0.010685492408650153,
min_samples_split=0.49922469327197383,
n_estimators=69,
presort="auto",
random_state=33,
)
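# Assemble the full pipeline: relational feature engineering, the two
# preprocessing branches (union + column re-ordering), cognito feature
# engineering, and the gradient-boosting regressor.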
pipeline = make_pipeline(
relational,
union,
numpy_permute_array,
ta1,
fs1_0,
ta2,
fs1_1,
gradient_boosting_regressor,
)
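
# Usage sketch (not part of the pipeline definition above): a minimal, hedged
# example of how this multi-table pipeline might be trained and scored. It
# assumes the five GO CSV files are readable from `data_dir`, that
# lale.datasets.data_schemas.add_table_name is available to tag each DataFrame
# with the table name the Scan operators look up, and that `y_train` is a
# placeholder for the regression target (not specified in this file), aligned
# with the rows of go_1k.csv. The helper name and `data_dir` argument are
# illustrative only.
def _example_fit_and_predict(y_train, data_dir="."):
    import os

    import pandas as pd
    from lale.datasets.data_schemas import add_table_name

    table_names = [
        "go_1k.csv",
        "go_daily_sales.csv",
        "go_retailers.csv",
        "go_products.csv",
        "go_methods.csv",
    ]
    # Tag each DataFrame so Scan(table=it["<name>"]) can find it by name.
    tables = [
        add_table_name(pd.read_csv(os.path.join(data_dir, name)), name)
        for name in table_names
    ]
    trained = pipeline.fit(tables, y_train)
    return trained.predict(tables)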