AutoOfflineRL

In [1]:
using Distributed

# Add worker processes (with `addprocs`' default settings) only when the
# session is still single-process, so re-running this cell does not keep
# spawning additional workers.
if nprocs() == 1
    addprocs()
end

# Load the packages used by the rest of the notebook on every process.
@everywhere using AutoOfflineRL, AutoMLPipeline, Parquet, DataFrames
    CondaPkg Found dependencies: /Users/ppalmes/.julia/packages/PythonCall/1f5yE/CondaPkg.toml
    CondaPkg Found dependencies: /Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/CondaPkg.toml
    CondaPkg Found dependencies: /Users/ppalmes/.julia/packages/AutoMLPipeline/7l0TC/CondaPkg.toml
    CondaPkg Found dependencies: /Users/ppalmes/.julia/packages/PythonCall/1f5yE/CondaPkg.toml
    CondaPkg Found dependencies: /Users/ppalmes/.julia/packages/AutoMLPipeline/iy1ee/CondaPkg.toml
    CondaPkg Dependencies already up to date
      From worker 4:	┌ Info: CondaPkg: Waiting for lock to be freed. You may delete this file if no other process is resolving.
      From worker 4:	└   lock_file = "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/lock"
      From worker 5:	┌ Info: CondaPkg: Waiting for lock to be freed. You may delete this file if no other process is resolving.
      From worker 5:	└   lock_file = "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/lock"
      From worker 9:	┌ Info: CondaPkg: Waiting for lock to be freed. You may delete this file if no other process is resolving.
      From worker 9:	└   lock_file = "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/lock"
      From worker 7:	┌ Info: CondaPkg: Waiting for lock to be freed. You may delete this file if no other process is resolving.
      From worker 7:	└   lock_file = "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/lock"
      From worker 3:	┌ Info: CondaPkg: Waiting for lock to be freed. You may delete this file if no other process is resolving.
      From worker 3:	└   lock_file = "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/lock"
      From worker 6:	┌ Info: CondaPkg: Waiting for lock to be freed. You may delete this file if no other process is resolving.
      From worker 6:	└   lock_file = "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/lock"
      From worker 2:	┌ Info: CondaPkg: Waiting for lock to be freed. You may delete this file if no other process is resolving.
      From worker 2:	└   lock_file = "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/lock"
In [2]:
@everywhere begin
    # Pipeline building blocks, defined on every process so that any worker
    # can assemble and evaluate pipelines from them.

    # -- Scalers / transformers, each an SKPreprocessor selected by name --
    # NOTE(review): `norm` and `std` share their names with
    # `LinearAlgebra.norm` and `Statistics.std`; loading those packages in
    # the same session would produce a name conflict.
    rb = SKPreprocessor("RobustScaler")
    pt = SKPreprocessor("PowerTransformer")
    norm = SKPreprocessor("Normalizer")
    mx = SKPreprocessor("MinMaxScaler")
    std = SKPreprocessor("StandardScaler")

    # -- Column selectors (presumably categorical vs. numeric columns) --
    catf = CatFeatureSelector()
    numf = NumFeatureSelector()

    # -- Decomposition filters --
    # Alternative constructions using the `:autocomponent` option are kept
    # here for reference but are currently disabled:
    #apca = SKPreprocessor("PCA",Dict(:autocomponent=>true,:name=>"autoPCA"));
    #afa = SKPreprocessor("FactorAnalysis",Dict(:autocomponent=>true,:name=>"autoFA"));
    #aica = SKPreprocessor("FastICA",Dict(:autocomponent=>true,:name=>"autoICA"));
    pca = SKPreprocessor("PCA")
    fa = SKPreprocessor("FactorAnalysis")
    ica = SKPreprocessor("FastICA")

    # -- Identity element registered under the name "Noop" --
    noop = Identity(Dict(:name => "Noop"))
end
In [5]:
# Load the sample dataset stored under the AutoOfflineRL package directory.
path = pkgdir(AutoOfflineRL)
# Build the file location with `joinpath` rather than interpolating a
# hard-coded "/" so the path is assembled with the platform's separator
# (and fails immediately if `pkgdir` could not resolve the package).
dataset = joinpath(path, "data", "smalldata.parquet")
# Read the parquet file, materialise it as a DataFrame and drop every row
# that contains a missing value.
df = Parquet.read_parquet(dataset) |> DataFrame |> dropmissing
# Preview the first ten rows.
first(df, 10)
Out[5]:
10×9 DataFrame
Rowdayhourminutedowsensor1sensor2sensor3actionreward
Int64Int64Int64Int64Int64Int64Int64Int64Float64
11002711025100.838122
210024445056100.639387
310029138611000.416196
410023655126100.384344
5100231212942100.37681
6100229514158100.641975
71012921846100.225288
8101277234500.335901
9101213161131000.993489
1010124731991000.402383
In [7]:
# Split the dataset into the pieces handed to the RL pipeline:
# observations, actions, rewards and episode-terminal flags.
srow = nrow(df)

# `df[:, cols]` already returns a new DataFrame with copied columns, so no
# extra `deepcopy`/`DataFrame` round-trip is needed for `reward`/`action`.
observation = df[:, ["day", "hour", "minute", "dow", "sensor1", "sensor2", "sensor3"]]
reward = df[:, ["reward"]]
action = df[:, ["action"]]

# Terminal flags: 1 marks the last step of an episode, 0 everywhere else.
_terminals = zeros(Int, srow)
# Flag rows 100, 1100, 2100, ... up to row 9000 as in the original example,
# but clamp the upper bound to the number of rows so a shorter dataset does
# not raise a BoundsError (the range is simply empty when srow < 100).
_terminals[100:1000:min(9000, srow)] .= 1
# The final row always closes the last episode; skip when there are no rows.
isempty(_terminals) || (_terminals[end] = 1)
dterminal = DataFrame(terminal=_terminals)

# Bundle in the order action, reward, terminal.
action_reward_terminal = DataFrame[action, reward, dterminal];
In [10]:
# Create a discrete offline-RL agent configured with the "NFQ" algorithm.
agent = DiscreteRLOffline("NFQ")

# Chain numeric-column selection, min-max scaling and PCA in front of the
# agent. `|>` is left-associative, so no grouping parentheses are needed.
pipe = numf |> mx |> pca |> agent

# Cross-validate the pipeline; as the cell's last expression the assignment
# also displays the resulting score.
score = crossvalidateRL(pipe, observation, action_reward_terminal)
Epoch 1/3: 100%|█████████████| 190/190 [00:00<00:00, 908.70it/s, loss=5.76e+12]
Epoch 2/3: 100%|█████████████| 190/190 [00:00<00:00, 927.64it/s, loss=3.16e+12]
Epoch 3/3: 100%|██████████████| 190/190 [00:00<00:00, 886.60it/s, loss=3.8e+12]
Epoch 1/3: 100%|█████████████| 218/218 [00:00<00:00, 925.40it/s, loss=1.55e+12]
Epoch 2/3: 100%|█████████████| 218/218 [00:00<00:00, 927.41it/s, loss=5.75e+11]
Epoch 3/3: 100%|██████████████| 218/218 [00:00<00:00, 929.27it/s, loss=6.8e+11]
Epoch 1/3: 100%|██████████████| 218/218 [00:00<00:00, 925.34it/s, loss=1.9e+12]
Epoch 2/3: 100%|█████████████| 218/218 [00:00<00:00, 924.56it/s, loss=5.09e+11]
Epoch 3/3: 100%|█████████████| 218/218 [00:00<00:00, 877.23it/s, loss=5.11e+11]
2023-06-21 08:37:52 [debug    ] RoundIterator is selected.
2023-06-21 08:37:52 [info     ] Directory is created at d3rlpy_logs/NFQ_20230621083752
2023-06-21 08:37:52 [debug    ] Fitting scaler...              scaler=min_max
2023-06-21 08:37:52 [debug    ] Building models...
2023-06-21 08:37:52 [debug    ] Models have been built.
2023-06-21 08:37:52 [info     ] Parameters are saved to d3rlpy_logs/NFQ_20230621083752/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[ 1.94568764e-02,  7.42516160e-01,  8.01553965e-01,
         7.23483264e-01,  8.17517638e-01,  5.66567123e-01,
        -1.01158134e-16]], dtype=float32), 'minimum': array([[-3.9477360e-01, -7.3005491e-01, -7.9195231e-01, -7.1986693e-01,
        -7.9193425e-01, -4.7263184e-01, -3.7894311e-16]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'NFQ', 'observation_shape': (7,), 'action_size': 101}
2023-06-21 08:37:53 [info     ] NFQ_20230621083752: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 2.509418286775288e-05, 'time_algorithm_update': 0.0010575696041709498, 'loss': 5584023431620.716, 'time_step': 0.001096251136378238, 'td_error': 1.6375533317638637e+27} step=190
2023-06-21 08:37:53 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083752/model_190.pt
2023-06-21 08:37:53 [info     ] NFQ_20230621083752: epoch=2 step=380 epoch=2 metrics={'time_sample_batch': 2.1763851768092105e-05, 'time_algorithm_update': 0.0010391084771407277, 'loss': 3188635471225.263, 'time_step': 0.0010739200993588097, 'td_error': 9.189468740019443e+26} step=380
2023-06-21 08:37:53 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083752/model_380.pt
2023-06-21 08:37:53 [info     ] NFQ_20230621083752: epoch=3 step=570 epoch=3 metrics={'time_sample_batch': 2.2755171123303866e-05, 'time_algorithm_update': 0.0010874949003520766, 'loss': 3805664482357.8945, 'time_step': 0.0011236190795898438, 'td_error': 3.5583898240510854e+27} step=570
2023-06-21 08:37:53 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083752/model_570.pt
2023-06-21 08:37:53 [debug    ] RoundIterator is selected.
2023-06-21 08:37:53 [info     ] Directory is created at d3rlpy_logs/NFQ_20230621083753
2023-06-21 08:37:53 [debug    ] Fitting scaler...              scaler=min_max
2023-06-21 08:37:53 [debug    ] Building models...
2023-06-21 08:37:53 [debug    ] Models have been built.
2023-06-21 08:37:53 [info     ] Parameters are saved to d3rlpy_logs/NFQ_20230621083753/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.3158090e+00, 7.4283278e-01, 8.5022020e-01, 7.3996544e-01,
        8.1751764e-01, 5.6656712e-01, 1.6202297e-15]], dtype=float32), 'minimum': array([[-4.2337933e-01, -7.3005491e-01, -7.9443091e-01, -7.1224165e-01,
        -7.9193425e-01, -5.2358818e-01, -3.7152091e-16]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'NFQ', 'observation_shape': (7,), 'action_size': 101}
2023-06-21 08:37:53 [info     ] NFQ_20230621083753: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.1528760227588338e-05, 'time_algorithm_update': 0.0010424764878159268, 'loss': 1533027375122.789, 'time_step': 0.0010769914049621023, 'td_error': 8.202037799008613e+23} step=218
2023-06-21 08:37:53 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083753/model_218.pt
2023-06-21 08:37:54 [info     ] NFQ_20230621083753: epoch=2 step=436 epoch=2 metrics={'time_sample_batch': 2.2574302253373173e-05, 'time_algorithm_update': 0.0010391167544443673, 'loss': 576618199434.5688, 'time_step': 0.0010745230071041562, 'td_error': 8.590744526813287e+23} step=436
2023-06-21 08:37:54 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083753/model_436.pt
2023-06-21 08:37:54 [info     ] NFQ_20230621083753: epoch=3 step=654 epoch=3 metrics={'time_sample_batch': 2.2000129069757025e-05, 'time_algorithm_update': 0.001037401890535967, 'loss': 680976747905.1743, 'time_step': 0.0010722372510017606, 'td_error': 8.63720593951574e+23} step=654
2023-06-21 08:37:54 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083753/model_654.pt
2023-06-21 08:37:54 [debug    ] RoundIterator is selected.
2023-06-21 08:37:54 [info     ] Directory is created at d3rlpy_logs/NFQ_20230621083754
2023-06-21 08:37:54 [debug    ] Fitting scaler...              scaler=min_max
2023-06-21 08:37:54 [debug    ] Building models...
2023-06-21 08:37:54 [debug    ] Models have been built.
2023-06-21 08:37:54 [info     ] Parameters are saved to d3rlpy_logs/NFQ_20230621083754/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.3158090e+00, 7.4283278e-01, 8.5022020e-01, 7.3996544e-01,
        8.1751764e-01, 5.6656712e-01, 1.6202297e-15]], dtype=float32), 'minimum': array([[-4.2337933e-01, -7.2828460e-01, -7.9443091e-01, -7.1224165e-01,
        -8.0387592e-01, -5.2358818e-01, -3.7894311e-16]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'NFQ', 'observation_shape': (7,), 'action_size': 101}
2023-06-21 08:37:54 [info     ] NFQ_20230621083754: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.134721213524495e-05, 'time_algorithm_update': 0.0010429959778391986, 'loss': 1862506510580.2568, 'time_step': 0.0010770942093035498, 'td_error': 6.475237976362436e+23} step=218
2023-06-21 08:37:54 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083754/model_218.pt
2023-06-21 08:37:54 [info     ] NFQ_20230621083754: epoch=2 step=436 epoch=2 metrics={'time_sample_batch': 2.260711214957981e-05, 'time_algorithm_update': 0.0010425432012715471, 'loss': 507404306037.4312, 'time_step': 0.001077783217123889, 'td_error': 3.670942430670977e+23} step=436
2023-06-21 08:37:54 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083754/model_436.pt
2023-06-21 08:37:55 [info     ] NFQ_20230621083754: epoch=3 step=654 epoch=3 metrics={'time_sample_batch': 2.33672080783669e-05, 'time_algorithm_update': 0.0010992747928024432, 'loss': 511688790729.9816, 'time_step': 0.0011360339068491525, 'td_error': 7.400041935179813e+23} step=654
2023-06-21 08:37:55 [info     ] Model parameters are saved to d3rlpy_logs/NFQ_20230621083754/model_654.pt
Out[10]:
6.799097389864054e26
In [ ]: