"""
IMPORTANT:
THE EXAMPLE IN THIS FILE IS CURRENTLY NOT FUNCTIONAL
BECAUSE THE `download_from_public_repository` FUNCTION
NO LONGER EXISTS. WE HAD TO MAKE A QUICK RELEASE TO
REMEDIATE AN ISSUE IN OUR PREVIOUS STORAGE SOLUTION.
THIS WILL BE FIXED IN A FUTURE RELEASE.
IN THE MEANTIME, IF YOU NEED ANY CLARIFICATION
REGARDING THE PACKAGE PLEASE FEEL FREE TO OPEN AN ISSUE.
"""
import os
import poutyne
from deepparse import download_from_public_repository
from deepparse.dataset_container import PickleDatasetContainer
from deepparse.parser import AddressParser
# Example: retrain a FastText address parser on a sample of "incomplete"
# addresses, first without and then with the attention mechanism, and evaluate
# each retrained model on a held-out test sample.

# Local directory the datasets are downloaded to, and the extension of the
# pickled dataset files.
saving_dir = "./data"
file_extension = "p"
training_dataset_name = "sample_incomplete_data"
test_dataset_name = "test_sample_data"


def dataset_path(dataset_name):
    """Return the local path of the dataset file named ``dataset_name``.

    The path is ``saving_dir`` joined with ``<dataset_name>.<file_extension>``.
    """
    return os.path.join(saving_dir, dataset_name + "." + file_extension)


def retrain_and_test(address_parser, training_container, test_container, lr_scheduler, **retrain_kwargs):
    """Retrain ``address_parser`` on the training data, then test it.

    Args:
        address_parser: The ``AddressParser`` to retrain and evaluate.
        training_container: Dataset container passed to ``retrain``.
        test_container: Dataset container passed to ``test``.
        lr_scheduler: Callback handed to ``retrain`` through ``callbacks``.
        **retrain_kwargs: Extra keyword arguments forwarded unchanged to
            ``retrain`` (for example ``logging_path``).
    """
    address_parser.retrain(
        training_container,
        train_ratio=0.8,
        epochs=5,
        batch_size=8,
        num_workers=2,
        callbacks=[lr_scheduler],
        **retrain_kwargs,
    )
    address_parser.test(test_container, batch_size=256)


def main():
    """Download the sample datasets, then retrain and test both models."""
    # NOTE(review): the notice at the top of this file says this download
    # helper is currently unavailable; the calls are kept as they were.
    download_from_public_repository(training_dataset_name, saving_dir, file_extension=file_extension)
    download_from_public_repository(test_dataset_name, saving_dir, file_extension=file_extension)

    training_container = PickleDatasetContainer(dataset_path(training_dataset_name))
    test_container = PickleDatasetContainer(dataset_path(test_dataset_name))

    # First model: FastText without attention. No ``logging_path`` is given,
    # so ``retrain`` uses whatever its default is.
    address_parser = AddressParser(model_type="fasttext", device=0)

    # The same scheduler callback instance is shared by both retraining runs.
    lr_scheduler = poutyne.StepLR(step_size=1, gamma=0.1)

    retrain_and_test(address_parser, training_container, test_container, lr_scheduler)

    # Second model: FastText with the attention mechanism, logged to its own
    # directory.
    address_parser = AddressParser(model_type="fasttext", device=0, attention_mechanism=True)
    retrain_and_test(
        address_parser,
        training_container,
        test_container,
        lr_scheduler,
        logging_path="checkpoints_attention",
    )


# The guard matters because retraining asks for ``num_workers=2``: presumably
# these become data-loading worker processes, and with the ``spawn`` start
# method (the default on Windows and macOS) each worker re-imports the main
# module, which would otherwise run the downloads and training again.
if __name__ == "__main__":
    main()