Path: blob/main/examples/retrained_model_parsing.py
1233 views
# pylint: skip-file
###################
"""
IMPORTANT:
THE EXAMPLE IN THIS FILE IS CURRENTLY NOT FUNCTIONAL
BECAUSE THE `download_from_public_repository` FUNCTION
NO LONGER EXISTS. WE HAD TO MAKE A QUICK RELEASE TO
REMEDIATE AN ISSUE IN OUR PREVIOUS STORAGE SOLUTION.
THIS WILL BE FIXED IN A FUTURE RELEASE.

IN THE MEAN TIME IF YOU NEED ANY CLARIFICATION
REGARDING THE PACKAGE PLEASE FEEL FREE TO OPEN AN ISSUE.
"""
import os

from deepparse import download_from_public_repository
from deepparse.dataset_container import PickleDatasetContainer
from deepparse.parser import AddressParser

# Here is an example on how to parse multiple addresses with a retrained model.
# First, let's download the prediction data from the public repository.
data_saving_dir = "./data"
file_extension = "p"
test_dataset_name = "predict"
download_from_public_repository(test_dataset_name, data_saving_dir, file_extension=file_extension)

# Now let's load the dataset using one of our dataset containers.
# The path is derived from the same variables used for the download above so
# that the two cannot drift apart if a directory or file name is changed.
test_dataset_path = os.path.join(data_saving_dir, test_dataset_name + "." + file_extension)
addresses_to_parse = PickleDatasetContainer(test_dataset_path, is_training_container=False)

# Let's download a BPEmb retrained model created just for this example, but you can also use one of yours.
model_saving_dir = "./retrained_models"
retrained_model_name = "retrained_light_bpemb_address_parser"
model_file_extension = "ckpt"
download_from_public_repository(retrained_model_name, model_saving_dir, file_extension=model_file_extension)

# Build the parser from the downloaded checkpoint file.
# NOTE(review): `device=0` presumably selects the first GPU - confirm against the deepparse documentation.
path_to_retrained_model = os.path.join(model_saving_dir, retrained_model_name + "." + model_file_extension)
address_parser = AddressParser(
    model_type="bpemb",
    device=0,
    path_to_retrained_model=path_to_retrained_model,
)

# We can now parse some addresses (only the first 300 entries of the container).
parsed_addresses = address_parser(addresses_to_parse[0:300])