Path: blob/main/examples/parse_addresses_uri.py
1231 views
# pylint: skip-file
###################
"""
IMPORTANT:
THE EXAMPLE IN THIS FILE IS CURRENTLY NOT FUNCTIONAL
BECAUSE THE `download_from_public_repository` FUNCTION
NO LONGER EXISTS. WE HAD TO MAKE A QUICK RELEASE TO
REMEDIATE AN ISSUE IN OUR PREVIOUS STORAGE SOLUTION.
THIS WILL BE FIXED IN A FUTURE RELEASE.

IN THE MEANTIME, IF YOU NEED ANY CLARIFICATION
REGARDING THE PACKAGE PLEASE FEEL FREE TO OPEN AN ISSUE.
"""
import os

from deepparse import download_from_public_repository
from deepparse.dataset_container import PickleDatasetContainer
from deepparse.parser import AddressParser

# Example: parse multiple addresses with a retrained model that is stored
# behind a URI (here, an S3 bucket).

# First, download the dataset of addresses to parse from the public repository.
saving_dir = "./data"
file_extension = "p"
test_dataset_name = "predict"
download_from_public_repository(test_dataset_name, saving_dir, file_extension=file_extension)

# Now load the dataset using one of the dataset containers.
# The path is built from the same pieces passed to the download call above so
# the two cannot drift apart (this yields "./data/predict.p" on POSIX).
dataset_path = os.path.join(saving_dir, f"{test_dataset_name}.{file_extension}")
addresses_to_parse = PickleDatasetContainer(dataset_path, is_training_container=False)

# Take a sneak peek at the first two addresses.
print(addresses_to_parse[:2])

# Use the FastText model on a GPU (device index 0), loading the retrained
# weights from the URI. Replace the placeholder with your own bucket path.
path_to_your_uri = "s3://<path_to_your_bucket>/fasttext.ckpt"
address_parser = AddressParser(model_type="fasttext", device=0, path_to_retrained_model=path_to_your_uri)

# Parse the first 300 addresses of the dataset.
parsed_addresses = address_parser(addresses_to_parse[0:300])