GitHub Repository: keras-team/keras-io
Path: blob/master/scripts/upload.py
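# Deploy script for keras.io: uploads the generated static site to the
# "keras.io" S3 bucket, skipping files whose content hash is unchanged since
# the last run (tracked in contents_hashes.json), uploads redirect stubs,
# then deletes stale HTML objects from the bucket.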
import boto3
from pathlib import Path
import mimetypes
import hashlib
import os
import json
from multiprocessing.pool import ThreadPool

AKEY = os.environ["AWS_S3_ACCESS_KEY"]
SKEY = os.environ["AWS_S3_SECRET_KEY"]

BUCKET = "keras.io"
USE_THREADING = True
HASH_CACHE = "contents_hashes.json"

s3 = boto3.client("s3", aws_access_key_id=AKEY, aws_secret_access_key=SKEY)

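# Hash a file's contents in 128 KiB chunks (readinto fills a preallocated
# buffer instead of allocating a new one per chunk); the digest is truncated
# to 8 hex chars, which serves as a compact change marker.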
def hash_file(fpath):
    h = hashlib.sha256()
    b = bytearray(128 * 1024)
    mv = memoryview(b)
    with open(fpath, "rb", buffering=0) as f:
        while n := f.readinto(mv):
            h.update(mv[:n])
    return h.hexdigest()[:8]

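# Upload a single file as a public object; an optional redirect target is
# stored as the object's WebsiteRedirectLocation, which S3 static website
# hosting serves as an HTTP redirect.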
def upload_file(bucket, fpath, key_name, redirect=None):
    print(f"...Upload to {bucket}:{key_name}")
    # guess_type() returns None for unrecognized extensions; fall back to a
    # generic binary type so the upload does not fail parameter validation.
    mime = mimetypes.guess_type(fpath)[0] or "application/octet-stream"
    extra_args = {"ContentType": mime, "ACL": "public-read"}
    if redirect:
        extra_args["WebsiteRedirectLocation"] = redirect
    s3.upload_file(fpath, bucket, key_name, ExtraArgs=extra_args)

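# Fetch the previous run's hash cache from the bucket; fall back to an empty
# cache (forcing a full re-upload) if it cannot be downloaded.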
def load_hash_cache():
    try:
        s3.download_file(BUCKET, HASH_CACHE, HASH_CACHE)
    except Exception:
        print(f"[ERROR] Could not download hash cache {HASH_CACHE}")
        return {}
    with open(HASH_CACHE) as f:
        contents = f.read()
    return json.loads(contents)

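# Persist the new hash cache locally, then mirror it to the bucket so the
# next run can download it.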
def save_hash_cache(hash_cache):
    with open(HASH_CACHE, "w") as f:
        f.write(json.dumps(hash_cache))
    upload_file(BUCKET, HASH_CACHE, HASH_CACHE)

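# Single-argument adapter so upload targets can be mapped over a ThreadPool.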
def wrapped_upload_file(args):
    bucket, fpath, key_name = args
    upload_file(bucket, fpath, key_name)

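# Delete HTML objects from the bucket that no longer exist in either the
# generated site tree or the redirects tree.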
def cleanup(site_directory, redirect_directory):
    paginator = s3.get_paginator("list_objects_v2")
    page_iterator = paginator.paginate(Bucket=BUCKET)
    for page in page_iterator:
        # An empty result page carries no "Contents" key.
        for obj in page.get("Contents", []):
            key = obj["Key"]
            if key.endswith(".html"):
                site_fpath = os.path.join(site_directory, key)
                redirect_fpath = os.path.join(redirect_directory, key)
                if not os.path.exists(site_fpath) and not os.path.exists(
                    redirect_fpath
                ):
                    print(f"[DELETE] {key}")
                    s3.delete_object(Bucket=BUCKET, Key=key)

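# Walk `directory` and upload every non-hidden file, keyed by its path
# relative to `directory`. When a hash cache is given, only changed files
# are uploaded, and the freshly computed hashes are returned.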
def upload_dir(directory, include_img=True, hash_cache=None):
    print(f"Uploading files from '{directory}'...")
    all_targets = []
    for dp, _, fn in os.walk(directory):
        for f in fn:
            fpath = os.path.join(dp, f)
            if f.startswith("."):
                continue
            if not include_img and "/img/" in fpath:
                continue
            key_name = fpath[len(directory) :]
            key_name = key_name.removeprefix("/")
            print(f"...{key_name}")
            all_targets.append((BUCKET, fpath, key_name))

    if hash_cache is not None:
        # Skip files whose hash matches the previous run, but record every
        # file's current hash for the next run.
        filtered_targets = []
        new_hash_cache = {}
        for bucket, fpath, key_name in all_targets:
            new_hash = hash_file(fpath)
            old_hash = hash_cache.get(key_name)
            if new_hash != old_hash:
                filtered_targets.append((bucket, fpath, key_name))
            new_hash_cache[key_name] = new_hash
        all_targets = filtered_targets

    if USE_THREADING:
        # The context manager terminates the pool once all uploads finish.
        with ThreadPool(processes=8) as pool:
            pool.map(wrapped_upload_file, all_targets)
    else:
        for args in all_targets:
            wrapped_upload_file(args)

    if hash_cache is not None:
        return new_hash_cache

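# Upload each redirects/<path>/index.html stub, parsing the target URL out
# of its meta-refresh tag and attaching it as the S3 redirect location.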
def upload_redirects(directory):
    print("Uploading redirects...")
    for dp, _, fn in os.walk(directory):
        for f in fn:
            fpath = os.path.join(dp, f)
            if f != "index.html":
                continue
            with open(fpath) as redirect_file:
                content = redirect_file.read()
            # Pull the target out of a meta-refresh tag of the form
            # content="0; URL='https://...'"; the +5 skips past "URL='".
            url = content[content.find("URL=") + 5 :]
            url = url[: url.find("'")]
            print(fpath)
            print(url)
            key_name = fpath[len(directory) :]
            # Match upload_dir: object keys must not start with "/".
            key_name = key_name.removeprefix("/")
            upload_file(BUCKET, fpath, key_name, redirect=url)

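# Entry point: the site/ and redirects/ trees are expected to sit next to
# scripts/ at the repository root.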
if __name__ == "__main__":
    root = Path(__file__).parent.parent.resolve()
    hash_cache = load_hash_cache()
    site_directory = os.path.join(root, "site")
    redirect_directory = os.path.join(root, "redirects")
    hash_cache = upload_dir(site_directory, hash_cache=hash_cache)
    upload_redirects(redirect_directory)
    cleanup(site_directory, redirect_directory)
    save_hash_cache(hash_cache)