Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
tensorflow
GitHub Repository: tensorflow/docs-l10n
Path: blob/master/tools/snapshot-source.sh
21430 views
1
#!/usr/bin/env bash
2
## Pull the source of all TensorFlow doc projects into a single snapshot.
3
## Usage:
4
## $ ./tools/snapshot-source.sh [-h]
5
## $ ./tools/snapshot-source.sh -c # Create git commit/branch after pull
6
##
7
## Docs are collected in the ./site/en-snapshot directory
8
# Exit on the first unhandled command failure.
set -e
9
10
#######################################
# Print the command-line help text.
# Arguments: none (uses $0 for the script name)
# Outputs:   help text on stdout
#######################################
usage() {
  # The option parser takes the directory as a separate argument ("-o dir");
  # "-o=dir" would store "=dir" as the directory name.
  printf '%s\n' \
    "Usage: $(basename "$0") [options]" \
    " Copy source docs from all projects into local directory." \
    "Options:" \
    " -c Auto-create the git commit w/ status message" \
    " -F Do not format notebooks. Formatting requires the tensorflow-docs pip package." \
    " -o dir Set a different output directory (default: site/en-snapshot)" \
    " -h Print this help and exit"
}
19
20
# Parse command-line flags.
#   -c      set COMMIT_FLAG
#   -F      set NO_FORMAT_FLAG
#   -o dir  set SNAPSHOT_ROOT
#   -h      print help and exit 0
# An unknown option or a missing -o argument prints the help to stderr and
# exits 1, so a mistyped invocation is not reported as success.
while getopts "cFo:h" opt; do
  case "$opt" in
    c) COMMIT_FLAG=1 ;;
    F) NO_FORMAT_FLAG=1 ;;
    o) SNAPSHOT_ROOT="$OPTARG" ;;
    h)
      usage
      exit 0
      ;;
    *)
      # getopts has already printed its own diagnostic for the bad option.
      usage >&2
      exit 1
      ;;
  esac
done
31
32
# Registry of the TensorFlow doc projects pulled into the snapshot.
# Each value packs four colon-separated fields:
#   <GitHub repo>:<branch>:<source dir in repo>:<destination dir in snapshot>
declare -A PROJECTS=()
PROJECTS[addons]="tensorflow/addons:master:docs:addons"
PROJECTS[agents]="tensorflow/agents:master:docs:agents"
PROJECTS[datasets]="tensorflow/datasets:master:docs:datasets"
PROJECTS[docs]="tensorflow/docs:master:site/en:."
PROJECTS[federated]="tensorflow/federated:main:docs:federated"
PROJECTS[graphics]="tensorflow/graphics:master:tensorflow_graphics/g3doc:graphics"
PROJECTS[hub]="tensorflow/hub:master:docs:hub"
PROJECTS[hub_tutorials]="tensorflow/hub:master:examples/colab:hub/tutorials"
PROJECTS[io]="tensorflow/io:master:docs:io"
PROJECTS[js]="tensorflow/tfjs-website:master:docs:js"
PROJECTS[keras_guides]="tensorflow/docs:snapshot-keras:site/en/guide/keras:guide/keras"
PROJECTS[lattice]="tensorflow/lattice:master:docs:lattice"
PROJECTS[lite]="tensorflow/tensorflow:master:tensorflow/lite/g3doc:lite"
PROJECTS[mlir]="tensorflow/tensorflow:master:tensorflow/compiler/mlir/g3doc:mlir"
PROJECTS[model_optimization]="tensorflow/model-optimization:master:tensorflow_model_optimization/g3doc:model_optimization"
PROJECTS[neural_structured_learning]="tensorflow/neural-structured-learning:master:g3doc:neural_structured_learning"
PROJECTS[probability]="tensorflow/probability:main:tensorflow_probability/g3doc:probability"
PROJECTS[probability_examples]="tensorflow/probability:main:tensorflow_probability/examples/jupyter_notebooks:probability/examples"
PROJECTS[quantum]="tensorflow/quantum:master:docs:quantum"
PROJECTS[tensorboard]="tensorflow/tensorboard:master:docs:tensorboard"
PROJECTS[tfx]="tensorflow/tfx:master:docs:tfx"
PROJECTS[xla]="tensorflow/tensorflow:master:tensorflow/compiler/xla/g3doc:xla"
58
59
# Prefix for this script's log messages.
LOG_NAME="[$(basename "$0")]"
# Repository root: the parent of the directory containing this script.
# Inner expansions are quoted so paths with spaces resolve correctly.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && cd .. && pwd)"
# Scratch directory that receives the project clones.
TEMP_DIR=$(mktemp -d -t "$(basename "$0" '.sh').XXXXX")
# Remove the scratch directory on every exit path. Under `set -e` a failed
# step would otherwise skip the explicit cleanup at the end of the script and
# leak the clones; that final cleanup remains harmless with this trap in place.
trap 'rm -rf -- "$TEMP_DIR"' EXIT
# Staging tree assembled from the clones before it is copied to the snapshot.
TEMP_SITE_ROOT="$TEMP_DIR/_siteroot"
# Seconds since the epoch, captured once for this run.
TIMESTAMP=$(date '+%s')

declare -A LAST_COMMITS # Last commit ID for each project
66
67
##
68
## CHECK SETUP
69
##
70
71
# Without -o, fall back to the in-repo snapshot directory. Only this default
# location is created automatically.
[[ -n "$SNAPSHOT_ROOT" ]] || {
  SNAPSHOT_ROOT="${REPO_ROOT}/site/en-snapshot"
  mkdir -p "$SNAPSHOT_ROOT"
}

# A user-supplied output directory must already exist.
[[ -d "$SNAPSHOT_ROOT" ]] || {
  echo "${LOG_NAME} Output directory does not exist: ${SNAPSHOT_ROOT}" >&2
  exit 1
}
80
81
# Notebook formatting requires the tensorflow-docs package.
# https://github.com/tensorflow/docs/tree/master/tools/tensorflow_docs/tools
if [[ -z "$NO_FORMAT_FLAG" ]]; then
  # Ask pip about this one package and rely on its exit status. Grepping the
  # `pip list` table is a substring match on displayed names: it misses an
  # installation listed as "tensorflow_docs" and accepts any unrelated package
  # whose name merely contains "tensorflow-docs".
  if ! python3 -m pip show tensorflow-docs > /dev/null 2>&1; then
    echo "${LOG_NAME} Error: Can't find the tensorflow-docs pip package for formatting. (Use -F to disable.)" >&2
    exit 1
  fi
fi
89
90
# Git status
# All git commands target REPO_ROOT explicitly: the snapshot commit is created
# there later, so the cleanliness check and the branch switch must inspect the
# same repository even when the script is started from another directory.
if [[ -n "$COMMIT_FLAG" ]]; then
  # Want a clean branch if making a commit.
  if [[ -n "$(git -C "$REPO_ROOT" status -s)" ]]; then
    echo "${LOG_NAME} git status not clear, exiting." >&2
    echo "Can run without committing, see help: $(basename "$0") -h" >&2
    exit 1
  fi

  # Create new branch if on master
  if [[ "$(git -C "$REPO_ROOT" branch --show-current)" == "master" ]]; then
    branch_name="en-snapshot-${TIMESTAMP}"
    echo "${LOG_NAME} Create new branch for snapshot: ${branch_name}"
    git -C "$REPO_ROOT" checkout -b "${branch_name}"
  fi
fi
106
107
##
108
## DOWNLOAD PROJECTS
109
##
110
111
echo "${LOG_NAME} Download projects to: ${TEMP_DIR}"

# Visit projects in sorted-name order. Associative-array iteration order is
# unspecified, and some projects write into the same destination tree (docs
# copies to "." while keras_guides copies to guide/keras), so an arbitrary
# order makes the surviving files vary between runs.
# NOTE(review): with sorted names the nested project (keras_guides,
# hub_tutorials, probability_examples) is copied after its parent and wins;
# presumably that is the intent - confirm.
mapfile -t project_names < <(printf '%s\n' "${!PROJECTS[@]}" | LC_ALL=C sort)

for project in "${project_names[@]}"; do
  [[ -n "$project" ]] || continue

  # Split "repo:branch:src-dir:dest-dir" in one step.
  IFS=':' read -r repo branch src_path dest_path <<< "${PROJECTS[$project]}"
  clone_dir="${TEMP_DIR}/${project}"

  # Download shallow clone of each project in temp. The working directory is
  # deliberately left unchanged so that a relative -o output path keeps
  # resolving against the directory the script was started from.
  git clone "https://github.com/${repo}.git" \
    --branch "$branch" --single-branch --depth 1 "$clone_dir"

  # Store last commit id for project.
  LAST_COMMITS[$project]="$(git -C "$clone_dir" rev-parse HEAD)"

  # Assemble shadow site
  mkdir -p "$TEMP_SITE_ROOT/$dest_path"
  cp -R "$clone_dir/$src_path"/* "$TEMP_SITE_ROOT/$dest_path"/
done
133
134
###
135
## PRUNE
136
##
137
138
# "${TEMP_SITE_ROOT:?}" aborts the script if the variable is empty, so none of
# the removals below can ever be applied relative to "/".
#
# Deletion is done by find itself (-exec ... +) rather than `find | xargs rm`:
# file names containing spaces or quotes are passed intact, and nothing is run
# when there are no matches. `rm -f` avoids interactive prompts for
# write-protected files.

# Keep only doc formats.
find "${TEMP_SITE_ROOT:?}" \
  -type f ! -name "*.ipynb" ! -name "*.md" ! -name "*.html" \
  -exec rm -f -- {} +

# Remove files we don't publish or don't translate.
find "${TEMP_SITE_ROOT:?}" \
  -type f \( -name "README*" -o -name "_*" -o -name "index.*" \
  -o -path "*/api_docs/*" -o -path "*/r1/*" \) \
  -exec rm -f -- {} +

# Remove specific pages or sections.
rm -rf -- "${TEMP_SITE_ROOT:?}/install/" # Different process.
rm -rf -- "${TEMP_SITE_ROOT:?}/datasets/catalog/" # Reference
rm -rf -- "${TEMP_SITE_ROOT:?}/tensorboard/design" # Design docs
# The next two use plain `rm`, so a missing file aborts the run under `set -e`.
# NOTE(review): presumably deliberate, to notice when upstream moves these
# pages - confirm before relaxing to `rm -f`.
rm -- "${TEMP_SITE_ROOT:?}/xla/operation_semantics.md" # Reference
# Cloud integration not available here (b/197880392)
rm -- "${TEMP_SITE_ROOT:?}/guide/keras/training_keras_models_on_cloud.ipynb"
156
157
##
158
## SYNC
159
##
160
161
printf '%s\n' "${LOG_NAME} Copy projects to: ${SNAPSHOT_ROOT}"

# Mirror the staged tree into the snapshot directory: files are compared by
# checksum, and destination files absent from the staging tree are deleted.
rsync --checksum --archive --del "${TEMP_SITE_ROOT}/" "${SNAPSHOT_ROOT}/"
164
165
##
166
## STATUS REPORT
167
##
168
169
COMMIT_MSG_LIST=""
README_MSG_LIST=""

#######################################
# Build the sorted per-project status lists.
# Globals:
#   PROJECTS (read), LAST_COMMITS (read),
#   COMMIT_MSG_LIST (written), README_MSG_LIST (written)
# Arguments: none
# Each list holds one "- ..." line per project in LAST_COMMITS, sorted, and
# starts with an empty line (kept because the messages composed from these
# lists embed them as-is). Both lists are empty when no project was recorded.
#######################################
build_status_lists() {
  local project repo branch src_path _dest_path
  local last_commit project_url commit_url
  local -a commit_lines=() readme_lines=()

  for project in "${!LAST_COMMITS[@]}"; do
    last_commit="${LAST_COMMITS[$project]}"
    # Split "repo:branch:src-dir:dest-dir" without forking `cut` per field.
    IFS=':' read -r repo branch src_path _dest_path <<< "${PROJECTS[$project]}"

    project_url="https://github.com/${repo}/tree/${branch}/${src_path}"
    commit_url="https://github.com/${repo}/commit/${last_commit}"

    # Append to both logs; the README shows the first 8 characters of the ID.
    commit_lines+=("- ${project}: ${commit_url}")
    readme_lines+=("- [${project}](${project_url}) @ <a href='${commit_url}'><code>${last_commit:0:8}</code></a>")
  done

  COMMIT_MSG_LIST=""
  README_MSG_LIST=""
  if (( ${#commit_lines[@]} > 0 )); then
    # Order project list. Lines are handed to sort as quoted arguments, so
    # entries such as "[name](url)" are never word-split or treated as globs.
    COMMIT_MSG_LIST=$'\n'"$(printf '%s\n' "${commit_lines[@]}" | sort)"
    README_MSG_LIST=$'\n'"$(printf '%s\n' "${readme_lines[@]}" | sort)"
  fi
}

build_status_lists
190
191
#######################################
# Render a Unix timestamp with the platform's `date` command.
# Arguments: $1 - seconds since the epoch
# Outputs:   the default `date` rendering of that instant on stdout
#######################################
print_timestamp() {
  local epoch_secs="$1"
  local rendered=""
  case "$(uname)" in
    Darwin)
      rendered=$(date -r "$epoch_secs") # BSD style
      ;;
    *)
      rendered=$(date -d "@$epoch_secs") # Linux
      ;;
  esac
  echo "$rendered"
}
200
201
TIMESTAMP_STR="$(print_timestamp "$TIMESTAMP")"

# Commit message template. The "\n" sequences stay literal here and are only
# expanded when the message is passed through `echo -e`.
COMMIT_MSG="Source snapshot: ${TIMESTAMP_STR}\n\n"$'\n'
COMMIT_MSG+="Projects and last commit:\n"$'\n'
COMMIT_MSG+="${COMMIT_MSG_LIST}\n"

# README body. It ends with a literal "\n" that `echo -e` expands below.
README_STR="$(cat <<EOF
__DO NOT EDIT__

This snapshot of the English documentation is used for tensorflow.org
translations. Do not edit these files. The source-of-truth files are located in
the projects listed below.

Please submit translations from the GitLocalize project: https://gitlocalize.com/tensorflow/docs-l10n

Updated: ${TIMESTAMP_STR}

Projects and last commit:
${README_MSG_LIST}\n
EOF
)"

# Write the snapshot README.
CHANGELOG_FILE="${SNAPSHOT_ROOT}/README.md"
echo -e "${README_STR}" > "${CHANGELOG_FILE}"
223
224
##
225
## FINISH OPTIONS
226
##
227
228
# Format notebooks
if [[ -z "$NO_FORMAT_FLAG" ]]; then
  echo "${LOG_NAME} Format notebooks ..."
  # Capture the formatter's output instead of discarding it: it stays quiet on
  # success, but is shown on failure so the cause can be diagnosed.
  if ! nbfmt_output="$(python3 -m tensorflow_docs.tools.nbfmt "${SNAPSHOT_ROOT}" 2>&1)"; then
    echo "${LOG_NAME} nbfmt error, exiting." >&2
    printf '%s\n' "$nbfmt_output" >&2
    exit 1
  fi
fi
236
237
# Commit change
if [[ -n "$COMMIT_FLAG" ]]; then
  cd "$REPO_ROOT"
  # Want to commit more than a timestamp update. (READMEs already excluded)
  # `git status --porcelain` is used so that newly added (untracked) docs
  # count as changes too; `git ls-files --modified` only reports tracked files,
  # so a snapshot that only adds pages would be skipped. -F matches the README
  # name literally. `grep -c` exits 1 when the count is 0, hence `|| true`.
  modified_docs=$(git status --porcelain --untracked-files=all \
    | grep -c -v -F "README.md" || true)
  if (( modified_docs == 0 )); then
    echo "${LOG_NAME} No commit since there are no file changes."
    git restore "$CHANGELOG_FILE"
  else
    echo "${LOG_NAME} Create snapshot commit ..."
    git add "$SNAPSHOT_ROOT"
    # Expand the literal "\n" sequences of the message template.
    COMMIT_MSG=$(echo -e "$COMMIT_MSG")
    git commit --message "$COMMIT_MSG"
  fi
fi
252
253
# Cleanup: discard the scratch directory that holds the project clones.
rm -rf -- "${TEMP_DIR}"
255
256