Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
codebasics
GitHub Repository: codebasics/deep-learning-keras-tf-tutorial
Path: blob/master/47_BERT_text_classification/BERT_email_classification-Copy1.ipynb
1141 views
Kernel: Python 3
import tensorflow as tf import tensorflow_hub as hub import tensorflow_text as text
import pandas as pd

# Load the dataset; the cells below read its 'Category' and 'Message' columns.
df = pd.read_csv("spam.csv")
df.head(5)

# Summary statistics of the remaining columns, grouped by category.
df.groupby('Category').describe()

# Binary target column: 1 where Category is exactly 'spam', 0 otherwise.
df['spam'] = (df['Category'] == 'spam').astype('int64')
df.head()
# TF Hub handles for the text preprocessing model and the BERT encoder.
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# Wrap both hub models as Keras layers so they can be called on inputs.
bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_URL)
bert_encoder = hub.KerasLayer(BERT_ENCODER_URL)
def get_sentence_embeding(sentences):
    """Return the BERT ``pooled_output`` for ``sentences``.

    ``sentences`` is passed through ``bert_preprocess`` and the result is
    fed to ``bert_encoder``; only the ``'pooled_output'`` entry of the
    encoder's output is returned.
    """
    encoder_inputs = bert_preprocess(sentences)
    encoder_outputs = bert_encoder(encoder_inputs)
    return encoder_outputs['pooled_output']
# Try the helper on two sample sentences embedded in a single call.
sample_sentences = [
    "500$ discount. hurry up",
    "Bhavin, are you up for a volleybal game tomorrow?",
]
get_sentence_embeding(sample_sentences)
<tf.Tensor: shape=(2, 768), dtype=float32, numpy= array([[-0.8435169 , -0.51327276, -0.8884574 , ..., -0.74748874, -0.75314736, 0.91964483], [-0.87208366, -0.50543964, -0.94446677, ..., -0.858475 , -0.7174535 , 0.8808298 ]], dtype=float32)>
# Three-row sample of the dataset for trying out the embedding step.
df_n = df.head(3)
df_n

# Embed only the sampled rows, in one batched call to get_sentence_embeding.
# encoding_column is afterwards only attached to df_n (aligned on its index),
# so running BERT once per row over the whole of df is unnecessary work.
sample_embeddings = get_sentence_embeding(df_n['Message'].tolist()).numpy()

# One 1-D pooled-output vector per sampled row, keyed by df_n's index.
encoding_column = pd.Series(list(sample_embeddings), index=df_n.index)
encoding_column
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-18-0ac2523913ee> in <module> ----> 1 encoding_column = df['Message'].map(get_sentence_encoding) 2 encoding_column C:\Program Files\Python38\lib\site-packages\pandas\core\series.py in map(self, arg, na_action) 3826 dtype: object 3827 """ -> 3828 new_values = super()._map_values(arg, na_action=na_action) 3829 return self._constructor(new_values, index=self.index).__finalize__(self) 3830 C:\Program Files\Python38\lib\site-packages\pandas\core\base.py in _map_values(self, mapper, na_action) 1298 1299 # mapper is a function -> 1300 new_values = map_f(values, mapper) 1301 1302 return new_values pandas/_libs/lib.pyx in pandas._libs.lib.map_infer() <ipython-input-6-b2b66895fa35> in get_sentence_encoding(sentence) 1 def get_sentence_encoding(sentence): ----> 2 preprocessed_result = bert_preprocess([sentence]) 3 return bert_encoder(preprocessed_result)['pooled_output'][0].numpy() ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, *args, **kwargs) 1028 with autocast_variable.enable_auto_cast_variables( 1029 self._compute_dtype_object): -> 1030 outputs = call_fn(inputs, *args, **kwargs) 1031 1032 if self._activity_regularizer: C:\Program Files\Python38\lib\site-packages\tensorflow_hub\keras_layer.py in call(self, inputs, training) 233 else: 234 training = False --> 235 result = smart_cond.smart_cond(training, 236 lambda: f(training=True), 237 lambda: f(training=False)) ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\smart_cond.py in smart_cond(pred, true_fn, false_fn, name) 54 return true_fn() 55 else: ---> 56 return false_fn() 57 else: 58 return control_flow_ops.cond(pred, true_fn=true_fn, false_fn=false_fn, C:\Program Files\Python38\lib\site-packages\tensorflow_hub\keras_layer.py in <lambda>() 235 result = smart_cond.smart_cond(training, 236 
lambda: f(training=True), --> 237 lambda: f(training=False)) 238 239 # Unwrap dicts returned by signatures. ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\saved_model\load.py in _call_attribute(instance, *args, **kwargs) 668 669 def _call_attribute(instance, *args, **kwargs): --> 670 return instance.__call__(*args, **kwargs) 671 672 ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds) 887 888 with OptionalXlaContext(self._jit_compile): --> 889 result = self._call(*args, **kwds) 890 891 new_tracing_count = self.experimental_get_tracing_count() ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds) 922 # In this case we have not created variables on the first call. So we can 923 # run the first trace but we should fail if variables are created. --> 924 results = self._stateful_fn(*args, **kwds) 925 if self._created_variables: 926 raise ValueError("Creating variables on a non-first call to a function" ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs) 3021 (graph_function, 3022 filtered_flat_args) = self._maybe_define_function(args, kwargs) -> 3023 return graph_function._call_flat( 3024 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access 3025 ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager) 1958 and executing_eagerly): 1959 # No tape is watching; skip to running the function. 
-> 1960 return self._build_call_outputs(self._inference_function.call( 1961 ctx, args, cancellation_manager=cancellation_manager)) 1962 forward_backward = self._select_forward_and_backward_functions( ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager) 589 with _InterpolateFunctionError(self): 590 if cancellation_manager is None: --> 591 outputs = execute.execute( 592 str(self.signature.name), 593 num_outputs=self._num_outputs, ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name) 57 try: 58 ctx.ensure_initialized() ---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name, 60 inputs, attrs, num_outputs) 61 except core._NotOkStatusException as e: KeyboardInterrupt:
# Attach the embeddings as column 'e'. df_n comes from df.head(3), so an
# in-place column assignment triggers pandas' SettingWithCopyWarning;
# assign() returns a new frame instead and aligns encoding_column on
# df_n's index.
df_n = df_n.assign(e=encoding_column)
<ipython-input-17-68e77860ae13>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_n['e'] = encoding_column
# Embed every message and store one pooled-output vector per row.
# Messages go through get_sentence_embeding in fixed-size batches, i.e. one
# model call per batch instead of one per row.
EMBED_BATCH_SIZE = 32

all_messages = df['Message'].tolist()
message_vectors = []
for batch_start in range(0, len(all_messages), EMBED_BATCH_SIZE):
    batch = all_messages[batch_start:batch_start + EMBED_BATCH_SIZE]
    # Iterating the (batch, features) array yields one 1-D vector per message.
    message_vectors.extend(get_sentence_embeding(batch).numpy())

# Build the column explicitly on df's index so each vector stays one cell.
df['encoding'] = pd.Series(message_vectors, index=df.index)
df.head(3)
from sklearn.model_selection import train_test_split

# Split messages and labels into train and test sets.
# stratify keeps the spam/non-spam ratio the same in both splits, and a fixed
# random_state makes the split (and the metrics computed on it) repeatable
# across reruns of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    df['Message'],
    df['spam'],
    stratify=df['spam'],
    random_state=42,
)
X_train.head(4)
# Hub layers used inside the classifier graph.
PREPROCESS_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
ENCODER_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
bert_preprocess = hub.KerasLayer(PREPROCESS_HANDLE)
bert_encoder = hub.KerasLayer(ENCODER_HANDLE)

# BERT part: scalar string input -> preprocessing -> encoder outputs.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Classification head on the pooled output: dropout, then a single sigmoid unit.
dropout_layer = tf.keras.layers.Dropout(0.1, name="dropout")
l = dropout_layer(outputs['pooled_output'])
output_layer = tf.keras.layers.Dense(1, activation='sigmoid', name="output")
l = output_layer(l)

# Functional model from the raw text input to the sigmoid output.
model = tf.keras.Model(inputs=[text_input], outputs=[l])
# Inspect the model: layer graph, then the textual summary.
tf.keras.utils.plot_model(model)
model.summary()

# Number of training examples.
len(X_train)

# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for five epochs, then score on the held-out split.
model.fit(x=X_train, y=y_train, epochs=5)
model.evaluate(x=X_test, y=y_test)
# Hand-written sample messages to run through the trained model.
reviews = [
    'Reply to win £100 weekly! Where will the 2006 FIFA World Cup be held? Send STOP to 87239 to end service',
    'You are awarded a SiPix Digital Camera! call 09061221061 from landline. Delivery within 28days. T Cs Box177. M221BP. 2yr warranty. 150ppm. 16 . p p£3.99',
    'it to 80488. Your 500 free text messages are valid until 31 December 2005.',
    'Hey Sam, Are you coming for a cricket game tomorrow',
    "Why don't you wait 'til at least wednesday to see if you get your .",
]

# One sigmoid output per message.
model.predict(x=reviews)