# coding: utf-8

import sys

from transformers import pipeline, set_seed
from transformers import GPT2Tokenizer
from transformers import GPT2Model

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:

sys.path.insert(0, '..')
from python_environment_check import check_packages

# Check recommended package versions:

d = {
    'torch': '1.9.0',
    'transformers': '4.9.1',
}
check_packages(d)

# # Chapter 16: Transformers – Improving Natural Language Processing with Attention Mechanisms (Part 2/3)

# **Outline**
#
# - [Building large-scale language models by leveraging unlabeled data](#Building-large-scale-language-models-by-leveraging-unlabeled-data)
# - [Pre-training and fine-tuning transformer models](#Pre-training-and-fine-tuning-transformer-models)
# - [Leveraging unlabeled data with GPT](#Leveraging-unlabeled-data-with-GPT)
# - [Using GPT-2 to generate new text](#Using-GPT-2-to-generate-new-text)
# - [Bidirectional pre-training with BERT](#Bidirectional-pre-training-with-BERT)
# - [The best of both worlds: BART](#The-best-of-both-worlds-BART)

# ## Building large-scale language models by leveraging unlabeled data

# ## Pre-training and fine-tuning transformer models

# ## Leveraging unlabeled data with GPT

# ### Using GPT-2 to generate new text

# Generate three candidate continuations of a prompt with the GPT-2
# text-generation pipeline; set_seed makes the sampled outputs reproducible:

generator = pipeline('text-generation', model='gpt2')
set_seed(123)
generator("Hey readers, today is",
          max_length=20,
          num_return_sequences=3)

# Encode an input sentence into token IDs (returned as PyTorch tensors)
# with the GPT-2 tokenizer:

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
text = "Let us encode this sentence"
encoded_input = tokenizer(text, return_tensors='pt')
encoded_input

# Load the pre-trained GPT-2 model without a language-modeling head,
# which lets us use it as a feature extractor:

model = GPT2Model.from_pretrained('gpt2')

# Pass the encoded input through the model and inspect the shape of the
# last hidden state (batch size x sequence length x hidden size):

output = model(**encoded_input)
output['last_hidden_state'].shape

# ### Bidirectional pre-training with BERT
#
# (A short encoding sketch with a pre-trained BERT model appears near the end of this script.)

# ### The best of both worlds: BART
#
# (A short summarization sketch with a pre-trained BART model appears near the end of this script.)

# ---
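# BART pairs a bidirectional (BERT-style) encoder with an autoregressive
# (GPT-style) decoder, which makes it a natural fit for sequence-to-sequence
# tasks such as summarization. The following is a minimal sketch, assuming the
# 'facebook/bart-large-cnn' summarization checkpoint (a sizeable download);
# the example article text is made up for illustration:

summarizer = pipeline('summarization', model='facebook/bart-large-cnn')
article = (
    "Transformers are deep learning models that rely on attention mechanisms "
    "to process input sequences in parallel rather than step by step. "
    "Pre-trained transformer models such as GPT-2, BERT, and BART can be "
    "fine-tuned on comparatively small labeled datasets for a wide range of "
    "downstream natural language processing tasks."
)
summarizer(article, max_length=40, min_length=10, do_sample=False)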
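# For the bidirectional pre-training with BERT section above, the same
# encoding workflow shown for GPT-2 can be sketched with a BERT checkpoint.
# This is a minimal sketch, assuming the 'bert-base-uncased' model; because
# BERT attends to tokens on both sides of each position, the resulting hidden
# states are bidirectional context representations rather than left-to-right
# ones:

from transformers import BertTokenizer, BertModel

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

bert_input = bert_tokenizer("Let us encode this sentence", return_tensors='pt')
bert_output = bert_model(**bert_input)
bert_output['last_hidden_state'].shape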