Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/GenAI Transformers Basics/4 Hugging Face lib functions.ipynb
3074 views
Kernel: Python 3 (ipykernel)

The Hugging Face transformers library provides a variety of pretrained models and pipelines that can perform different natural language processing (NLP) tasks beyond sentiment analysis. Below are some examples of tasks you can perform using different pipelines provided by the transformers library, along with a set of functions demonstrating these capabilities.

pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org transformers==4.9.2 torch==1.9.0

# 1. Text Classification (other than sentiment analysis)
# Function: classify text into predefined categories.
# NOTE: the default checkpoint below is fine-tuned on SST-2, i.e. it is a
# sentiment model; pass a different text-classification checkpoint through
# `model_name` to get another label set.
from transformers import pipeline

# Explicitly specify the model name (single source of truth for the default)
model_name = "distilbert-base-uncased-finetuned-sst-2-english"


def classify_text(text, model_name=model_name):
    """Classify *text* with a Hugging Face text-classification pipeline.

    Args:
        text: The input passed straight to the pipeline.
        model_name: Checkpoint used to build the pipeline. Defaults to the
            module-level ``model_name`` instead of repeating the literal.

    Returns:
        Whatever the pipeline returns for *text*; for the example below that
        is a list holding one ``{'label': ..., 'score': ...}`` dict.
    """
    # A new pipeline is built on every call, as in the original cell.
    classifier = pipeline("text-classification", model=model_name)
    return classifier(text)


# Example usage
text = "I hated the movie"
print(classify_text(text))
[{'label': 'NEGATIVE', 'score': 0.9996954202651978}]
from transformers import pipeline


def recognize_entities(text):
    """Run named-entity recognition on *text* and return grouped entities.

    Args:
        text: The input passed to the NER pipeline.

    Returns:
        The pipeline output: for the example below, a list of dicts with the
        keys ``entity_group``, ``score``, ``word``, ``start`` and ``end``.
    """
    model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
    # `grouped_entities=True` is deprecated and emits a UserWarning; the
    # library maps it to the SIMPLE aggregation strategy, so request that
    # strategy directly to get the same grouping without the warning.
    ner_pipeline = pipeline("ner", model=model_name, aggregation_strategy="simple")
    return ner_pipeline(text)


# Example usage
text = "Ashi is born in Delhi."
print(recognize_entities(text))
Downloading: 0%| | 0.00/998 [00:00<?, ?B/s]
Downloading: 0%| | 0.00/1.33G [00:00<?, ?B/s]
Downloading: 0%| | 0.00/60.0 [00:00<?, ?B/s]
Downloading: 0%| | 0.00/213k [00:00<?, ?B/s]
C:\Users\suyashi144893\Anaconda3\lib\site-packages\transformers\pipelines\token_classification.py:154: UserWarning: `grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to `aggregation_strategy="AggregationStrategy.SIMPLE"` instead. warnings.warn(
[{'entity_group': 'PER', 'score': 0.9957955, 'word': 'Ashi', 'start': 0, 'end': 4}, {'entity_group': 'LOC', 'score': 0.99833214, 'word': 'Delhi', 'start': 16, 'end': 21}]
from transformers import pipeline


def answer_question(question, context):
    """Answer *question* using the passage given in *context*.

    Args:
        question: The question to ask.
        context: The text the question-answering pipeline reads.

    Returns:
        The pipeline output; for the example below, a dict with the keys
        ``score``, ``start``, ``end`` and ``answer``.
    """
    reader = pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
    )
    return reader(context=context, question=question)


# Example usage
context = "Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, therefore very close to the Manhattan Bridge."
question = "Where is Hugging Face Inc. based?"
print(answer_question(question, context))
Downloading: 0%| | 0.00/473 [00:00<?, ?B/s]
Downloading: 0%| | 0.00/261M [00:00<?, ?B/s]
Downloading: 0%| | 0.00/49.0 [00:00<?, ?B/s]
Downloading: 0%| | 0.00/213k [00:00<?, ?B/s]
Downloading: 0%| | 0.00/436k [00:00<?, ?B/s]
{'score': 0.975246012210846, 'start': 40, 'end': 53, 'answer': 'New York City'}
from transformers import pipeline


def summarize_text(text, max_length=50, min_length=25):
    """Summarize *text* with the ``facebook/bart-large-cnn`` checkpoint.

    Args:
        text: The text to summarize.
        max_length: Forwarded to the pipeline as ``max_length``.
        min_length: Forwarded to the pipeline as ``min_length``.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    # Sampling is disabled, exactly as in the original call.
    outputs = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    first_result = outputs[0]
    return first_result['summary_text']


# Example usage
text = "A good human embodies kindness, empathy, and integrity. They act selflessly, helping others and showing compassion. Honesty and respect guide their interactions, fostering trust and positive relationships. A good human values diversity, promotes equality, and strives to make the world a better place through their actions and understanding."
print(summarize_text(text))
Downloading: 0%| | 0.00/1.58k [00:00<?, ?B/s]
Downloading: 0%| | 0.00/1.63G [00:00<?, ?B/s]
Downloading: 0%| | 0.00/899k [00:00<?, ?B/s]
Downloading: 0%| | 0.00/456k [00:00<?, ?B/s]
Downloading: 0%| | 0.00/1.36M [00:00<?, ?B/s]
C:\Users\suyashi144893\Anaconda3\lib\site-packages\torch\_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at ..\aten\src\ATen\native\BinaryOps.cpp:467.) return torch.floor_divide(self, other)
A good human embodies kindness, empathy, and integrity. They act selflessly, helping others and showing compassion. Honesty and respect guide their interactions, fostering trust and positive relationships.
from transformers import pipeline


def generate_text(prompt, max_length=50):
    """Generate text from *prompt* with the ``gpt2`` checkpoint.

    Args:
        prompt: Initial text handed to the text-generation pipeline.
        max_length: Forwarded to the pipeline as ``max_length``.

    Returns:
        The ``generated_text`` field of the single sequence requested.
    """
    generator = pipeline("text-generation", model="gpt2")
    sequences = generator(
        prompt,
        max_length=max_length,
        num_return_sequences=1,
    )
    only_sequence = sequences[0]
    return only_sequence['generated_text']


# Example usage
prompt = input("Enter initial text or topic:")
print(generate_text(prompt))
Enter initial text or topic:Language
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Language by the Book Society of America. It's so nice to be able to be so transparent about how we are going to use our power. I've been very lucky the time is right now. I have lots of work to
from transformers import pipeline


def translate_text(text, source_lang="en", target_lang="fr"):
    """Translate *text* using a Helsinki-NLP OPUS-MT checkpoint.

    Args:
        text: The text to translate.
        source_lang: Language code inserted into the checkpoint name as the
            source side.
        target_lang: Language code inserted into the checkpoint name as the
            target side.

    Returns:
        The ``translation_text`` field of the first pipeline result.
    """
    # The checkpoint id is derived from the language pair.
    translator = pipeline(
        "translation",
        model=f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}",
    )
    outputs = translator(text)
    first_result = outputs[0]
    return first_result['translation_text']


# Example usage
text = "A pretty cat"
print(translate_text(text, source_lang="en", target_lang="fr"))
# Install or update required libraries
pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org --upgrade datasets transformers huggingface_hub
# Import the necessary modules
import dataclasses

import numpy as np
import torch
from datasets import load_dataset
from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

# NOTE: `load_metric` is intentionally no longer imported. This cell never
# used it, and it has been removed from recent `datasets` releases, so with an
# upgraded install the import itself fails before any training code runs.

# Load dataset
dataset = load_dataset("ag_news")

# Initialize tokenizer and model
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# num_labels=4 sizes the classification head; presumably this matches the
# number of classes in the dataset loaded above -- confirm if you swap datasets.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)


# Tokenize the input data
def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch, truncating over-long inputs."""
    return tokenizer(examples["text"], truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Create a data collator (pads each batch when it is assembled)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def compute_metrics(eval_pred):
    """Return accuracy for one evaluation pass.

    Without a ``compute_metrics`` callback the Trainer reports only the loss.
    Assumes ``eval_pred.predictions`` holds per-class scores with the class
    axis last and ``eval_pred.label_ids`` holds integer class ids.
    """
    predicted = np.argmax(eval_pred.predictions, axis=-1)
    return {"accuracy": float(np.mean(predicted == eval_pred.label_ids))}


# Define the training arguments.
# The evaluation-schedule keyword was renamed from `evaluation_strategy` to
# `eval_strategy` in newer transformers releases; pick whichever name the
# installed version defines so the cell works before and after the upgrade.
_training_arg_names = {field.name for field in dataclasses.fields(TrainingArguments)}
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    **{_eval_strategy_key: "epoch"},
)

# Create a Trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Evaluate the model
results = trainer.evaluate()
print(results)