CoCalc -- Lab Work RNN for Alphabetical Sequence Generation .ipynb

GitHub Repository: suyashi29/python-su
Path: blob/master/Generative NLP Models using Python/Lab Work RNN for Alphabetical Sequence Generation .ipynb
³⁰⁷⁴ views

Kernel: Python 3 (ipykernel)

Alphabetical Sequence Generation with a Simple RNN

In [1]:

# Importing Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [10]:

# Define the alphabet
alphabet = [chr(i) for i in range(97, 123)]  # 'a' to 'z'
vocab_size = len(alphabet)

# Create a mapping from character to index and back
char2idx = {ch: i for i, ch in enumerate(alphabet)}
idx2char = {i: ch for ch, i in char2idx.items()}

In [11]:


# Generate training data: input='a' -> target='b', 'b'->'c' ... 'y'->'z'
input_seq = [char2idx[ch] for ch in alphabet[:-1]]  # 'a' to 'y'
target_seq = [char2idx[ch] for ch in alphabet[1:]]  # 'b' to 'z'

# Convert to tensors
input_tensor = torch.tensor(input_seq).unsqueeze(1)   # Shape: [25, 1]
target_tensor = torch.tensor(target_seq)              # Shape: [25]

In [8]:

alphabet

Out[8]:

['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [5]:

## Check Input and Output
print("Input Sequence =",input_seq )

Out[5]:

Input Sequence = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]

In [6]:

print("Output Sequence =",target_seq )

Out[6]:

Output Sequence = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]

In [7]:

print("Input tensor =",input_tensor )

Out[7]:

Input tensor = tensor([[ 0],
        [ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
        [10],
        [11],
        [12],
        [13],
        [14],
        [15],
        [16],
        [17],
        [18],
        [19],
        [20],
        [21],
        [22],
        [23],
        [24]])

Alphabetical Sequence Generation using TensorFlow

In [18]:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [19]:

# Define the alphabet
alphabet = [chr(i) for i in range(97, 123)]  # 'a' to 'z'
vocab_size = len(alphabet)

# Mapping characters to indices and vice versa
char2idx = {ch: i for i, ch in enumerate(alphabet)}
idx2char = {i: ch for ch, i in char2idx.items()}

# Create input-output pairs: 'a' → 'b', 'b' → 'c', ..., 'y' → 'z'
input_seq = [char2idx[ch] for ch in alphabet[:-1]]   # 'a' to 'y'
target_seq = [char2idx[ch] for ch in alphabet[1:]]   # 'b' to 'z'

# One-hot encode the input and target sequences
X = to_categorical(input_seq, num_classes=vocab_size)
y = to_categorical(target_seq, num_classes=vocab_size)

In [20]:

model = Sequential([
    Dense(64, activation='relu', input_shape=(vocab_size,)),
    Dense(64, activation='relu'),
    Dense(vocab_size, activation='softmax')  # Output layer for classification
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Out[20]:

C:\Users\Suyashi144893\AppData\Local\anaconda3\Lib\site-packages\keras\src\layers\core\dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)

In [21]:

model.fit(X, y, epochs=300, verbose=0)

# Print loss every 50 epochs
for i in range(0, 301, 50):
    loss, acc = model.evaluate(X, y, verbose=0)
    print(f"Epoch {i}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

Out[21]:

Epoch 0, Loss: 0.0111, Accuracy: 1.0000
Epoch 50, Loss: 0.0111, Accuracy: 1.0000
Epoch 100, Loss: 0.0111, Accuracy: 1.0000
Epoch 150, Loss: 0.0111, Accuracy: 1.0000
Epoch 200, Loss: 0.0111, Accuracy: 1.0000
Epoch 250, Loss: 0.0111, Accuracy: 1.0000
Epoch 300, Loss: 0.0111, Accuracy: 1.0000

In [24]:

def predict_next_char(start_char, length=10):
    current_char = start_char
    result = current_char
    for _ in range(length):
        x_input = to_categorical([char2idx[current_char]], num_classes=vocab_size)
        prediction = model.predict(x_input, verbose=0)
        next_idx = np.argmax(prediction)
        next_char = idx2char[next_idx]
        result += next_char
        current_char = next_char
    return result

# Try starting with 'a'
print(predict_next_char('l', length=14))

Out[24]:

lmnopqrstuvwxyz

Alphabetical Sequence Generation with a Simple RNN

Alphabetical Sequence Generation using TensorFlow

Product

Resources

Company