Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
yiming-wange
GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a5/mingpt-demo/mingpt/__pycache__/trainer.cpython-310.pyc
1004 views
o

M��c��@s~dZddlZddlZddlmZddlZddlZddlmZddl	m
Z
ddlmZe�
e�ZGdd�d�ZGdd	�d	�ZdS)
z�
Simple training loop; Boilerplate that could apply to any arbitrary neural network,
so nothing in this file really has anything to do with GPT specifically.
�N)�tqdm)�LambdaLR)�
DataLoaderc@s@eZdZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZ
dd
�Zd
S)�
TrainerConfig�
�@ga2U0*�3?)g�������?gffffff�?��?皙�����?Fg�Z�Ag��DNBNrcKs"|��D]
\}}t|||�qdS)N)�items�setattr)�self�kwargs�k�v�r�L/Users/yimingwang/Desktop/cs224n/assignment/a5/mingpt-demo/mingpt/trainer.py�__init__#s�zTrainerConfig.__init__)�__name__�
__module__�__qualname__�
max_epochs�
batch_size�
learning_rate�betas�grad_norm_clip�weight_decay�lr_decay�
warmup_tokens�final_tokens�	ckpt_path�num_workersrrrrrrsrc@s$eZdZdd�Zdd�Zdd�ZdS)�TrainercCsT||_||_||_||_d|_tj��r(tj��|_tj	�
|j��|j�|_dSdS)N�cpu)�model�
train_dataset�test_dataset�config�device�torch�cuda�is_available�current_device�nn�DataParallel�to)rr#r$r%r&rrrr)s
�zTrainer.__init__cCsBt|jd�r
|jjn|j}t�d|jj�t�|�	�|jj�dS)N�modulez	saving %s)
�hasattrr#r/�logger�infor&rr(�save�
state_dict)r�	raw_modelrrr�save_checkpoint5szTrainer.save_checkpointcs��j�j��t�jd�r�jn�}|���������fdd�}td�}d�_t�j�D]&�|d��j	dur<|d�}�j	dupD||k}�jj
durS|rS|}���q-dS)Nr/cs
|dk}��|�|r�jn�j}t|dd�j�jd�}g}|r*tt|�t|�d�nt|�}|D]�\}\}}|�	�j
�}|�	�j
�}t�|���||�\}	}
|
�
�}
|�|
���Wd�n1sdwY|r����|
��tjj�����j�����jrֈj|dk��7_�j�jkr�t�j�ttd�j��}n"t�j�j�ttd�j�j��}tddd	t� tj!|��}�j"|}
�j#D]}|
|d
<q�n�j"}
|�$d�d�d|�d
|
��d�d|
d���q0|�stt%�
|��}t&�'d|�|SdS)N�trainT)�shuffle�
pin_memoryrr )�totalr�r	g�?r�lrzepoch z iter z
: train loss z.5fz. lr �ez
test loss: %f)(r7r$r%rrr r�	enumerate�lenr.r'r(�set_grad_enabled�mean�append�item�	zero_grad�backwardr,�utils�clip_grad_norm_�
parametersr�stepr�tokens�sumr�float�maxr�math�cos�pir�param_groups�set_description�npr1r2)�splitZis_train�data�loaderZlosses�pbar�it�x�y�logits�lossZlr_mult�progressr<�param_group�	test_loss�r&�epochr#�	optimizerrrr�	run_epoch@sP
� �&


�.��z Trainer.train.<locals>.run_epoch�infrr7�test)r#r&r0r/�configure_optimizersrLrJ�rangerr%rr6)rr5rcZ	best_lossr_Z
good_modelrr`rr7;s 
6
��z
Trainer.trainN)rrrrr6r7rrrrr!'sr!)�__doc__rN�loggingr�numpyrSr(Ztorch.optim�optimZtorch.optim.lr_schedulerr�torch.utils.data.dataloaderr�	getLoggerrr1rr!rrrr�<module>s