"n = int(0.8*len(data)) # we directly take the 80% of data\n",
"# print(len(data))\n",
"train_data = data[:n]\n",
"val_data = data[n:] # we take 20% data as a validation data.\n",
"\n",
"def get_batch(split):\n",
" data = train_data if split == 'train' else val_data\n",
" # Randomly selects indices for creating a batch. The indices are chosen such that there is enough room for a sequence of size block_size in the data.\n",
" ix = torch.randint(len(data) - block_size, (batch_size,))\n",
"\n",
"# block_size is length of block which we are passing inside model and we set it = 8 and batch length we take is = 8\n",
" x = torch.stack([data[i:i+block_size] for i in ix])\n",
" y = torch.stack([data[i+1:i+block_size+1] for i in ix])\n",
" x, y = x.to(device), y.to(device) #Moves the tensors x and y to the specified device (e.g., GPU).\n",
" return x, y\n",
"\n",
"x, y = get_batch('train')\n",
"print('inputs:')\n",
"# print(x.shape)\n",
"print(x)\n",
"print('targets:')\n",
"print(y)\n",
"\n",
"\n",
"# as the batch sixe is 4 and length is 4"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "ffa9c7b1-9309-4d52-8e42-7c66952af2bb",
"metadata": {},
"outputs": [],
"source": [
"@torch.no_grad()\n",
"# this is decorator In other words, it tells PyTorch not to track operations for gradient computation.\n",
"#This is useful when you are evaluating the model and don't need to update its parameters.\n",
"def estimate_loss():\n",
" out = {}\n",
" model.eval()\n",
"\n",
" # In other words, it tells PyTorch not to track operations for gradient computation.\n",
" # This is useful when you are evaluating the model and don't need to update its parameters.\n",
" for split in ['train', 'val']:\n",
" losses = torch.zeros(eval_iters)\n",
" #: Initializes a tensor of zeros to store individual losses for each iteration of evaluation.\n",
"\n",
"# for k in range(eval_iters):: Loops over a specified number of iterations (eval_iters) for evaluation. In each iteration:\n",
"# X, Y = get_batch(split): Obtains a batch of input-output pairs for the current split using the get_batch function.\n",
"# logits, loss = model(X, Y): Passes the input X and target Y through the model to get predictions (logits) and calculate the loss.\n",
"# losses[k] = loss.item(): Stores the loss in the losses tensor.\n",
"# out[split] = losses.mean(): Calculates the mean of the losses for the current split and stores it in the out dictionary.\n",
"# model.train(): Sets the model back to training mode. This is important to ensure that the model is ready for training after the evaluation.\n",
"# return out: Returns the dictionary containing the mean losses for both training and validation splits.\n",