Commit 37bcca08 by Saroj Dhiman

Upload new file

1 parent 6b2d81d3
Showing with 626 additions and 0 deletions
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"id": "819c33ad-2957-4e1f-a5bd-e5b3d0834901",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cuda\n"
]
}
],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"from torch.nn import functional as F\n",
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
"print(device)\n",
"block_size = 8\n",
"batch_size = 4\n",
"max_iters = 1000\n",
"# eval_interval = 2500\n",
"learning_rate = 3e-4\n",
"eval_iters = 250"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "5a753e05-cad9-4a5b-b87c-60faf5829802",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"DOROTHY AND THE WIZARD IN OZ\n",
"\n",
" BY\n",
"\n",
" L. FRANK BAUM\n",
"\n",
" AUTHOR OF THE WIZARD OF OZ, THE LAND OF OZ, OZMA OF OZ, ETC.\n",
"\n",
" ILLUSTRATED BY JOHN R. NEILL\n",
"\n",
" BOOKS OF WONDER WILLIAM MORROW & CO., INC. NEW YORK\n",
"\n",
"\n"
]
}
],
"source": [
"with open(\"wizard.txt\", 'r', encoding='utf-8') as f:\n",
" text = f.read()\n",
"# print(len(text))\n",
"print(text[:200])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c98a9ad4-a8ca-4e69-ae23-b9bfa4c64022",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['\\n', ' ', '!', '\"', '&', \"'\", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\\ufeff']\n"
]
}
],
"source": [
"chars = sorted(set(text))\n",
"print(chars)\n",
"vocab_size = len(chars)\n",
"# here we have the empty spaces also so we need to remove this all"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "3f31e119-e7f9-4487-b9ca-1c3562af48a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([80, 28, 39, 42, 39, 44, 32, 49, 1, 25, 38, 28, 1, 44, 32, 29, 1, 47,\n",
" 33, 50, 25, 42, 28, 1, 33, 38, 1, 39, 50, 0, 0, 1, 26, 49, 0, 0,\n",
" 1, 36, 11, 1, 30, 42, 25, 38, 35, 1, 26, 25, 45, 37, 0, 0, 1, 25,\n",
" 45, 44, 32, 39, 42, 1, 39, 30, 1, 44, 32, 29, 1, 47, 33, 50, 25, 42,\n",
" 28, 1, 39, 30, 1, 39, 50, 9, 1, 44, 32, 29, 1, 36, 25, 38, 28, 1,\n",
" 39, 30, 1, 39, 50, 9, 1, 39, 50, 37])\n"
]
}
],
"source": [
"string_to_int = { ch:i for i,ch in enumerate(chars) }\n",
"int_to_string = { i:ch for i,ch in enumerate(chars) }\n",
"# two functions i create here just to make encoder and decoder for conversion.\n",
"encode = lambda s: [string_to_int[c] for c in s]\n",
"decode = lambda l: ''.join([int_to_string[i] for i in l])\n",
"# here i mak tensors of all the data that i have in a text format.\n",
"data = torch.tensor(encode(text), dtype=torch.long)\n",
"print(data[:100])"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "a6457c1f-c386-4e51-aa43-8cf6ddf46898",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([80, 28, 39, ..., 0, 0, 0])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "b171d480-3e00-4a88-9075-946391c4baaf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'\\n': 0,\n",
" ' ': 1,\n",
" '!': 2,\n",
" '\"': 3,\n",
" '&': 4,\n",
" \"'\": 5,\n",
" '(': 6,\n",
" ')': 7,\n",
" '*': 8,\n",
" ',': 9,\n",
" '-': 10,\n",
" '.': 11,\n",
" '0': 12,\n",
" '1': 13,\n",
" '2': 14,\n",
" '3': 15,\n",
" '4': 16,\n",
" '5': 17,\n",
" '6': 18,\n",
" '7': 19,\n",
" '8': 20,\n",
" '9': 21,\n",
" ':': 22,\n",
" ';': 23,\n",
" '?': 24,\n",
" 'A': 25,\n",
" 'B': 26,\n",
" 'C': 27,\n",
" 'D': 28,\n",
" 'E': 29,\n",
" 'F': 30,\n",
" 'G': 31,\n",
" 'H': 32,\n",
" 'I': 33,\n",
" 'J': 34,\n",
" 'K': 35,\n",
" 'L': 36,\n",
" 'M': 37,\n",
" 'N': 38,\n",
" 'O': 39,\n",
" 'P': 40,\n",
" 'Q': 41,\n",
" 'R': 42,\n",
" 'S': 43,\n",
" 'T': 44,\n",
" 'U': 45,\n",
" 'V': 46,\n",
" 'W': 47,\n",
" 'X': 48,\n",
" 'Y': 49,\n",
" 'Z': 50,\n",
" '[': 51,\n",
" ']': 52,\n",
" '_': 53,\n",
" 'a': 54,\n",
" 'b': 55,\n",
" 'c': 56,\n",
" 'd': 57,\n",
" 'e': 58,\n",
" 'f': 59,\n",
" 'g': 60,\n",
" 'h': 61,\n",
" 'i': 62,\n",
" 'j': 63,\n",
" 'k': 64,\n",
" 'l': 65,\n",
" 'm': 66,\n",
" 'n': 67,\n",
" 'o': 68,\n",
" 'p': 69,\n",
" 'q': 70,\n",
" 'r': 71,\n",
" 's': 72,\n",
" 't': 73,\n",
" 'u': 74,\n",
" 'v': 75,\n",
" 'w': 76,\n",
" 'x': 77,\n",
" 'y': 78,\n",
" 'z': 79,\n",
" '\\ufeff': 80}"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string_to_int "
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "37291126-ab9c-4c86-b70e-ea669786b06f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: '\\n',\n",
" 1: ' ',\n",
" 2: '!',\n",
" 3: '\"',\n",
" 4: '&',\n",
" 5: \"'\",\n",
" 6: '(',\n",
" 7: ')',\n",
" 8: '*',\n",
" 9: ',',\n",
" 10: '-',\n",
" 11: '.',\n",
" 12: '0',\n",
" 13: '1',\n",
" 14: '2',\n",
" 15: '3',\n",
" 16: '4',\n",
" 17: '5',\n",
" 18: '6',\n",
" 19: '7',\n",
" 20: '8',\n",
" 21: '9',\n",
" 22: ':',\n",
" 23: ';',\n",
" 24: '?',\n",
" 25: 'A',\n",
" 26: 'B',\n",
" 27: 'C',\n",
" 28: 'D',\n",
" 29: 'E',\n",
" 30: 'F',\n",
" 31: 'G',\n",
" 32: 'H',\n",
" 33: 'I',\n",
" 34: 'J',\n",
" 35: 'K',\n",
" 36: 'L',\n",
" 37: 'M',\n",
" 38: 'N',\n",
" 39: 'O',\n",
" 40: 'P',\n",
" 41: 'Q',\n",
" 42: 'R',\n",
" 43: 'S',\n",
" 44: 'T',\n",
" 45: 'U',\n",
" 46: 'V',\n",
" 47: 'W',\n",
" 48: 'X',\n",
" 49: 'Y',\n",
" 50: 'Z',\n",
" 51: '[',\n",
" 52: ']',\n",
" 53: '_',\n",
" 54: 'a',\n",
" 55: 'b',\n",
" 56: 'c',\n",
" 57: 'd',\n",
" 58: 'e',\n",
" 59: 'f',\n",
" 60: 'g',\n",
" 61: 'h',\n",
" 62: 'i',\n",
" 63: 'j',\n",
" 64: 'k',\n",
" 65: 'l',\n",
" 66: 'm',\n",
" 67: 'n',\n",
" 68: 'o',\n",
" 69: 'p',\n",
" 70: 'q',\n",
" 71: 'r',\n",
" 72: 's',\n",
" 73: 't',\n",
" 74: 'u',\n",
" 75: 'v',\n",
" 76: 'w',\n",
" 77: 'x',\n",
" 78: 'y',\n",
" 79: 'z',\n",
" 80: '\\ufeff'}"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"int_to_string"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "e4dc9a92-0d4b-45b2-a17c-4db69f934179",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"torch.cuda.is_available()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "6cb15691-7746-4ff3-8c21-e2e8109578d0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"inputs:\n",
"tensor([[71, 74, 56, 64, 1, 54, 60, 54],\n",
" [73, 61, 58, 1, 47, 62, 79, 54],\n",
" [61, 54, 69, 69, 78, 1, 54, 66],\n",
" [68, 71, 58, 1, 73, 61, 58, 78]], device='cuda:0')\n",
"targets:\n",
"tensor([[74, 56, 64, 1, 54, 60, 54, 62],\n",
" [61, 58, 1, 47, 62, 79, 54, 71],\n",
" [54, 69, 69, 78, 1, 54, 66, 68],\n",
" [71, 58, 1, 73, 61, 58, 78, 1]], device='cuda:0')\n"
]
}
],
"source": [
"n = int(0.8*len(data)) # we directly take the 80% of data\n",
"# print(len(data))\n",
"train_data = data[:n]\n",
"val_data = data[n:] # we take 20% data as a validation data.\n",
"\n",
"def get_batch(split):\n",
" data = train_data if split == 'train' else val_data\n",
" # Randomly selects indices for creating a batch. The indices are chosen such that there is enough room for a sequence of size block_size in the data.\n",
" ix = torch.randint(len(data) - block_size, (batch_size,))\n",
"\n",
"# block_size is length of block which we are passing inside model and we set it = 8 and batch length we take is = 8\n",
" x = torch.stack([data[i:i+block_size] for i in ix])\n",
" y = torch.stack([data[i+1:i+block_size+1] for i in ix])\n",
" x, y = x.to(device), y.to(device) #Moves the tensors x and y to the specified device (e.g., GPU).\n",
" return x, y\n",
"\n",
"x, y = get_batch('train')\n",
"print('inputs:')\n",
"# print(x.shape)\n",
"print(x)\n",
"print('targets:')\n",
"print(y)\n",
"\n",
"\n",
"# as the batch sixe is 4 and length is 4"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "ffa9c7b1-9309-4d52-8e42-7c66952af2bb",
"metadata": {},
"outputs": [],
"source": [
"@torch.no_grad()\n",
"# this is decorator In other words, it tells PyTorch not to track operations for gradient computation.\n",
"#This is useful when you are evaluating the model and don't need to update its parameters.\n",
"def estimate_loss():\n",
" out = {}\n",
" model.eval()\n",
"\n",
" # In other words, it tells PyTorch not to track operations for gradient computation.\n",
" # This is useful when you are evaluating the model and don't need to update its parameters.\n",
" for split in ['train', 'val']:\n",
" losses = torch.zeros(eval_iters)\n",
" #: Initializes a tensor of zeros to store individual losses for each iteration of evaluation.\n",
"\n",
"# for k in range(eval_iters):: Loops over a specified number of iterations (eval_iters) for evaluation. In each iteration:\n",
"# X, Y = get_batch(split): Obtains a batch of input-output pairs for the current split using the get_batch function.\n",
"# logits, loss = model(X, Y): Passes the input X and target Y through the model to get predictions (logits) and calculate the loss.\n",
"# losses[k] = loss.item(): Stores the loss in the losses tensor.\n",
"# out[split] = losses.mean(): Calculates the mean of the losses for the current split and stores it in the out dictionary.\n",
"# model.train(): Sets the model back to training mode. This is important to ensure that the model is ready for training after the evaluation.\n",
"# return out: Returns the dictionary containing the mean losses for both training and validation splits.\n",
" for k in range(eval_iters):\n",
" X, Y = get_batch(split)\n",
" logits, loss = model(X, Y)\n",
" losses[k] = loss.item()\n",
" out[split] = losses.mean()\n",
" model.train()\n",
" return out"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "44fe8e12",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"qKE-NBKj .ZI!-uAB_FL6mV2*SDwbpGOWQICo_F4NV_B13re99)ITQy80\n",
"Y5c!HO:-Qdbmeul].PDXkze9;oVPDz1zZJ9vr,9mP'qKVBSKEiuAXQ*a&bPm6Y_X,_7Bhl_Zt2fuqYL]J[h38lWc7r(QOLsY9*!VTiCeKjc:Swy0bsrG;I-Q[md:eHf1yxakqvX[OA&z4oV_EHz)0vHXemwL]\n",
"i0!:d5]hn7Lp.px\n",
"744*]e*[PFAXR(z!GEiKD;zCp;fp;TJB*!9;e\n",
"7ixo97U-:PTM6msXR1ZTt5JE]_IQ9C3(mAWMg*1VbWZ[&A?0ndBR!.?lX&b1B;?0![uqYP\"q53Q*cnn7L]zd16?BWQ '-\n",
"c_B-p87Jq78)ru\n",
"cGWZ61? .!9!(mEX&W5;365'tKpI Nh0e Z,!8ClwF.Z9O*yG;dfu6R r5Bvu*QY_FOame;choDV*q 'O:zWZNecUT-*HM]1Uwh9*MhwGMdr-z9p;IA-\n"
]
}
],
"source": [
"class BigramLanguageModel(nn.Module):\n",
" def __init__(self, vocab_size):\n",
" super().__init__()\n",
" self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)\n",
" \n",
" def forward(self, index, targets=None):\n",
" logits = self.token_embedding_table(index)\n",
" \n",
" \n",
" if targets is None:\n",
" loss = None\n",
" else:\n",
" B, T, C = logits.shape\n",
" logits = logits.view(B*T, C)\n",
" targets = targets.view(B*T)\n",
" loss = F.cross_entropy(logits, targets)\n",
" \n",
" return logits, loss\n",
" \n",
" def generate(self, index, max_new_tokens):\n",
" # index is (B, T) array of indices in the current context\n",
" for _ in range(max_new_tokens):\n",
" # get the predictions\n",
" logits, loss = self.forward(index)\n",
" # focus only on the last time step\n",
" logits = logits[:, -1, :] # becomes (B, C)\n",
" # apply softmax to get probabilities\n",
" probs = F.softmax(logits, dim=-1) # (B, C)\n",
" # sample from the distribution\n",
" index_next = torch.multinomial(probs, num_samples=1) # (B, 1)\n",
" # append sampled index to the running sequence\n",
" index = torch.cat((index, index_next), dim=1) # (B, T+1)\n",
" return index\n",
"\n",
"model = BigramLanguageModel(vocab_size)\n",
"m = model.to(device)\n",
"\n",
"context = torch.zeros((1,1), dtype=torch.long, device=device)\n",
"generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())\n",
"print(generated_chars)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "e5af5e00-5e67-44a6-8255-853ce4d8a784",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"step: 0, train loss: 4.683, val loss: 4.686\n",
"step: 250, train loss: 4.653, val loss: 4.612\n",
"step: 500, train loss: 4.611, val loss: 4.589\n",
"step: 750, train loss: 4.511, val loss: 4.515\n",
"4.8556976318359375\n"
]
}
],
"source": [
"# create a PyTorch optimizer\n",
"optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)\n",
"\n",
"for iter in range(max_iters):\n",
" if iter % eval_iters == 0:\n",
" losses = estimate_loss()\n",
" print(f\"step: {iter}, train loss: {losses['train']:.3f}, val loss: {losses['val']:.3f}\")\n",
"\n",
" # sample a batch of data\n",
" xb, yb = get_batch('train')\n",
"\n",
" # evaluate the loss\n",
" logits, loss = model.forward(xb, yb)\n",
" optimizer.zero_grad(set_to_none=True)\n",
" loss.backward()\n",
" optimizer.step()\n",
"print(loss.item())"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "cd7cc14b-5b34-4e16-a554-f3d41288f2bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"cnBaJ2rjwn?lmk9AWAh0tcMhG[,A&\"ETNkV*qXkv1eU]yJj-0h&oW_dB1zd\n",
"C\n",
"kzdY6et? .(z?C1.?KI6fZu\n",
" 'g_k\n",
"xK4M1Cv80oi&yUdTZ'0V_sx55p;Tig:,Tt.FamRbwlb[g&rDEDzT6JCF71G4RiSTafutz4peg;NMqk]y!(5pfN oRP..iRKXT!W4Ij:-Q9*HsWE5(g;L]fIwHK?L][OV_;dBfx9r4?_(sxw3zT0PwhwhCE588CS!a '*ls[2bqQp;TRZpwm49og0pact\n",
"7smPA-\n",
"tK'(moH5itc5Rx4[DpP m7]Os!t,7L4Oq3;J0\n",
"cW8*anxd8wIA WGpe;zWi&_(A90tc7O1UN3HL!jI1p;!8eg 'WM[wfvgsis?JF\n",
"ojx4rshGkHuARc4*e:ondTO(;oF8\"JQ_P16s?0P5; Z-Y0VgEioVpckjF7lqYB5Tv1]uuQMIwbG_6R!a&vjPG;9x0w.c)uE1p h].L5z:\n"
]
}
],
"source": [
"context = torch.zeros((1,1), dtype=torch.long, device=device)\n",
"generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())\n",
"print(generated_chars)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b35d7353-9d1a-4811-91a7-127b5ee7af5f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a201341-b090-40c0-a3cf-54c66f2ad3ee",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f17a4ec-549c-42d3-9049-72ea717f4c7c",
"metadata": {},
"outputs": [],
"source": [
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!