How AI Models Actually Learn — The Math Behind Every AI (Interactive Guide) { "@context": "https://schema.org", "@type": "Article", "headline": "How AI Models Actually Learn — The Core Math Formula Explained", "description": "An interactive guide explaining the loss function and risk minimization formula that trains every AI model, from ChatGPT to image generators.", "author": { "@type": "Person", "name": "Maria" }, "datePublished": "2026-03-31", "inLanguage": "en", "educationalLevel": "Beginner", "keywords": "AI learning, loss function, machine learning basics, neural networks, AI math", "articleSection": "Technology & AI" } *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } :root { --ink: #0f0e0d; --ink2: #3a3834; --ink3: #7a776f; --paper: #faf8f4; --paper2: #f0ece3; --paper3: #e4dfd3; --accent: #c8401a; --accent2: #1a6bc8; --accent3: #1a8c4e; --gold: #c89a1a; --radius: 12px; } body { font-family: 'DM Sans', sans-serif; background: var(--paper); color: var(--ink); line-height: 1.8; padding: 0; margin: 0; -webkit-font-smoothing: antialiased; } .container { max-width: 820px; margin: 0 auto; padding: 0 24px; } .section { padding: 56px 0; border-bottom: 1px solid var(--paper3); } .section-label { font-family: 'DM Mono', monospace; font-size: 10px; letter-spacing: 3px; text-transform: uppercase; color: var(--ink3); margin-bottom: 10px; } .section h2 { font-family: 'Playfair Display', serif; font-size: clamp(22px, 3vw, 32px); font-weight: 700; margin-bottom: 18px; line-height: 1.25; } .section h3 { font-family: 'Playfair Display', serif; font-size: clamp(17px, 2.5vw, 21px); font-weight: 700; margin-bottom: 10px; } .section p { font-size: 16px; color: var(--ink2); margin-bottom: 16px; line-height: 1.8; } .section p:last-child { margin-bottom: 0; } .analogy-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin: 28px 0; } @media(max-width:560px){ .analogy-grid { grid-template-columns: 1fr; } } .analogy-card { background: 
var(--paper2); border-radius: var(--radius); padding: 24px; border: 1px solid var(--paper3); position: relative; overflow: hidden; } .analogy-card::before { content: ''; position: absolute; top: 0; left: 0; right: 0; height: 3px; } .analogy-card.baby::before { background: var(--gold); } .analogy-card.ai::before { background: var(--accent); } .analogy-icon { font-size: 28px; margin-bottom: 10px; display: block; } .analogy-card h3 { font-family: 'Playfair Display', serif; font-size: 17px; font-weight: 700; margin-bottom: 10px; } .step-row { display: flex; flex-direction: column; gap: 7px; margin-top: 10px; } .step { display: flex; align-items: flex-start; gap: 8px; font-size: 13px; color: var(--ink2); line-height: 1.5; } .step-dot { width: 6px; height: 6px; border-radius: 50%; flex-shrink: 0; margin-top: 7px; } .baby .step-dot { background: var(--gold); } .ai .step-dot { background: var(--accent); } .formula-block { background: var(--ink); color: var(--paper); border-radius: var(--radius); padding: 36px 32px; margin: 28px 0; text-align: center; position: relative; overflow: hidden; } .formula-block::after { content: 'THE FORMULA'; position: absolute; top: 12px; right: 16px; font-family: 'DM Mono', monospace; font-size: 9px; letter-spacing: 2px; color: rgba(250,248,244,0.15); } .formula-hint { font-size: 12px; color: rgba(250,248,244,0.4); font-family: 'DM Mono', monospace; margin-bottom: 20px; letter-spacing: 1px; } .formula-main { font-family: 'Playfair Display', serif; font-size: clamp(22px, 4vw, 38px); font-weight: 400; letter-spacing: 1px; display: flex; align-items: center; justify-content: center; gap: 12px; flex-wrap: wrap; } .f-part { cursor: pointer; padding: 4px 10px; border-radius: 6px; transition: all 0.2s; border: 1px solid transparent; } .f-part:hover { transform: scale(1.08); } .f-part.active { border-color: currentColor; background: rgba(255,255,255,0.08); } .f-Q { color: #e8845a; } .f-eq { color: rgba(250,248,244,0.35); font-size: 0.9em; } .f-1n { 
color: #7ab8e8; } .f-sum { color: #e8c85a; font-size: 1.4em; line-height: 1; } .f-L { color: #7ae8a8; } .fraction { display: inline-flex; flex-direction: column; align-items: center; gap: 2px; vertical-align: middle; } .frac-top { border-bottom: 1px solid #7ab8e8; padding-bottom: 2px; font-size: 0.75em; } .frac-bot { font-size: 0.75em; } .sum-wrap { display: inline-flex; flex-direction: column; align-items: center; font-size: 0.55em; gap: 0; vertical-align: middle; margin: 0 4px; } .explain-panel { background: var(--paper2); border-radius: var(--radius); padding: 20px 24px; margin: 0 0 28px; border-left: 3px solid var(--accent); min-height: 90px; transition: border-color 0.3s; } .explain-panel h4 { font-family: 'Playfair Display', serif; font-size: 16px; margin-bottom: 6px; } .explain-panel p { font-size: 14px; color: var(--ink2); margin: 0; line-height: 1.7; } .explain-tag { display: inline-block; font-family: 'DM Mono', monospace; font-size: 10px; letter-spacing: 1px; padding: 2px 8px; border-radius: 99px; margin-bottom: 8px; } .tag-red { background: #fde8e2; color: var(--accent); } .tag-blue { background: #e2edfd; color: var(--accent2); } .tag-green { background: #e2f5ec; color: var(--accent3); } .summary-table { width: 100%; border-collapse: collapse; margin: 24px 0; font-size: 14px; } .summary-table th { background: var(--ink); color: var(--paper); padding: 12px 16px; text-align: left; font-family: 'DM Mono', monospace; font-size: 11px; letter-spacing: 1px; font-weight: 400; } .summary-table th:first-child { border-radius: 8px 0 0 0; } .summary-table th:last-child { border-radius: 0 8px 0 0; } .summary-table td { padding: 11px 16px; border-bottom: 1px solid var(--paper3); color: var(--ink2); vertical-align: top; line-height: 1.6; } .summary-table tr:last-child td { border-bottom: none; } .summary-table tr:nth-child(even) td { background: var(--paper2); } .loss-demo { background: var(--paper2); border-radius: var(--radius); padding: 28px; border: 1px solid 
var(--paper3); margin: 28px 0; } .demo-sub { font-size: 13px; color: var(--ink3); font-family: 'DM Mono', monospace; margin-bottom: 20px; margin-top: 4px; } .clips-list { display: flex; flex-direction: column; gap: 10px; margin-bottom: 20px; } .clip-row { display: flex; align-items: center; gap: 12px; } .clip-name { font-size: 12px; color: var(--ink3); min-width: 120px; font-family: 'DM Mono', monospace; } .clip-bar-wrap { flex: 1; background: var(--paper3); border-radius: 99px; height: 22px; overflow: hidden; } .clip-bar { height: 100%; border-radius: 99px; display: flex; align-items: center; padding-left: 8px; font-size: 11px; color: white; font-weight: 500; transition: width 0.5s cubic-bezier(0.34, 1.56, 0.64, 1); min-width: 28px; font-family: 'DM Mono', monospace; } .q-display { display: flex; align-items: center; justify-content: space-between; border-top: 1px solid var(--paper3); padding-top: 16px; margin-bottom: 16px; } .q-label { font-family: 'DM Mono', monospace; font-size: 13px; color: var(--ink3); } .q-number { font-family: 'Playfair Display', serif; font-size: 30px; font-weight: 700; transition: color 0.4s; } .verdict { font-size: 12px; font-family: 'DM Mono', monospace; margin-top: 4px; transition: color 0.4s; } .epoch-control label { font-size: 13px; color: var(--ink3); display: block; margin-bottom: 6px; } input[type=range] { width: 100%; height: 4px; -webkit-appearance: none; background: var(--paper3); border-radius: 99px; outline: none; cursor: pointer; } input[type=range]::-webkit-slider-thumb { -webkit-appearance: none; width: 20px; height: 20px; border-radius: 50%; background: var(--ink); cursor: pointer; border: 2px solid var(--paper); box-shadow: 0 1px 4px rgba(0,0,0,0.2); } .pixel-section { background: var(--paper2); border-radius: var(--radius); padding: 28px; border: 1px solid var(--paper3); margin: 28px 0; } .pixel-compare { display: grid; grid-template-columns: 1fr auto 1fr auto 1fr; gap: 8px; align-items: start; margin-bottom: 20px; } 
@media(max-width:560px){ .pixel-compare { grid-template-columns: 1fr 1fr 1fr; gap: 4px; } .pixel-op { display: none; } } .pixel-op { font-family: 'Playfair Display', serif; font-size: 28px; color: var(--ink3); text-align: center; padding-top: 24px; } .pixel-grid { display: grid; grid-template-columns: repeat(5, 1fr); gap: 3px; } .pixel-cell { aspect-ratio: 1; border-radius: 4px; transition: background 0.3s; } .pixel-lbl { font-size: 11px; font-family: 'DM Mono', monospace; color: var(--ink3); text-align: center; margin-top: 6px; } .noise-control label { font-size: 13px; color: var(--ink3); display: block; margin-bottom: 6px; } .l-score-row { display: flex; align-items: center; justify-content: space-between; margin-top: 16px; border-top: 1px solid var(--paper3); padding-top: 14px; } .l-label { font-family: 'DM Mono', monospace; font-size: 13px; color: var(--ink3); } .l-sublabel { font-size: 11px; color: var(--ink3); font-family: 'DM Mono', monospace; margin-top: 3px; } .l-number { font-family: 'Playfair Display', serif; font-size: 28px; font-weight: 700; transition: color 0.3s; } .pullquote { border-left: 3px solid var(--accent); padding: 16px 24px; margin: 32px 0; background: var(--paper2); border-radius: 0 var(--radius) var(--radius) 0; } .pullquote p { font-family: 'Playfair Display', serif; font-size: 19px; font-style: italic; color: var(--ink); margin: 0; line-height: 1.5; } .highlight-box { background: var(--paper2); border-radius: var(--radius); padding: 24px 28px; border: 1px solid var(--paper3); margin: 24px 0; } .highlight-box h3 { margin-bottom: 14px; font-size: 17px; } code { font-family: 'DM Mono', monospace; background: var(--paper3); padding: 2px 7px; border-radius: 4px; font-size: 0.9em; } strong { color: var(--ink); } .faq-item { border-bottom: 1px solid var(--paper3); padding: 18px 0; } .faq-item:last-child { border-bottom: none; } .faq-q { font-family: 'Playfair Display', serif; font-size: 16px; font-weight: 700; margin-bottom: 8px; color: 
var(--ink); } .faq-a { font-size: 15px; color: var(--ink2); line-height: 1.7; }

In this post I'm not going to showcase how to use ChatGPT, Claude, or other chat interfaces built on top of AI models.

What I would like to share with you is the core understanding of how AI models actually work. The reality is brighter than what most people try to say about it — that AI is taking over everything.

Think about it like the Internet. When it arrived, people panicked. "It's taking over everything." And in the end? We adapted to that new reality. And in my opinion it brought a lot of light — better connection between people, and a way to bridge the distance so you miss less when you can't be right there on the spot.

I would say the same about AI.

But what we need to learn and understand is the core of every model — MATH.

"If you want to control the world, learn deep the digits." — I'm not sure if this quote actually exists or I just made it up, but something like that I'm sure I heard somewhere in the movies 😄

Ok, now the Math part. Let's learn the first algorithm in the easiest way possible — the core of the core 🚀

A Baby and an AI Are Doing the Exact Same Thing

I know — the moment someone mentions "math" most people mentally check out. I get it. But give me two minutes, because what I'm about to show you completely changed how I think about AI. And honestly? It's way simpler than anyone makes it sound.

Here's what nobody tells you: the way an AI model learns is identical to the way a baby learns to walk. Same process. Same feedback loop. Different hardware.

Think about a toddler touching a hot stove. Something happens — there's a signal saying "that was wrong" — and the brain updates. The child doesn't consciously think "I must recalibrate my hand trajectory." It just adjusts. And next time, the hand stays far away.

AI training works the exact same way. The model makes a prediction, gets a signal measuring how wrong it was, and adjusts its internal settings slightly. Do that a million times and you have a trained model. That's the whole secret.

👶

How a Baby Learns

Touches something hot
Feels pain — mistake signal fires
Brain connections update automatically
Behaviour improves next time
🤖

How an AI Model Learns

Sees a training example
Makes a prediction — loss is calculated
Weights inside the model update automatically
Prediction improves next time

The only real difference? The baby's neurons are made of biology. The AI's neurons are made of math. The learning loop is the same. And once you feel that in your gut, the formula below becomes obvious rather than intimidating.

"Teaching a child to count and training an AI model are the same thing — repeated feedback until the mistake shrinks to nothing."

The One Equation That Trains Every AI Model on the Planet

Every AI you've ever used — ChatGPT, Midjourney, Google Translate, the Face ID on your phone — was trained by minimising this single formula. Click each coloured part to see what it actually means in plain English.

↓ tap any coloured part to understand it ↓
$$Q(a, X) = \frac{1}{n} \sum_{i=1}^{n} L(a, x_i)$$

← Tap any coloured part of the formula above to understand it

In plain English the whole formula is just asking: "On average, how wrong is my model across all of my training examples?" That's it. The lower that number, the better your model. Training is simply the process of making it smaller and smaller over time.

| Symbol | What it actually means | Think of it like... |
| --- | --- | --- |
| Q(a, X) | The total score — how wrong the model is on average | Your average exam grade across the whole semester |
| a | The model itself — its weights, its internal settings | The student sitting the exam |
| X | Your entire dataset — every training example you have | Every question on every exam this semester |
| ∑ / n | Add all individual errors up, then divide by how many there are | Sum all scores then divide to get the average |
| L(a, xᵢ) | How wrong the model was on one single training example | Your score on one specific exam question |

Drag This Slider and Watch an AI Model Actually Train

This is the part I wish someone had shown me earlier. Below are 5 video clips that a model is learning from. Each bar shows how wrong the model currently is on that clip. Drag the slider to simulate training — watch what happens to Q as the epochs go by.

Training on 5 Video Clips

Your dataset: X = { clip₁, clip₂, clip₃, clip₄, clip₅ }
Q(a, X) — current average loss

See how Q shrinks with each epoch? That's the whole game. The model is failing less and less on each clip. When Q reaches near zero — the model has learned. This exact process runs inside every AI training job in the world, whether it's training for 10 minutes on your laptop or 6 months on 10,000 GPUs in a data centre.

The scale changes. The math doesn't.

But How Does the AI Know It Got Something Wrong?

This is the question that unlocks everything. The model has no eyes, no feelings, no intuition. So how does it know it made a mistake?

The answer is beautiful in its simplicity: every image is just a grid of numbers. In a greyscale image, a single pixel is just a number between 0 and 255 (a colour pixel is simply three such numbers: red, green and blue). A full image is a matrix — rows and columns of those numbers. A video is a sequence of those matrices, one per frame.

So being wrong is just... the difference between two numbers. The real image had pixel value 200. The model generated 120. The error is 80. No judgment required. Pure arithmetic. Drag the slider below and see it happen live.

Images Are Just Matrices of Numbers — Here's the Proof

Drag the slider to change model accuracy — watch the error column react
Real xᵢ — target
Model output
=
Error (L)
L(a, xᵢ) on this frame
formula: mean( (real − generated)² )

The formula uses (real − generated)² — squaring the difference. This does two things: it makes every error positive so mistakes don't cancel each other out, and it punishes large mistakes much more severely than small ones. A model that's slightly off gets a gentle nudge. A model that's wildly wrong gets hit hard.

That's exactly how you'd teach a child too, isn't it? Gentle correction for small mistakes, stronger feedback for big ones.

"An image is a matrix. A video is a sequence of matrices. A word is a vector. At the bottom of every AI model — it's all just numbers doing arithmetic."

Questions People Actually Ask About This

Does every AI model use this same formula?
Yes — at its core, every model from a spam filter to GPT-4 optimises some version of Q(a, X). The specific definition of L changes depending on the task — image generation, text prediction, voice recognition — but the structure is universal: average your mistakes, then make them smaller.
Do I need to be good at math to understand AI?
You need intuition, not calculation. You don't need to solve the formula by hand — computers do that billions of times per second. What you need is to understand what each part represents so you can make smart decisions about your model. Think of it like driving: you don't need to know how to build an engine, but you should know what the warning lights mean.
What is the dataset X in practice?
X is whatever you're training on. For image generation it's thousands of images. For a language model it's billions of text documents. For a video model it's video clips with captions. The bigger and more diverse your X, the better your model will perform on real-world inputs it has never seen before.
What comes after understanding the loss function?
The next step is gradient descent — the mechanism that actually adjusts the weights to make Q smaller. Imagine Q as a hilly landscape and the model as a ball trying to roll to the lowest point. We'll cover that in the next post, and once you see it visually, everything clicks into place.

What You Now Understand That Most People Don't

You just looked at the formula that trains every AI model on the planet — and understood what each part does. Most people who use AI every day have no idea this exists. You do now.

And remember: the math doesn't care whether it's training a chatbot, a video generator, a medical imaging tool, or a self-driving car. The loop is always the same.

The Core Training Loop — Burn This Into Your Memory

Step 1 — Take one example xᵢ from your dataset X and run it through the model
Step 2 — Calculate L — how wrong was the prediction on that one example?
Step 3 — Repeat steps 1–2 for every example, then average all the L values across the full dataset to get Q
Step 4 — Adjust the model's weights slightly to push Q lower
Step 5 — Repeat this thousands or millions of times. The model learns.

Next up: gradient descent — the mechanism behind step 4. It's the ball rolling downhill. And once you see it, you'll never look at AI the same way again.

// Keyboard access for the formula parts: pressing Enter or Space on a .f-part
// element cancels the default key action and forwards to the element click().
document.querySelectorAll('.f-part').forEach(el => { el.addEventListener('keydown', e => { if (e.key === 'Enter' || e.key === ' ') { e.preventDefault(); el.click(); } }); });
// Explanation content keyed by formula part (Q, 1n, sum, L). Each entry carries
// color, tag, tagClass, title and text. explainPart reads color, tag, title and
// text; tagClass is not referenced anywhere in the visible script.
// NOTE(review): tagClass was presumably consumed by markup that is no longer
// present in the explainPart template - confirm against the original page.
const explanations = { Q: { color:'#c8401a', tag:'The Goal', tagClass:'tag-red', title:'Q(a, X) — the total risk score', text:'This is the number that training is trying to shrink. It tells you how bad your model is right now, averaged across your whole dataset. A high Q means the model is making lots of mistakes. A low Q means it\'s learned well. Everything in AI training is aimed at making this number smaller and smaller.' }, '1n': { color:'#1a6bc8', tag:'The Fairness Fix', tagClass:'tag-blue', title:'1/n — divide by the number of examples', text:'If you have 10,000 training clips, you divide the total error by 10,000 to get the average per clip. Without this, a larger dataset would always produce a bigger Q — which would be misleading. This makes Q a fair comparison no matter how large or small your dataset is.' }, sum: { color:'#c89a1a', tag:'The For-Loop', tagClass:'tag-red', title:'∑ — add everything up (a for-loop written in math)', text:'Sigma (∑) is just a for-loop written in math notation. It says: go through every single example in your dataset, from example 1 all the way to example n, and add up all the individual loss values. If you can write a for-loop in code, you already understand what sigma means.' }, L: { color:'#1a8c4e', tag:'The Mistake Meter', tagClass:'tag-green', title:'L(a, xᵢ) — the error on one single example', text:'This measures how wrong the model was on just one training example. For image and video models, it\'s literally the difference between the real pixel values and what the model generated. No human judgment involved — just arithmetic. Small L means the model was close. Large L means it missed badly.' 
} };
// explainPart(key): highlights one formula part and shows its explanation.
//  - removes the active class from every .f-part, then adds it to the element
//    whose id is fp- followed by key
//  - looks up explanations[key], sets the left border colour of #explain-panel
//    to the entry colour, and writes tag, title and text into the panel via
//    innerHTML
// key is expected to be one of the keys of explanations; there is no guard for
// a key that is missing from explanations or has no matching element.
// NOTE(review): the template literal holds only the three interpolations
// separated by blank lines; the wrapping markup looks stripped by extraction -
// confirm against the original page.
function explainPart(key) { document.querySelectorAll('.f-part').forEach(p => p.classList.remove('active')); document.getElementById('fp-' + key).classList.add('active'); const d = explanations[key]; const panel = document.getElementById('explain-panel'); panel.style.borderLeftColor = d.color; panel.innerHTML = `${d.tag}

${d.title}

${d.text}

`; }
// Data for the training demo: five clip labels, the starting error for each
// clip, and five colours. barColors is not referenced in the visible script.
// NOTE(review): barColors was presumably used by bar markup that was stripped
// from the trainEpoch template - confirm.
const clipNames = ['x₁ — sunset timelapse','x₂ — ocean waves','x₃ — character walk','x₄ — city at night','x₅ — forest scene']; const baseErrors = [0.84, 0.67, 0.78, 0.59, 0.71]; const barColors = ['#c8401a','#c87a1a','#c8a01a','#1a6bc8','#1a8c4e'];
// trainEpoch(e): renders the training demo for epoch e.
//  - e is parsed with parseInt (no radix given) and shown in #epoch-val
//  - every base error is multiplied by 0.72^(epoch-1) and rounded to two decimals
//  - q is the mean of those rounded errors, itself rounded to two decimals
//  - #clips-list is rebuilt from clipNames and the errors; #q-number shows q,
//    coloured by the 0.08 and 0.25 thresholds
// NOTE(review): pct is computed but not used by the visible template;
// presumably it set a bar width in markup that was stripped - confirm.
function trainEpoch(e) { const epoch = parseInt(e); document.getElementById('epoch-val').textContent = epoch; const factor = Math.pow(0.72, epoch - 1); const errors = baseErrors.map(b => Math.round(b * factor * 100) / 100); const q = Math.round(errors.reduce((a, b) => a + b, 0) / errors.length * 100) / 100; document.getElementById('clips-list').innerHTML = clipNames.map((name, i) => { const pct = Math.round(errors[i] * 100); return `
${name}
${errors[i]}
`; }).join(''); const qEl = document.getElementById('q-number'); qEl.textContent = q; qEl.style.color = q < 0.08 ? '#1a8c4e' : q < 0.25 ? '#c8401a' : '#0f0e0d'; const v = document.getElementById('verdict'); if (q < 0.08) { v.textContent = '✓ The model has learned — loss is near zero'; v.style.color = '#1a8c4e'; }
// NOTE(review): the script is damaged from here. The else-if condition is cut
// off after q, the text resumes inside a callback that computes row and col
// from i, and it breaks again after if (row. The remainder of trainEpoch and
// the opening of updatePixels (including the definitions of realPixels,
// pixelColor, noise, totalErr and genPx) are not visible, so nothing below can
// be checked against those definitions.
else if (q { const row = Math.floor(i/5), col = i%5; if (row { const n = Math.round((Math.random()*2-1)*150*noise); return Math.max(0, Math.min(255, p+n)); });
// The callback ending above adds a rounded random offset of up to 150*noise in
// either direction to p and clamps the result to 0..255.
// errPx holds the absolute difference per pixel between realPixels and genPx;
// the map callback also adds each squared difference to totalErr.
const errPx = realPixels.map((p, i) => { const diff = Math.abs(p-genPx[i]); totalErr += (p-genPx[i])**2; return diff; });
// makeGrid(id, values, colorFn) replaces the content of the element with that
// id with one template result per value, joined together.
// NOTE(review): the template is empty apart from a line break and colorFn is
// never called in the visible code; the cell markup was presumably stripped -
// confirm.
const makeGrid = (id, values, colorFn) => { document.getElementById(id).innerHTML = values.map(v => `
`).join(''); };
// Render the three grids. The colour function for the error grid maps v to
// t = min(1, v/150) and blends each RGB channel linearly from (60, 150, 60)
// at t = 0 to (200, 30, 30) at t = 1.
makeGrid('pg-real', realPixels, pixelColor); makeGrid('pg-gen', genPx, pixelColor); makeGrid('pg-diff', errPx, v => { const t = Math.min(1, v/150); return `rgb(${Math.round(200*t+60*(1-t))},${Math.round(30*t+150*(1-t))},${Math.round(30*t+60*(1-t))})`; });
// Mean of the squared differences divided by 255*255, rounded to three
// decimals, shown in #l-number and coloured by the 0.05 and 0.2 thresholds.
const mse = Math.round(totalErr/realPixels.length/(255*255)*1000)/1000; const lEl = document.getElementById('l-number'); lEl.textContent = mse; lEl.style.color = mse < 0.05 ? '#1a8c4e' : mse < 0.2 ? '#c89a1a' : '#c8401a'; }
// Initial call with 60.
// NOTE(review): the updatePixels signature is not visible in this view.
updatePixels(60);