This walkalong consists of hard questions and their solutions from sql-practice.com, written in both SQL and django
ORM, They query results are also compared for equality.
Insert, Update and Delete queries are not included to keep database consistent.
The notebook (Django Shell-plus) is located in here
First cd into the notebook directory cd server/notebook
Then open the notebook using python ../manage.py shell_plus --notebook
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
from utils import *
from django.db.models import (
Q,
F,
Case,
When,
Count,
Func,
Min,
Max,
Sum,
Avg,
Value,
OuterRef,
Subquery,
Window,
CharField,
FloatField,
)
from django.db.models.lookups import Exact, GreaterThanOrEqual
from django.db.models.functions import Concat, Cast, Round, Length, Lag
from django.db import connection
from customer_db.models import Provinces, Patients, Doctors, Admissions
def orm_to_df(record: any) -> pd.DataFrame:
try:
iter(record)
except:
record = [record]
if isinstance(record, dict):
record = [record]
df = pd.DataFrame.from_records(record)
print(df.to_markdown())
patient_fields = [
"first_name",
"last_name",
"gender",
"birth_date",
"city",
"province_id",
"allergies",
"height",
"weight",
]
Show all of the patients grouped into weight groups. Show the total amount of patients in each weight group. Order the list by the weight group decending.
For example, if they weight 100 to 109 they are placed in the 100 weight group, 110-119 = 110 weight group, etc.
qstr = """
SELECT
ROUND(weight / 10, 0) * 10 AS weight_class,
COUNT(patient_id) AS patient_count
FROM patients
GROUP BY weight_class
ORDER BY weight_class DESC
"""
sqlq = sql_raw(qstr)
ormq = (
Patients.objects.values(weight_class=Round(F("weight") / 10, 0) * 10)
.annotate(patient_count=Count("id"))
.order_by("-weight_class")
.values("weight_class", "patient_count")
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT (ROUND(("patients"."weight" / 10), 0) * 10) AS "weight_class",
COUNT("patients"."patient_id") AS "patient_count"
FROM "patients"
GROUP BY 1
ORDER BY 1 DESC
orm_to_df(ormq[:3])
| | weight_class | patient_count |
|---:|---------------:|----------------:|
| 0 | 90 | 1 |
| 1 | 80 | 20 |
| 2 | 70 | 29 |
Show patient_id, weight, height, isObese from the patients table.
Display isObese as a boolean 0 or 1. Obese is defined as weight(kg)/(height(m)2) >= 30. weight is in units kg. height is in units cm.
We will do 24
qstr = """
SELECT
patient_id AS id,
weight,
height,
CASE
WHEN weight * 10000/ (height * height) >= 24 THEN 1
ELSE 0
END AS isObese
FROM patients
"""
sqlq = sql_raw(qstr)
ormq = Patients.objects.annotate(
tmp_weight=F("weight") * 10000 / (F("height") * F("height"))
).values(
"id", "weight", "height", isObese=Case(When(tmp_weight__gte=24, then=1), default=0)
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT "patients"."patient_id",
"patients"."weight",
"patients"."height",
CASE
WHEN (("patients"."weight" * 10000) / ("patients"."height" * "patients"."height")) >= 24 THEN 1
ELSE 0
END AS "isObese"
FROM "patients"
orm_to_df(ormq[:3])
| | id | weight | height | isObese |
|---:|-----:|---------:|---------:|----------:|
| 0 | 1 | 65 | 170 | 0 |
| 1 | 2 | 80 | 185 | 0 |
| 2 | 3 | 58 | 155 | 1 |
Show patient_id, first_name, last_name, and attending doctor's specialty. Show only the patients who has a diagnosis as 'Epilepsy' and the doctor's first name is 'Lisa'
Check patients, admissions, and doctors tables for required information. We will do 'Anemia'
qstr = """
SELECT
p.patient_id,
p.first_name,
p.last_name,
speciality
FROM admissions AS a
JOIN patients p ON p.patient_id = a.patient_id
JOIN doctors d ON a.attending_doctor_id = d.doctor_id
WHERE
diagnosis = 'Anemia'
and d.first_name = 'Lisa'
"""
sqlq = sql_raw(qstr)
ormq = Admissions.objects.filter(
diagnosis="Anemia", attending_doctor__first_name="Lisa"
).values(
"patient_id",
first_name=F("patient__first_name"),
last_name=F("patient__last_name"),
speciality=F("attending_doctor__speciality"),
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT "admissions"."patient_id",
"patients"."first_name" AS "first_name",
"patients"."last_name" AS "last_name",
"doctors"."speciality" AS "speciality"
FROM "admissions"
INNER JOIN "doctors" ON ("admissions"."attending_doctor_id" = "doctors"."doctor_id")
INNER JOIN "patients" ON ("admissions"."patient_id" = "patients"."patient_id")
WHERE ("doctors"."first_name" = Lisa
AND "admissions"."diagnosis" = Anemia)
orm_to_df(ormq[:3])
| | patient_id | first_name | last_name | speciality |
|---:|-------------:|:-------------|:------------|:--------------|
| 0 | 51 | Isabelle | Lee | Endocrinology |
All patients who have gone through admissions, can see their medical documents on our site. Those patients are given a temporary password after their first admission. Show the patient_id and temp_password.
The password must be the following, in order: 1. patient_id 2. the numerical length of patient's last_name 3. year of patient's birth_date
# use YEAR(birth_date) in other databases
qstr = """
SELECT
p.patient_id,
p.patient_id || LENGTH(last_name) || strftime('%Y', birth_date) AS temp_password
FROM patients p
WHERE patient_id IN (
SELECT DISTINCT(patient_id)
FROM admissions
)
"""
sqlq = sql_raw(qstr)
ormq = Patients.objects.filter(
id__in=Subquery(Admissions.objects.values("patient_id").distinct())
).values(
patient_id=F("id"),
temp_password=Cast(
Concat("id", Length("last_name"), "birth_date__year"), CharField()
),
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT "patients"."patient_id" AS "patient_id",
CAST(COALESCE("patients"."patient_id",) || COALESCE(COALESCE(LENGTH("patients"."last_name"),) || COALESCE(django_date_extract(year, "patients"."birth_date"),),) AS text) AS "temp_password"
FROM "patients"
WHERE "patients"."patient_id" IN
(SELECT DISTINCT U0."patient_id"
FROM "admissions" U0)
ormq1 = (
Patients.objects.filter(admissions__admission_date__isnull=False)
.values(
patient_id=F("admissions__patient_id"),
temp_password=Cast(
Concat("id", Length("last_name"), "birth_date__year"), CharField()
),
)
.distinct()
)
equal(sqlq, ormq1, True)
print_sql(ormq1)
Equal ✔️
SELECT DISTINCT "admissions"."patient_id" AS "patient_id",
CAST(COALESCE("patients"."patient_id",) || COALESCE(COALESCE(LENGTH("patients"."last_name"),) || COALESCE(django_date_extract(year, "patients"."birth_date"),),) AS text) AS "temp_password"
FROM "patients"
INNER JOIN "admissions" ON ("patients"."patient_id" = "admissions"."patient_id")
WHERE "admissions"."admission_date" IS NOT NULL
orm_to_df(ormq[:3])
| | patient_id | temp_password |
|---:|-------------:|----------------:|
| 0 | 1 | 151978 |
| 1 | 2 | 271990 |
| 2 | 3 | 381985 |
Each admission costs \$50 for patients without insurance, and \$10 for patients with insurance. All patients with an even patient_id have insurance.
Give each patient a 'Yes' if they have insurance, and a 'No' if they don't have insurance. Add up the admission_total cost for each has_insurance group.
qstr = """
SELECT has_insurance, SUM(insurance_cost) AS total_insurance_cost
FROM(
SELECT
CASE
WHEN patient_id % 2 = 0 THEN "Yes"
ELSE "No"
END AS has_insurance,
CASE
WHEN patient_id % 2 = 0 THEN 10
ELSE 50
END AS insurance_cost
FROM admissions
)
GROUP BY has_insurance;
"""
sqlq = sql_raw(qstr)
ormq = (
Admissions.objects.values(
has_insurance=Case(
When(Exact(F("id") % 2, 0), then=Value("Yes")), default=Value("No")
),
insurance_cost=Case(When(Exact(F("patient_id") % 2, 0), then=10), default=50),
)
.annotate(total_insurance_cost=Sum("insurance_cost"))
.values("has_insurance", "total_insurance_cost")
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT CASE
WHEN ("admissions"."id" % 2) = (0) THEN Yes
ELSE No
END AS "has_insurance",
SUM(CASE
WHEN ("admissions"."patient_id" % 2) = (0) THEN 10
ELSE 50
END) AS "total_insurance_cost"
FROM "admissions"
GROUP BY 1,
CASE
WHEN ("admissions"."patient_id" % 2) = (0) THEN 10
ELSE 50
END
ormq1 = (
Admissions.objects.values(has_insurance=Value("Yes"))
.filter(Exact(F("id")%2, 0))
.annotate(total_insurance_cost=Count("has_insurance") * 10)
.values("has_insurance", "total_insurance_cost")
.union(
Admissions.objects.values(has_insurance=Value("No"))
.filter(Exact(F("id")%2, 1))
.annotate(total_insurance_cost=Count("has_insurance") * 50)
.values("has_insurance", "total_insurance_cost")
)
)
equal(sqlq, ormq1, True)
print_sql(ormq1)
Equal ✔️
SELECT Yes AS "has_insurance",
(COUNT(Yes) * 10) AS "total_insurance_cost"
FROM "admissions"
WHERE ("admissions"."id" % 2) = (0)
UNION
SELECT No AS "has_insurance",
(COUNT(No) * 50) AS "total_insurance_cost"
FROM "admissions"
WHERE ("admissions"."id" % 2) = (1)
orm_to_df(ormq[:3])
| | has_insurance | total_insurance_cost |
|---:|:----------------|-----------------------:|
| 0 | No | 4250 |
| 1 | Yes | 850 |
Show the provinces that has more patients identified as 'M' than 'F'. Must only show full province_name
qstr = """
SELECT pr.province_name
FROM patients AS pa
JOIN province_names AS pr ON pa.province_id = pr.province_id
GROUP BY pr.province_name
HAVING SUM(gender = 'M') > SUM(gender = 'F')
"""
sqlq = sql_raw(qstr)
ormq = (
Patients.objects.values("province__name")
.annotate(female=Sum(Q(gender="F")), male=Sum(Q(gender="M")))
.filter(male__gt=F("female"), province__name__isnull=False)
.values(province_name=F("province__name"))
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT "province_names"."province_name" AS "province_name"
FROM "patients"
INNER JOIN "province_names" ON ("patients"."province_id" = "province_names"."province_id")
WHERE "province_names"."province_name" IS NOT NULL
GROUP BY 1
HAVING SUM(("patients"."gender" = M)) > (SUM("patients"."gender" = F))
orm_to_df(ormq[:3])
| | province_name |
|---:|:----------------|
| 0 | Manitoba |
| 1 | Ontario |
| 2 | Quebec |
We are looking for a specific patient. Pull all columns for the patient who matches the following criteria: - First_name contains an 'r' after the first two letters. - Identifies their gender as 'F' (We will do 'M') - Born in February, May, or December - Their weight would be between 60kg and 80kg - Their patient_id is an odd number (We will do even) - They are from the city 'Kingston' (We will do 'Vancouver')
qstr = """
SELECT *
FROM patients
WHERE
first_name LIKE "__r%"
AND gender = 'M'
AND (
CAST(strftime('%m', birth_date) AS INT) IN (2, 5, 12)
)
AND weight between 60 AND 80
AND patient_id % 2 = 0
AND city = 'Vancouver'
"""
sqlq = sql_raw(qstr)
ormq = (
Patients.objects.filter(
Exact(F("id") % 2, 0),
first_name__regex=r"(?i)^\w{2,}r\w*$",
gender="M",
birth_date__month__in=(2, 5, 12),
weight__range=(60, 80),
city="Vancouver",
)
).values(patient_id=F("id"), *patient_fields)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT "patients"."first_name",
"patients"."last_name",
"patients"."gender",
"patients"."birth_date",
"patients"."city",
"patients"."province_id",
"patients"."allergies",
"patients"."height",
"patients"."weight",
"patients"."patient_id" AS "patient_id"
FROM "patients"
WHERE (("patients"."patient_id" % 2) = (0)
AND django_date_extract(month, "patients"."birth_date") IN (2,
5,
12)
AND "patients"."city" = Vancouver
AND "patients"."first_name" REGEXP (?i)^\w{2,}r\w*$
AND "patients"."gender" = M
AND "patients"."weight" BETWEEN 60 AND 80)
orm_to_df(ormq)
| | first_name | last_name | gender | birth_date | city | province_id | allergies | height | weight | patient_id |
|---:|:-------------|:------------|:---------|:-------------|:----------|:--------------|:------------|---------:|---------:|-------------:|
| 0 | Jared | Taylor | M | 1986-02-20 | Vancouver | BC | | 178 | 75 | 34 |
Show the percent of patients that have 'M' as their gender. Round the answer to the nearest hundreth number and in percent form.
qstr = """
SELECT
ROUND(
CAST(AVG(gender = 'M') as FLOAT) * 100,
2
) || '%' AS pct
FROM patients
"""
sqlq = sql_raw(qstr)
class NonAggrAvg(Avg):
contains_aggregate = False
ormq = Patients.objects.annotate(
pct=Concat(
Cast(
Round(NonAggrAvg(Q(gender="M"), output_field=FloatField()) * 100, 2),
CharField(),
),
Value("%"),
)
).values("pct")
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT COALESCE(CAST(ROUND((AVG("patients"."gender" = M) * 100), 2) AS text),) || COALESCE(%,) AS "pct"
FROM "patients"
orm_to_df(ormq)
| | pct |
|---:|:------|
| 0 | 49.5% |
For each day display the total amount of admissions on that day. Display the amount changed from the previous date.
qstr = """
SELECT
admission_date,
daily_admissions,
daily_admissions - prev_admissions AS admission_diff
FROM (
SELECT
admission_date,
COUNT(*) daily_admissions,
LAG(count(*), 1) OVER (
ORDER BY
admission_date
) AS prev_admissions
FROM admissions
GROUP BY admission_date
)
"""
sqlq = sql_raw(qstr)
ormq = (
Admissions.objects.values("admission_date")
.annotate(daily_admissions=Count("patient_id"))
.values(
"admission_date",
"daily_admissions",
admission_diff=F("daily_admissions") - Window(Lag("daily_admissions")),
)
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT "admissions"."admission_date",
COUNT("admissions"."patient_id") AS "daily_admissions",
(COUNT("admissions"."patient_id") - LAG(COUNT("admissions"."patient_id"), 1) OVER ()) AS "admission_diff"
FROM "admissions"
GROUP BY "admissions"."admission_date"
orm_to_df(ormq[:3])
| | admission_date | daily_admissions | admission_diff |
|---:|:-----------------|-------------------:|-----------------:|
| 0 | 2022-01-01 | 1 | nan |
| 1 | 2022-01-02 | 1 | 0 |
| 2 | 2022-01-03 | 1 | 0 |
Show the total number of admissions
qstr = """
select province_name
from province_names
order by
province_name = 'Ontario' desc,
province_name
"""
sqlq = sql_raw(qstr)
ormq = Provinces.objects.order_by(Exact(F("name"), "Ontario").desc(), "name").values(
province_name=F("name")
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT "province_names"."province_name" AS "province_name"
FROM "province_names"
ORDER BY "province_names"."province_name" = (Ontario) DESC, "province_names"."province_name" ASC
orm_to_df(ormq[:3])
| | province_name |
|---:|:-----------------|
| 0 | Ontario |
| 1 | Alberta |
| 2 | British Columbia |
We need a breakdown for the total amount of admissions each doctor has started each year. Show the doctor_id, doctor_full_name, specialty, year, total_admissions for that year.
qstr = """
SELECT
doctor_id,
first_name || ' ' || last_name AS full_name,
speciality,
CAST(strftime('%Y', admission_date) AS INT) AS current_year,
Count(strftime('%Y', admission_date)) AS patients_attended
FROM admissions a
JOIN doctors d ON a.attending_doctor_id = d.doctor_id
GROUP BY current_year, doctor_id
"""
sqlq = sql_raw(qstr)
ormq = (
Admissions.objects.values("admission_date__year", "attending_doctor_id")
.annotate(patients_attended=Count("admission_date__year"))
.values(
"patients_attended",
doctor_id=F("attending_doctor_id"),
full_name=Concat(
"attending_doctor__first_name", Value(" "), "attending_doctor__last_name"
),
speciality=F("attending_doctor__speciality"),
current_year=F("admission_date__year"),
)
)
equal(sqlq, ormq)
print_sql(ormq)
Equal ✔️
SELECT COUNT(django_date_extract(year, "admissions"."admission_date")) AS "patients_attended",
"admissions"."attending_doctor_id" AS "doctor_id",
COALESCE("doctors"."first_name",) || COALESCE(COALESCE(,) || COALESCE("doctors"."last_name",),) AS "full_name",
"doctors"."speciality" AS "speciality",
django_date_extract(year, "admissions"."admission_date") AS "current_year"
FROM "admissions"
INNER JOIN "doctors" ON ("admissions"."attending_doctor_id" = "doctors"."doctor_id")
GROUP BY 2,
5,
3,
4
orm_to_df(ormq[:3])
| | patients_attended | doctor_id | full_name | speciality | current_year |
|---:|--------------------:|------------:|:--------------|:-------------|---------------:|
| 0 | 8 | 1 | Maggie Chen | Cardiology | 2022 |
| 1 | 8 | 2 | Karen Wong | Neurology | 2022 |
| 2 | 7 | 3 | Samuel Nguyen | Dermatology | 2022 |