import csv
import datetime
import urllib.request
import json
import matplotlib.pyplot as plt
import numpy as np
# Day and month of the date on which laureate ages are evaluated
# (10 December of the award year; see age_on).
AWARD_DAY = 10
AWARD_MONTH = 12
# Per-category settings, keyed by the display name used in log messages and
# plot legends. Each entry holds:
#   "code"       - short category code; read by the processing loop
#                  (presumably meant for the API request URL - confirm).
#   "name"       - English category name that a prize's category must equal
#                  for the prize to be counted in this category.
#   "csv"        - path of the CSV file the category's rows are written to.
#   "color"      - matplotlib color for the scatter points and trend line.
#   "extra_2025" - (laureate name, birthdate) pairs appended as 2025 rows.
#                  A "00" month or day marks an unknown component, which
#                  parse_date replaces with 1.
CATEGORIES = {
    "Physics": {
        "code": "phy",
        "name": "Physics",
        "csv": "physics_nobel_laureates_ages_1901_2025.csv",
        "color": "blue",
        "extra_2025": [
            ("John Clarke", "1942-02-10"),
            ("Michel H. Devoret", "1953-00-00"),
            ("John M. Martinis", "1958-00-00"),
        ],
    },
    "Chemistry": {
        "code": "che",
        "name": "Chemistry",
        "csv": "chemistry_nobel_laureates_ages_1901_2025.csv",
        "color": "green",
        "extra_2025": [
            ("Susumu Kitagawa", "1951-07-04"),
            ("Richard Robson", "1937-06-04"),
            ("Omar M. Yaghi", "1965-02-09"),
        ],
    },
    "Medicine": {
        "code": "med",
        "name": "Physiology or Medicine",
        "csv": "medicine_nobel_laureates_ages_1901_2025.csv",
        "color": "red",
        "extra_2025": [
            ("Mary E. Brunkow", "1961-00-00"),
            ("Fred Ramsdell", "1960-12-04"),
            ("Shimon Sakaguchi", "1951-01-19"),
        ],
    },
}
def parse_date(s):
    """Parse a date string like YYYY-MM-DD, YYYY-MM, or YYYY.

    Components that are missing or zero (for example the ``00`` placeholders
    in ``"1953-00-00"``) are replaced with 1, so a partially known date
    resolves to the first month and/or first day.

    Args:
        s: Date string made of ``-``-separated integer components. Only the
            first three components are used.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If a component is not an integer or the resulting
            year/month/day combination is not a valid date.
    """
    # ``int(p) or 1`` treats every spelling of zero ("0", "00", "000") as an
    # unknown component, not only the exact string "00".
    parts = [int(p) or 1 for p in s.split("-")]
    # Pad absent month/day with 1 (a non-positive repeat count adds nothing).
    parts += [1] * (3 - len(parts))
    return datetime.date(*parts[:3])
def age_on(dob, year):
    """Return the age in whole years on the award date of ``year``.

    The award date is ``AWARD_DAY``/``AWARD_MONTH`` of the given year.

    Args:
        dob: Date of birth (an object with ``year``, ``month`` and ``day``).
        year: Award year.

    Returns:
        The number of completed years between ``dob`` and the award date.
    """
    ceremony = datetime.date(year, AWARD_MONTH, AWARD_DAY)
    completed_years = ceremony.year - dob.year
    # A birthday that falls after the award date has not been reached yet,
    # so the current year does not count as completed.
    if (dob.month, dob.day) > (ceremony.month, ceremony.day):
        completed_years -= 1
    return completed_years
# Per-category plot data ("years", "ages", "color"), keyed by category name.
all_data = {}
for category_name, info in CATEGORIES.items():
    print(f"\n🔍 Processing {category_name} laureates...")
    code = info["code"]
    # NOTE(review): the URL in the original source was an unusable scraped
    # placeholder ("https://[Log in to view URL]"). This is reconstructed from
    # the public Nobel Prize API v2.1 laureates endpoint, whose response
    # fields match what the loop below reads - confirm endpoint and limit.
    api_url = (
        "https://api.nobelprize.org/2.1/laureates"
        f"?limit=1000&nobelPrizeCategory={code}"
    )
    # A timeout keeps an unresponsive server from hanging the script forever.
    with urllib.request.urlopen(api_url, timeout=30) as response:
        data = json.loads(response.read().decode())

    rows = []
    for person in data.get("laureates", []):
        # Entries without a "knownName" are skipped.
        if person.get("knownName") is None:
            continue
        name = person["knownName"]["en"]
        # ``or {}`` also covers an explicit null "birth" value.
        birth = person.get("birth") or {}
        birthdate = birth.get("date")
        if not birthdate:
            continue
        dob = parse_date(birthdate)
        for prize in person.get("nobelPrizes", []):
            # A laureate may hold prizes in several categories; keep only
            # the ones belonging to the category being processed.
            if prize.get("category", {}).get("en") != info["name"]:
                continue
            year = int(prize["awardYear"])
            rows.append({
                "year": year,
                "laureate": name,
                "birthdate": birthdate,
                "age_on_award_date": age_on(dob, year),
            })

    # Manually listed 2025 laureates are appended after the API rows.
    for name, birthdate in info["extra_2025"]:
        rows.append({
            "year": 2025,
            "laureate": name,
            "birthdate": birthdate,
            "age_on_award_date": age_on(parse_date(birthdate), 2025),
        })

    # De-duplicate on (year, laureate), keeping the first occurrence, so an
    # API row wins over a manually listed row with the same key.
    unique_rows = {}
    for r in rows:
        unique_rows.setdefault((r["year"], r["laureate"]), r)
    deduped = sorted(unique_rows.values(), key=lambda r: (r["year"], r["laureate"]))

    with open(info["csv"], "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f, fieldnames=["year", "laureate", "birthdate", "age_on_award_date"]
        )
        writer.writeheader()
        writer.writerows(deduped)
    print(f"Saved {len(deduped)} laureates to {info['csv']}")

    all_data[category_name] = {
        "years": [r["year"] for r in deduped],
        "ages": [r["age_on_award_date"] for r in deduped],
        "color": info["color"],
    }
# Scatter plot of award age against award year, one color per category,
# with a least-squares linear trend line for each category.
plt.figure(figsize=(12, 8))
for category_name, data in all_data.items():
    years = np.array(data["years"])
    ages = np.array(data["ages"])
    plt.scatter(years, ages, label=f"{category_name} Laureates", color=data["color"], alpha=0.6)
    # A straight-line fit is only determined when there are at least two
    # distinct years; several points sharing a single year are not enough.
    if np.unique(years).size >= 2:
        slope, intercept = np.polyfit(years, ages, 1)
        plt.plot(years, slope * years + intercept, color=data["color"], linestyle='-',
                 label=f"{category_name} trend: y={slope:.2f}x + {intercept:.1f}")
plt.title("Age of Nobel Laureates at Award Time (Physics, Chemistry, and Medicine)")
plt.xlabel("Year")
plt.ylabel("Age at Award")
plt.legend()
plt.grid(True)
plt.tight_layout()
# Show the figure once; the original called plt.show() twice in a row.
plt.show()
# To embed this project on your website, copy the following code and paste it into your website's HTML: