# Age Trend Comparison of Nobel Laureates in Physics, Chemistry, and Medicine (1901-2025) (Python)

Fork
import csv
import datetime
import urllib.request
import json
import matplotlib.pyplot as plt
import numpy as np

# Day and month that age_on() combines with the award year to build the
# date on which a laureate's age is measured (10 December).
AWARD_DAY = 10
AWARD_MONTH = 12

# Per-category configuration consumed by the processing loop further down:
#   "code"       - short category code read into ``code`` for the API request
#   "name"       - compared against each prize's English category name
#   "csv"        - path of the CSV file written for the category
#   "color"      - colour of the category's scatter points and trend line
#   "extra_2025" - (laureate name, birthdate) pairs appended as 2025 rows;
#                  "00" month/day placeholders are turned into 1 by
#                  parse_date(), so "1953-00-00" is treated as 1953-01-01
CATEGORIES = {
    "Physics": {
        "code": "phy",
        "name": "Physics",
        "csv": "physics_nobel_laureates_ages_1901_2025.csv",
        "color": "blue",
        "extra_2025": [
            ("John Clarke", "1942-02-10"),
            ("Michel H. Devoret", "1953-00-00"),
            ("John M. Martinis", "1958-00-00"),
        ],
    },
    "Chemistry": {
        "code": "che",
        "name": "Chemistry",
        "csv": "chemistry_nobel_laureates_ages_1901_2025.csv",
        "color": "green",
        "extra_2025": [
            ("Susumu Kitagawa", "1951-07-04"),
            ("Richard Robson", "1937-06-04"),
            ("Omar M. Yaghi", "1965-02-09"),
        ],
    },
    "Medicine": {
        "code": "med",
        "name": "Physiology or Medicine",
        "csv": "medicine_nobel_laureates_ages_1901_2025.csv",
        "color": "red",
        "extra_2025": [
            ("Mary E. Brunkow", "1961-00-00"),
            ("Fred Ramsdell", "1960-12-04"),
            ("Shimon Sakaguchi", "1951-01-19"),
        ],
    },
}

def parse_date(s):
    """Parse a ``YYYY-MM-DD``, ``YYYY-MM`` or ``YYYY`` string into a date.

    Components that are missing, or that are zero (for example the ``00``
    placeholders in ``"1953-00-00"``), default to 1, so a year-only value
    becomes 1 January of that year.

    Args:
        s: Hyphen-separated date string.

    Returns:
        A ``datetime.date`` built from the first three components.

    Raises:
        ValueError: If a component is not an integer or the resulting
            date is invalid (e.g. month 13).
    """
    # ``int(p) or 1`` maps every spelling of zero ("00", "0", ...) to 1
    # instead of recognising only the literal "00".
    parts = [int(p) or 1 for p in s.split("-")]
    # Pad absent month/day components with 1.
    parts += [1] * (3 - len(parts))
    return datetime.date(*parts[:3])

def age_on(dob, year):
    """Return the laureate's age in whole years on the award date of *year*.

    The award date is built from the module-level AWARD_MONTH and AWARD_DAY.
    """
    ceremony = datetime.date(year, AWARD_MONTH, AWARD_DAY)
    years_elapsed = ceremony.year - dob.year
    # One year fewer when the birthday falls after the award date.
    birthday_pending = (ceremony.month, ceremony.day) < (dob.month, dob.day)
    if birthday_pending:
        return years_elapsed - 1
    return years_elapsed

# Plot inputs collected per category by the loop below:
# {category_name: {"years": [...], "ages": [...], "color": ...}}
all_data = {}

for category_name, info in CATEGORIES.items():
    print(f"\n🔍 Processing {category_name} laureates...")
    code = info["code"]
    # NOTE(review): the URL here had been replaced by a redacted
    # "[Log in to view URL]" placeholder that ignored ``code`` and could not
    # be fetched. It is reconstructed from the response fields read below
    # (laureates / knownName / birth / nobelPrizes), which match the Nobel
    # Prize API v2.1 laureates endpoint - confirm against the original script.
    # ``limit`` is set high, presumably enough to return a whole category in
    # one response - verify against the API's paging rules.
    api_url = (
        "https://api.nobelprize.org/2.1/laureates"
        f"?limit=1000&nobelPrizeCategory={code}"
    )

    # The timeout keeps an unresponsive server from hanging the script.
    with urllib.request.urlopen(api_url, timeout=30) as response:
        data = json.loads(response.read().decode())

    rows = []

    for person in data.get("laureates", []):
        # Entries without a known name or a birth date are skipped: no age
        # can be computed for them.
        if person.get("knownName") is None:
            continue
        name = person["knownName"]["en"]
        birthdate = person.get("birth", {}).get("date")
        if not birthdate:
            continue
        dob = parse_date(birthdate)

        # A laureate may hold several prizes; keep only those in this category.
        for prize in person.get("nobelPrizes", []):
            if prize.get("category", {}).get("en") != info["name"]:
                continue
            year = int(prize["awardYear"])
            rows.append({
                "year": year,
                "laureate": name,
                "birthdate": birthdate,
                "age_on_award_date": age_on(dob, year),
            })

    # Add the hard-coded 2025 laureates after the API rows, so that an API
    # row wins over a hard-coded one with the same (year, name) below.
    for name, birthdate in info["extra_2025"]:
        rows.append({
            "year": 2025,
            "laureate": name,
            "birthdate": birthdate,
            "age_on_award_date": age_on(parse_date(birthdate), 2025),
        })

    # Keep the first row seen for each (year, laureate) pair.
    seen = set()
    deduped = []
    for row in rows:
        key = (row["year"], row["laureate"])
        if key in seen:
            continue
        seen.add(key)
        deduped.append(row)
    deduped.sort(key=lambda r: (r["year"], r["laureate"]))

    with open(info["csv"], "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f, fieldnames=["year", "laureate", "birthdate", "age_on_award_date"]
        )
        writer.writeheader()
        writer.writerows(deduped)
    print(f"Saved {len(deduped)} laureates to {info['csv']}")

    all_data[category_name] = {
        "years": [r["year"] for r in deduped],
        "ages": [r["age_on_award_date"] for r in deduped],
        "color": info["color"],
    }

# Draw one scatter series per category, each with a least-squares trend line.
plt.figure(figsize=(12, 8))

for category_name, data in all_data.items():
    years = np.array(data["years"])
    ages = np.array(data["ages"])
    plt.scatter(years, ages, label=f"{category_name} Laureates", color=data["color"], alpha=0.6)

    # A straight-line fit needs at least two points.
    if len(years) >= 2:
        slope, intercept = np.polyfit(years, ages, 1)
        # Print the intercept's sign once, so a negative intercept reads
        # "y=0.15x - 235.0" rather than "y=0.15x + -235.0".
        sign = "+" if intercept >= 0 else "-"
        plt.plot(years, slope * years + intercept, color=data["color"], linestyle='-',
                 label=f"{category_name} trend: y={slope:.2f}x {sign} {abs(intercept):.1f}")

plt.title("Age of Nobel Laureates at Award Time (Physics, Chemistry, and Medicine)")
plt.xlabel("Year")
plt.ylabel("Age at Award")
plt.legend()
plt.grid(True)
plt.tight_layout()
# Show the figure once; the original called plt.show() twice in a row.
plt.show()
Fork
Embed on website