Woche 2

Autor:in

Benedikt Schnur

Code
import math
import numpy as np
import scipy.interpolate as interp
from typing import Tuple
from collections import namedtuple
import plotly.graph_objects as go
import plotly.io as pio


BENE_COLORS_DARK: Tuple[str, ...] = (
    "#47476b",
    "#6B5706",
    "#206260",
    "#93003a",
    "#215F80",
    "#973C2B",
    "#008381",
    "#6d3d3d",
    "#595865",
)
pio.templates["bene"] = go.layout.Template(layout=go.Layout(colorway=BENE_COLORS_DARK))
pio.templates.default = "plotly_white+bene"

FourByFourTable = namedtuple("FourByFourTable", ["a", "b", "c", "d"])


def berechne_ppv(sens: float, spez: float, praev: float) -> float:
    return (sens * praev) / (sens * praev + (1 - spez) * (1 - praev))


def berechne_npv(sens: float, spez: float, praev: float) -> float:
    return (spez * (1 - praev)) / (spez * (1 - praev) + (1 - sens) * praev)


def berechne_95_ci(s: float, n: int) -> tuple:
    lower = s - 1.96 * math.sqrt(s * (1 - s) / n)
    upper = s + 1.96 * math.sqrt(s * (1 - s) / n)
    return lower, upper


def berechne_se_4x4(four_by_four_table: FourByFourTable) -> float:
    return four_by_four_table.a / (four_by_four_table.a + four_by_four_table.c)


def berechne_sp_4x4(four_by_four_table: FourByFourTable) -> float:
    return four_by_four_table.d / (four_by_four_table.b + four_by_four_table.d)

Aufgabe 2: Glaukom

  • Prävalenz: 0.9 % = 0.009
  • Sensitivität: 85 % = 0.85
  • Spezifität: 90 % = 0.90
Code
prevalence = 0.009
sensitivity = 0.85
specificity = 0.90

2

Code
ppv_screening = berechne_ppv(sensitivity, specificity, prevalence)
ppv_screening
0.07166276346604215
Code
npv_screening = berechne_npv(sensitivity, specificity, prevalence)
npv_screening
0.9984886649874056

3

Code
population = 100000
true_positives = sensitivity * prevalence * population
false_negatives = (1 - sensitivity) * prevalence * population
false_positives = (1 - specificity) * (1 - prevalence) * population
true_negatives = specificity * (1 - prevalence) * population
true_positives, false_negatives, false_positives, true_negatives
(764.9999999999999, 135.0, 9909.999999999998, 89190.0)

Aufgabe 3

1

Code
# Gegebene Werte aus der Studie (Katz et al., 1993)
sensitivity_study = 0.836  # Sensitivität des Suprathreshold Visual Field Test
specificity_study = 0.749  # Spezifität des Suprathreshold Visual Field Test

# Populationsgröße in der Studie
total_population_study = 5341  

# Anzahl tatsächlich kranker (Glaukomfälle)
true_cases_study = 146  

# Anzahl gesunder Personen
healthy_cases_study = total_population_study - true_cases_study  

# Berechnung der Prävalenz in der Studienpopulation
prevalence_study = true_cases_study / total_population_study  

# Berechnung des positiven prädiktiven Werts (PPV)
ppv_study = berechne_ppv(sensitivity_study, specificity_study, prevalence_study)

# Berechnung des negativen prädiktiven Werts (NPV)
npv_study = berechne_npv(sensitivity_study, specificity_study, prevalence_study)

ppv_study, npv_study
(0.08559320785890051, 0.9938840341977099)

Prävalenz bei beiden Test nacheinander. Prävalenz des zweiten Tests ist PPV des ersten Tests.

Code
ppv_study_after_positive_screening = berechne_ppv(sensitivity_study, specificity_study, ppv_screening)
npv_study_after_positive_screening = berechne_npv(sensitivity_study, specificity_study, ppv_screening)
ppv_study_after_positive_screening, npv_study_after_positive_screening
(0.20452517628999506, 0.9833784884837806)

2

a

Code
1-npv_study
0.006115965802290124

c

Code
1-ppv_study
0.9144067921410994

3

Code
berechne_95_ci(sensitivity_study, total_population_study), berechne_95_ci(specificity_study, total_population_study)
((0.8260695290539164, 0.8459304709460835),
 (0.7373715306646761, 0.7606284693353239))

Aufgabe 4

2

Code
cutoff_1_44 = FourByFourTable(53, 285, 0, 182)
cutoff_1_44_se = berechne_se_4x4(cutoff_1_44)
cutoff_1_44_sp = berechne_sp_4x4(cutoff_1_44)
cutoff_1_44_se, cutoff_1_44_sp, 1-cutoff_1_44_sp
(1.0, 0.3897216274089936, 0.6102783725910064)
Code
cutoff_1_14 = FourByFourTable(50, 187, 3, 280)
cutoff_1_14_se = berechne_se_4x4(cutoff_1_14)
cutoff_1_14_sp = berechne_sp_4x4(cutoff_1_14)
cutoff_1_14_se, cutoff_1_14_sp, 1-cutoff_1_14_sp
(0.9433962264150944, 0.5995717344753747, 0.4004282655246253)
Code
cutoff_1_02 = FourByFourTable(43, 112, 10, 355)
cutoff_1_02_se = berechne_se_4x4(cutoff_1_02)
cutoff_1_02_sp = berechne_sp_4x4(cutoff_1_02)
cutoff_1_02_se, cutoff_1_02_sp, 1-cutoff_1_02_sp
(0.8113207547169812, 0.7601713062098501, 0.23982869379014993)
Code
cutoff_0_8 = FourByFourTable(34, 47, 19, 420)
cutoff_0_8_se = berechne_se_4x4(cutoff_0_8)
cutoff_0_8_sp = berechne_sp_4x4(cutoff_0_8)
cutoff_0_8_se, cutoff_0_8_sp, 1-cutoff_0_8_sp
(0.6415094339622641, 0.8993576017130621, 0.10064239828693788)
Code
cutoff_0_6 = FourByFourTable(23, 5, 30, 462)
cutoff_0_6_se = berechne_se_4x4(cutoff_0_6)
cutoff_0_6_sp = berechne_sp_4x4(cutoff_0_6)
cutoff_0_6_se, cutoff_0_6_sp, 1-cutoff_0_6_sp
(0.4339622641509434, 0.9892933618843683, 0.010706638115631661)
Code
specificity = np.array([
    0,
    1 - cutoff_0_6_sp,
    1 - cutoff_0_8_sp,
    1 - cutoff_1_02_sp,
    1 - cutoff_1_14_sp,
    1 - cutoff_1_44_sp,
    1,
])
sensitivity = np.array([
    0,
    cutoff_0_6_se,
    cutoff_0_8_se,
    cutoff_1_02_se,
    cutoff_1_14_se,
    cutoff_1_44_se,
    1,
])
cutoffs = np.array([0, 0.6, 0.8, 1.02, 1.14, 1.44, 2])

# Initialize variables
best_cutoff = None
max_j = -np.inf

# Loop through each segment between two cutoffs
for i in range(len(cutoffs) - 1):
    C1, C2 = cutoffs[i], cutoffs[i + 1]
    S1, S2 = sensitivity[i], sensitivity[i + 1]
    P1, P2 = specificity[i], specificity[i + 1]
    
    # Compute the slope for linear interpolation
    sensitivity_slope = (S2 - S1) / (C2 - C1)
    specificity_slope = (P2 - P1) / (C2 - C1)

    # Solve for cutoff that maximizes Youden's J in this segment
    # J(c) = (S1 + sensitivity_slope * (c - C1)) + (P1 + specificity_slope * (c - C1)) - 1
    # dJ/dc = sensitivity_slope + specificity_slope = 0
    if sensitivity_slope + specificity_slope != 0:  # Avoid division by zero
        optimal_c = C1 - (S1 + P1 - 1) / (sensitivity_slope + specificity_slope)

        # Ensure the found cutoff is within the segment
        if C1 <= optimal_c <= C2:
            optimal_j = (S1 + sensitivity_slope * (optimal_c - C1)) + \
                        (P1 + specificity_slope * (optimal_c - C1)) - 1
            
            # Update the best cutoff if this J value is better
            if optimal_j > max_j:
                max_j = optimal_j
                best_cutoff = optimal_c
                best_sensitivity = sensitivity_slope
                best_specificity = specificity_slope

print(f"Optimal Cutoff (linear): {best_cutoff:.3f}")
Optimal Cutoff (linear): 0.984
Code
roc_x = [
    0,
    1 - cutoff_0_6_sp,
    1 - cutoff_0_8_sp,
    1 - cutoff_1_02_sp,
    1 - cutoff_1_14_sp,
    1 - cutoff_1_44_sp,
    1,
]
roc_y = [
    0,
    cutoff_0_6_se,
    cutoff_0_8_se,
    cutoff_1_02_se,
    cutoff_1_14_se,
    cutoff_1_44_se,
    1,
]
roc_labels = ["", "0.6 mm²", "0.8 mm²", "1.02 mm²", "1.14 mm²", "1.44 mm²", ""]

fig = go.Figure()
fig.add_shape(
    type="line", line=dict(dash="dash"), fillcolor="black", x0=0, x1=1, y0=0, y1=1
)
fig.add_trace(
    go.Scatter(
        x=roc_x,
        y=roc_y,
        text=roc_labels,
        textposition="bottom right",
        mode="lines+markers+text",
        fill="tozeroy",
        fillcolor="rgba(71, 71, 107, 0.2)",
    )
)
fig.update_layout(
    title="ROC Curve RNFL Area",
    xaxis_title="1 - Spezifität",
    yaxis_title="Sensitivität",
    xaxis=dict(range=[0, 1]),
    yaxis=dict(range=[0, 1], scaleanchor="x", scaleratio=1),
)
fig.show()

Aufgabe 7

2

Code
a, b, c, d = 30, 366, 56, 352

p1 = a / (a + b)
p2 = c / (c + d)

p1, p2
(0.07575757575757576, 0.13725490196078433)
Code
p = (a + c) / (a + b + c + d)
SE = math.sqrt((p * (1 - p) / (a + b)) + (p * (1 - p) / (c + d)))

SE
0.02180247257454774
Code
T = (p1 - p2) / SE
T
-2.820658344732913