"""Plot a precision-recall curve coloured by decision threshold.

A logistic-regression classifier is fitted to a synthetic binary dataset,
the precision-recall curve of its predicted probabilities is drawn with
every point coloured by its threshold, and the threshold that yields the
highest F-score is highlighted in red.
"""

import numpy as np


def f_scores_from_pr(precision, recall):
    """Return the element-wise F1 score for paired precision/recall values.

    Args:
        precision: Array-like of precision values.
        recall: Array-like of recall values, same shape as *precision*.

    Returns:
        A float ndarray holding ``2 * p * r / (p + r)`` for every pair.
        Where ``p + r == 0`` the score is 0.0 rather than NaN.
    """
    precision = np.asarray(precision, dtype=float)
    recall = np.asarray(recall, dtype=float)
    total = precision + recall
    # A plain division yields 0/0 = NaN when both values are zero, and
    # np.argmax then reports the NaN position instead of the real maximum.
    return np.divide(
        2.0 * precision * recall,
        total,
        out=np.zeros_like(total),
        where=total != 0,
    )


def best_f_score_index(precision, recall):
    """Return the index of the threshold with the highest F1 score.

    *precision* and *recall* are expected in the layout produced by
    ``sklearn.metrics.precision_recall_curve``: one entry per threshold
    followed by a final point that has no corresponding threshold. That
    final point is excluded, so the result always indexes the thresholds
    array safely.

    Args:
        precision: Array-like of precision values.
        recall: Array-like of recall values, same length as *precision*.

    Returns:
        The integer position of the first maximal F1 score.

    Raises:
        ValueError: If no thresholded point remains after dropping the
            final entry.
    """
    scores = f_scores_from_pr(precision, recall)[:-1]
    if scores.size == 0:
        raise ValueError(
            "precision and recall must contain at least one thresholded point"
        )
    return int(np.argmax(scores))


def main():
    """Fit the model, compute the precision-recall curve and show the plot."""
    # Imported here rather than at module level so the NumPy-only helpers
    # above can be imported without matplotlib or scikit-learn installed and
    # without initialising a plotting backend.
    import matplotlib.pyplot as plt
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import precision_recall_curve

    # Generate synthetic data with make_classification
    X, y_true = make_classification(
        n_samples=1000, n_features=20, random_state=42
    )

    # Create a Logistic Regression model and fit it on the data
    model = LogisticRegression()
    model.fit(X, y_true)

    # Predict probabilities for the positive class.
    # NOTE: these are scores for the same samples the model was fitted on,
    # so the curve describes training performance.
    y_scores = model.predict_proba(X)[:, 1]

    # Compute precision and recall for every candidate threshold
    precision, recall, thresholds = precision_recall_curve(y_true, y_scores)

    # Find the threshold with the maximal F-score
    best_idx = best_f_score_index(precision, recall)
    best_threshold = thresholds[best_idx]

    # Create the PR curve plot
    fig, ax = plt.subplots(figsize=(8, 6))
    curve = ax.scatter(recall[:-1], precision[:-1], c=thresholds)
    ax.scatter(
        recall[best_idx],
        precision[best_idx],
        c='red',
        marker='o',
        label=f'Max F-score ({best_threshold:.2f})',
        s=100,
    )
    # Hand the threshold-coloured collection to the colorbar explicitly so
    # it is built from the thresholds and not from the red highlight marker.
    fig.colorbar(curve, ax=ax, label='Threshold')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title('Precision-Recall Curve')
    ax.legend()
    ax.grid(True)
    plt.show()


if __name__ == "__main__":
    main()