Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Polish fpca regression example #552

Merged
Merged 4 commits on Jul 2, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions examples/plot_fpca_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
# curves. The color of these curves depends on the amount of fat, from least
# (light green) to highest (dark green).

X.plot(gradient_criteria=y, legend=True)
X.plot(gradient_criteria=y, legend=True, colormap="Greens")
plt.show()

##############################################################################
Expand All @@ -53,7 +53,8 @@

reg = FPCARegression(n_components=5)
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))
test_score = reg.score(X_test, y_test)
print(f"Score with 5 components: {test_score:.4f}")

##############################################################################
# We have obtained a pretty good result considering that
Expand All @@ -73,7 +74,7 @@


print("Best params:", gscv.best_params_)
print("Best cross-validation score:", gscv.best_score_)
print(f"Best cross-validation score: {gscv.best_score_:.4f}")

##############################################################################
# The best performance for the train set is obtained using 30 components.
Expand All @@ -87,24 +88,31 @@

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.bar(param_grid["n_components"], gscv.cv_results_["mean_test_score"])
ax.set_xticks(range(0, 100, 10))
ax.set_ylabel("Number of Components")
ax.set_xlabel("Cross-validation score")
ax.plot(
param_grid["n_components"],
gscv.cv_results_["mean_test_score"],
linestyle="dashed",
marker="o",
)
ax.set_xticks(range(0, 110, 10))
ax.set_xlabel("Number of Components")
ax.set_ylabel("Cross-validation score")
ax.set_ylim((0.5, 1))
fig.show()

##############################################################################
# To conclude, we can calculate the score of the model on the test set after
# it has been trained on the whole train set. As expected, the score is
# slightly higher than the one reported by the cross-validation.
# it has been trained on the whole train set.
#
# Moreover, we can check that the score barely changes when we use a somewhat
# smaller number of components.

reg = FPCARegression(n_components=30)
reg.fit(X_train, y_train)
print("Score with 30 components:", reg.score(X_test, y_test))
test_score = reg.score(X_test, y_test)
print(f"Score with 30 components: {test_score:.4f}")

reg = FPCARegression(n_components=15)
reg.fit(X_train, y_train)
print("Score with 15 components:", reg.score(X_test, y_test))
test_score = reg.score(X_test, y_test)
print(f"Score with 15 components: {test_score:.4f}")