diff --git a/integrations/model-training/xgboost/notebooks/how_to_use_comet_with_xgboost_tutorial.ipynb b/integrations/model-training/xgboost/notebooks/how_to_use_comet_with_xgboost_tutorial.ipynb new file mode 100644 index 00000000..ced9e84a --- /dev/null +++ b/integrations/model-training/xgboost/notebooks/how_to_use_comet_with_xgboost_tutorial.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n", + "\n", + "[Comet](https://www.comet.com/?utm_source=xgboost&utm_medium=colab&utm_content=intro_cell) helps Data Scientists track, compare, debug, and visualize their model training runs. Many ML teams leverage Comet as their single system of record of model's performance in both training anf production \n", + "This notebook shows you how to log your XGBoost Training Runs with Comet with just a additional lines of code\n", + "\n", + "For more information about Comet's integration with XGBoost visit our [Docs](https://www.comet.com/docs/v2/integrations/ml-frameworks/xgboost/?utm_source=xgboost&utm_medium=colab&utm_content=intro_cell) page." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ๐Ÿšง Install Required Packages " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install xgboost comet_ml --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ๐Ÿงช Initialize Comet\n", + "\n", + "Create your free account at [Comet.com](https://www.comet.com/signup?utm_source=xgboost&utm_medium=colab&utm_content=signup_cell) and grab your API key which can be found under account settings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import comet_ml\n", + "\n", + "comet_ml.init(project_name=\"comet-xgboost-tutorial\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ๐Ÿ‘๏ธ Train A Classification Model on the Iris Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "from xgboost import XGBClassifier\n", + "# read data\n", + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "experiment = comet_ml.Experiment()\n", + "\n", + "data = load_iris(as_frame=True)\n", + "X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'], test_size=.2)\n", + "# create model instance\n", + "bst = XGBClassifier(n_estimators=10, max_depth=5, learning_rate=0.1, objective='binary:logistic')\n", + "# fit model\n", + "bst.fit(X_train, y_train, eval_set=[(X_test, y_test)],\n", + " eval_metric=\"merror\",)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ๐Ÿ“Š View the Results in Comet \n", + "\n", + "Comet auto-logs the hyper-parameters, model graph, and metrics for a XGBoost training run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "experiment.display(\"charts\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ๐Ÿ“™ Log a prediction table to Comet " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# make predictions\n", + "y_pred = bst.predict(X_test)\n", + "\n", + "print(type(X_test))\n", + "\n", + "debug_df = X_test.copy()\n", + "\n", + "debug_df['pred'] = y_pred\n", + "debug_df['ground_truth'] = y_test\n", + "\n", + "experiment.log_table('prediction_debug_table.csv', debug_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ๐Ÿ–ฅ๏ธ Debug Model Predictions with Comet's Data Panel\n", + "\n", + "![Gif](xg_data_panel.gif)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('nb': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8c9587381b2341d562742e36a89690be32a732b11830813473890249dd40a07d" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/integrations/model-training/xgboost/notebooks/xg_comet.ipynb b/integrations/model-training/xgboost/notebooks/xg_comet.ipynb new file mode 100644 index 00000000..f73e7b21 --- /dev/null +++ b/integrations/model-training/xgboost/notebooks/xg_comet.ipynb @@ -0,0 +1,242 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "0M3sL7lrGHC3" + }, + "source": [ + " \n", + "\n", + "[Comet](https://www.comet.com/?utm_source=xgboost&utm_medium=colab&utm_content=intro_cell) helps Data Scientists track, compare, debug, and visualize their model training runs.\n", + "\n", + "\n", + "**This notebook shows you how to log your XGBoost Training Runs with Comet.** For more information about Comet's integration with XGBoost visit our [Docs](https://www.comet.com/docs/v2/integrations/ml-frameworks/xgboost/?utm_source=xgboost&utm_medium=colab&utm_content=intro_cell) page." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F52bYB07GHDB" + }, + "source": [ + "# Install Required Packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zS0IaNKSGHDE" + }, + "outputs": [], + "source": [ + "%pip install xgboost comet_ml" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WtUkfOzSGHDR" + }, + "source": [ + "# Initialize Comet\n", + "\n", + "Create your free account at [Comet.com](https://www.comet.com/signup?utm_source=xgboost&utm_medium=colab&utm_content=signup_cell)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k4J9C0e4GHDT" + }, + "outputs": [], + "source": [ + "import comet_ml\n", + "\n", + "comet_ml.init(project_name=\"comet-xgboost-tutorial\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bWsjy3XUGHDU" + }, + "source": [ + "# Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ufimm7d4GHDW" + }, + "outputs": [], + "source": [ + "from xgboost import XGBClassifier\n", + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Create a Comet Experiment" + ], + "metadata": { + "id": "3RJx33J7GUiE" + } + }, + { + "cell_type": "code", + "source": [ + "experiment = comet_ml.Experiment()" + ], + "metadata": { + "id": "4IUCQ21vGRZL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Train A Classification Model on the Iris Dataset" + ], + "metadata": { + "id": "sFKxD6L7GZdJ" + } + }, + { + "cell_type": "code", + "source": [ + "data = load_iris(as_frame=True)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " data[\"data\"], data[\"target\"], test_size=0.2\n", + ")\n", + "\n", + "# create model instance\n", + "bst = XGBClassifier(\n", + " n_estimators=10,\n", + " max_depth=5,\n", + " learning_rate=0.1,\n", + " objective=\"binary:logistic\",\n", + " eval_metric=\"merror\",\n", + ")\n", + "\n", + "# fit model\n", + "bst.fit(X_train, y_train, eval_set=[(X_test, y_test)])" + ], + "metadata": { + "id": "Xv51yx3eGY4B" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OcoTd5jZGHDY" + }, + "source": [ + "# View the Results in Comet\n", + "\n", + "Comet auto-logs the hyper-parameters, model graph, and metrics for a XGBoost training run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MEJAAveDGHDb" + }, + "outputs": [], + "source": [ + "experiment.display(\"parameters\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sXopQ7iaGHDe" + }, + "source": [ + "# Log a prediction table to Comet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m4P1zBrMGHDg" + }, + "outputs": [], + "source": [ + "# make predictions\n", + "y_pred = bst.predict(X_test)\n", + "\n", + "print(type(X_test))\n", + "\n", + "debug_df = X_test.copy()\n", + "\n", + "debug_df[\"pred\"] = y_pred\n", + "debug_df[\"ground_truth\"] = y_test\n", + "\n", + "experiment.log_table(\"prediction_debug_table.csv\", debug_df)\n", + "\n", + "experiment.end()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tqhjVfL5GHDh" + }, + "source": [ + "# Debug Model Predictions with Comet's Data Panel\n", + "\n", + "![Gif](https://github.com/comet-ml/comet-examples/blob/xgboost_dp_example/integrations/model-training/xgboost/notebooks/xg_data_panel.gif?raw=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UDlxdjpeGHDi" + }, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('nb': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8c9587381b2341d562742e36a89690be32a732b11830813473890249dd40a07d" + } + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/integrations/model-training/xgboost/notebooks/xg_data_panel.gif b/integrations/model-training/xgboost/notebooks/xg_data_panel.gif new file mode 100644 index 00000000..827795ce Binary files /dev/null and b/integrations/model-training/xgboost/notebooks/xg_data_panel.gif differ