From bc6c826351b119c8f800a2c847ad78bc5d77ba3b Mon Sep 17 00:00:00 2001 From: Matt Bowers Date: Mon, 18 Sep 2023 14:36:29 +0200 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- about.html | 2 +- archive-python.xml | 4942 +++++++++----- archive.html | 47 +- archive.xml | 5880 +++++++++-------- gradient-boosting-series.html | 16 +- index.html | 71 +- listings.json | 2 + posts/8020-pandas-tutorial/index.html | 2 +- .../index.html | 2 +- posts/consider-the-decision-tree/index.html | 2 +- posts/decision-tree-from-scratch/index.html | 2 +- posts/drafts/conda-cheat-sheet/index.html | 2 +- .../get-down-with-gradient-descent/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- posts/hello-pyspark/index.html | 2 +- posts/hello-world/index.html | 2 +- .../index.html | 2 +- .../index.html | 2 +- posts/xgboost-explained/index.html | 2 +- .../index.html | 1446 ++++ .../figure-html/cell-19-output-1.png | Bin 0 -> 17808 bytes .../figure-html/cell-20-output-1.png | Bin 0 -> 94075 bytes .../figure-html/cell-7-output-1.png | Bin 0 -> 17274 bytes .../figure-html/cell-8-output-1.png | Bin 0 -> 16837 bytes .../kigali-branches.jpg | Bin 0 -> 79493 bytes posts/xgboost-from-scratch/index.html | 2 +- search.json | 405 +- sitemap.xml | 50 +- 30 files changed, 8348 insertions(+), 4543 deletions(-) create mode 100644 posts/xgboost-for-regression-in-python/index.html create mode 100644 posts/xgboost-for-regression-in-python/index_files/figure-html/cell-19-output-1.png create mode 100644 posts/xgboost-for-regression-in-python/index_files/figure-html/cell-20-output-1.png create mode 100644 posts/xgboost-for-regression-in-python/index_files/figure-html/cell-7-output-1.png create mode 100644 posts/xgboost-for-regression-in-python/index_files/figure-html/cell-8-output-1.png create mode 100644 posts/xgboost-for-regression-in-python/kigali-branches.jpg diff --git a/.nojekyll b/.nojekyll index d06df9a..72312a6 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -7d0c39bd \ No newline at end of file +e110b739 \ No newline at end of file diff --git a/about.html b/about.html index 489a190..ccb50da 100644 --- a/about.html +++ b/about.html @@ -134,7 +134,7 @@
Subscribe
- + diff --git a/archive-python.xml b/archive-python.xml index 8c47f22..95005db 100644 --- a/archive-python.xml +++ b/archive-python.xml @@ -10,7 +10,2235 @@ A blog about data science, statistics, machine learning, and the scientific method quarto-1.3.433 -Tue, 05 Sep 2023 21:00:00 GMT +Tue, 24 Oct 2023 22:00:00 GMT + + XGBoost for Regression in Python + Matt Bowers + https://randomrealizations.com/posts/xgboost-for-regression-in-python/index.html + In this post I’m going to show you my process for solving regression problems with XGBoost in python, using either the native xgboost API or the scikit-learn interface. This is a powerful methodology that can produce world class results in a short time with minimal thought or effort. While we’ll be working on an old Kaggle competition for predicting the sale prices of bulldozers and other heavy machinery, you can use this flow to solve whatever tabular data regression problem you’re working on.

+

This post serves as the explanation and documentation for the XGBoost regression jupyter notebook from my ds-templates repo on GitHub, so go ahead and download the notebook and follow along with your own data.

+

If you’re not already comfortable with the ideas behind gradient boosting and XGBoost, you’ll find it helpful to read some of my previous posts to get up to speed. I’d start with this introduction to gradient boosting, and then read this explanation of how XGBoost works.

+

Let’s get into it! 🚀

+
+

Install and import the xgboost library

+

If you don’t already have it, go ahead and use conda to install the xgboost library, e.g.

+
$ conda install -c conda-forge xgboost
+
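If you prefer pip, the library is also published on PyPI under the same name, so a plain pip install works as well.

$ pip install xgboost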

Then import it along with the usual suspects.

+
+
import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import xgboost as xgb
+
+
+
+

Read dataset into python

+

In this example we’ll work on the Kaggle Bluebook for Bulldozers competition, which asks us to build a regression model to predict the sale price of heavy equipment. Amazingly, you can solve your own regression problem by swapping this data out with your organization’s data before proceeding with the tutorial.

+

Go ahead and download the Train.zip file from Kaggle and extract it to get Train.csv. Then read the data into a pandas dataframe.

+
+
df = pd.read_csv('Train.csv', parse_dates=['saledate']);
+
+

Notice I cheated a little bit, checking the columns ahead of time and telling pandas to treat the saledate column as a date. In general it will make life easier to read in any date-like columns as dates.

+
+
df.info()
+
+
<class 'pandas.core.frame.DataFrame'>
+RangeIndex: 401125 entries, 0 to 401124
+Data columns (total 53 columns):
+ #   Column                    Non-Null Count   Dtype         
+---  ------                    --------------   -----         
+ 0   SalesID                   401125 non-null  int64         
+ 1   SalePrice                 401125 non-null  int64         
+ 2   MachineID                 401125 non-null  int64         
+ 3   ModelID                   401125 non-null  int64         
+ 4   datasource                401125 non-null  int64         
+ 5   auctioneerID              380989 non-null  float64       
+ 6   YearMade                  401125 non-null  int64         
+ 7   MachineHoursCurrentMeter  142765 non-null  float64       
+ 8   UsageBand                 69639 non-null   object        
+ 9   saledate                  401125 non-null  datetime64[ns]
+ 10  fiModelDesc               401125 non-null  object        
+ 11  fiBaseModel               401125 non-null  object        
+ 12  fiSecondaryDesc           263934 non-null  object        
+ 13  fiModelSeries             56908 non-null   object        
+ 14  fiModelDescriptor         71919 non-null   object        
+ 15  ProductSize               190350 non-null  object        
+ 16  fiProductClassDesc        401125 non-null  object        
+ 17  state                     401125 non-null  object        
+ 18  ProductGroup              401125 non-null  object        
+ 19  ProductGroupDesc          401125 non-null  object        
+ 20  Drive_System              104361 non-null  object        
+ 21  Enclosure                 400800 non-null  object        
+ 22  Forks                     192077 non-null  object        
+ 23  Pad_Type                  79134 non-null   object        
+ 24  Ride_Control              148606 non-null  object        
+ 25  Stick                     79134 non-null   object        
+ 26  Transmission              183230 non-null  object        
+ 27  Turbocharged              79134 non-null   object        
+ 28  Blade_Extension           25219 non-null   object        
+ 29  Blade_Width               25219 non-null   object        
+ 30  Enclosure_Type            25219 non-null   object        
+ 31  Engine_Horsepower         25219 non-null   object        
+ 32  Hydraulics                320570 non-null  object        
+ 33  Pushblock                 25219 non-null   object        
+ 34  Ripper                    104137 non-null  object        
+ 35  Scarifier                 25230 non-null   object        
+ 36  Tip_Control               25219 non-null   object        
+ 37  Tire_Size                 94718 non-null   object        
+ 38  Coupler                   213952 non-null  object        
+ 39  Coupler_System            43458 non-null   object        
+ 40  Grouser_Tracks            43362 non-null   object        
+ 41  Hydraulics_Flow           43362 non-null   object        
+ 42  Track_Type                99153 non-null   object        
+ 43  Undercarriage_Pad_Width   99872 non-null   object        
+ 44  Stick_Length              99218 non-null   object        
+ 45  Thumb                     99288 non-null   object        
+ 46  Pattern_Changer           99218 non-null   object        
+ 47  Grouser_Type              99153 non-null   object        
+ 48  Backhoe_Mounting          78672 non-null   object        
+ 49  Blade_Type                79833 non-null   object        
+ 50  Travel_Controls           79834 non-null   object        
+ 51  Differential_Type         69411 non-null   object        
+ 52  Steering_Controls         69369 non-null   object        
+dtypes: datetime64[ns](1), float64(2), int64(6), object(44)
+memory usage: 162.2+ MB
+
+
+
+
+

Prepare raw data for XGBoost

+

When faced with a new tabular dataset for modeling, we have two format considerations: data types and missingness. From the call to df.info() above, we can see we have both mixed types and missing values.

+

When it comes to missing values, some models like the gradient booster or random forest in scikit-learn require purely non-missing inputs. One of the great strengths of XGBoost is that it relaxes this requirement, allowing us to pass in missing feature values, so we don’t have to worry about them.

+
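As a quick illustration of that point, here’s a tiny toy example (separate from the bulldozer data; the names are just for illustration) showing that feature arrays containing np.nan can be passed straight in, with XGBoost treating the NaNs as missing values.

X_demo = np.array([[1.0, np.nan], [2.0, 3.0], [np.nan, 5.0], [4.0, 6.0]])
y_demo = np.array([1.0, 2.0, 3.0, 4.0])
xgb.XGBRegressor(n_estimators=5).fit(X_demo, y_demo)  # trains fine; NaN entries are handled as missing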

Regarding data types, all ML models for tabular data require inputs to be numeric, either integers or floats, so we’re going to have to deal with those object columns.

+
+

Encode string features

+

The simplest way to encode string variables is to map each unique string value to an integer; this is called integer encoding.

+

We have a couple of options for how to implement this transformation: pandas categoricals or the scikit-learn label encoder. We can use the categorical type in pandas to generate mappings from string values to integers for each string feature. The category type is a bit like the factor type in R. Pandas stores the underlying data as integers, and it also keeps a mapping from the integers to the string values. XGBoost will be able to access the integers for model fitting. This is nice because we can still access the actual categories which can be helpful when we start taking a closer look at the data. If you prefer, you can also use the scikit-learn label encoder to replace the string columns with their integer-mapped counterparts.

+
+
def encode_string_features(df, use_cats=True):
+    out_df = df.copy()
+    for feature, feature_type in df.dtypes.items():
+        if feature_type == 'object':
+            if use_cats:
+                out_df[feature] = out_df[feature].astype('category')
+            else:
+                from sklearn.preprocessing import LabelEncoder
+                out_df[feature] = LabelEncoder() \
+                    .fit_transform(out_df[feature].astype('str'))
+    return out_df
+
+df = encode_string_features(df, use_cats=False)
+
+
+
+

Encode date and timestamp features

+

While dates feel sort of numeric, they are not numbers, so we need to transform them into numeric columns. Unfortunately, encoding timestamps isn’t as straightforward as encoding strings, so we actually might need to engage in a little bit of feature engineering. A single date has many different attributes, e.g. days since epoch, year, quarter, month, day, day of year, day of week, is holiday, etc. As a starting point, we can just add a few of these attributes as features. Once a feature is represented as a date or timestamp data type, you can access various attributes via the dt attribute.

+
+
def encode_datetime_features(df, datetime_features, datetime_attributes):
+    out_df = df.copy()
+    for datetime_feature in datetime_features:
+        for datetime_attribute in datetime_attributes:
+            if datetime_attribute == 'days_since_epoch':
+                out_df[f'{datetime_feature}_{datetime_attribute}'] = \
+                    (out_df[datetime_feature] 
+                     - pd.Timestamp(year=1970, month=1, day=1)).dt.days
+            else:
+                out_df[f'{datetime_feature}_{datetime_attribute}'] = \
+                    getattr(out_df[datetime_feature].dt, datetime_attribute)
+    return out_df
+
+datetime_features = [
+    'saledate',
+]
+datetime_attributes = [
+    'year',
+    'month',
+    'day',
+    'quarter',
+    'day_of_year',
+    'day_of_week',
+    'days_since_epoch',
+]
+
+df = encode_datetime_features(df, datetime_features, datetime_attributes)
+
+
+
+

Transform the target if necessary

+

In the interest of speed and efficiency, we didn’t bother doing any EDA with the feature data. Part of my justification for this is that trees are incredibly robust to outliers, collinearity, missingness, and other assorted nonsense in the feature data. However, they are not necessarily robust to nonsense in the target variable, so it’s worth having a look at it before proceeding any further.

+
+
df.SalePrice.hist(); plt.xlabel('SalePrice');
+
+

histogram of sale price showing right-skewed data

+
+
+

Often when predicting prices it makes sense to use log price, especially when they span multiple orders of magnitude or have a strong right skew. These data look pretty friendly, lacking outliers and exhibiting only a mild positive skew; we could probably get away without doing any transformation. But checking the evaluation metric used to score the Kaggle competition, we see they’re using root mean squared log error. That’s equivalent to using RMSE on log-transformed target data, so let’s go ahead and work with log prices.

+
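To make that equivalence concrete, here’s a small sketch of the metric itself (we won’t need this helper below): RMSLE on raw prices is just RMSE computed on log1p-transformed values.

def rmsle(y_true, y_pred):
    return np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true))**2))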
+
df['logSalePrice'] = np.log1p(df['SalePrice'])
+df.logSalePrice.hist(); plt.xlabel('logSalePrice');
+
+

histogram of log sale price showing a more symmetric distribution

+
+
+
+
+
+

Train and Evaluate the XGBoost regression model

+

Having prepared our dataset, we are now ready to train an XGBoost model. We’ll walk through the flow step-by-step first, then later we’ll collect the code in a single cell, so it’s easier to quickly iterate through variations of the model.

+
+

Specify target and feature columns

+

First we’ll put together a list of our features and define the target column. I like to have an actual list defined in the code so it’s easier to see everything we’re putting into the model and easier to add or remove features as we iterate. Just run something like list(df.columns) in a cell to get a copy-pasteable list of columns, then edit it down to the full list of features, i.e. remove the target, date columns, and other non-feature columns.

+
+
# list(df.columns)
+
+
+
features = [
+    'SalesID',
+    'MachineID',
+    'ModelID',
+    'datasource',
+    'auctioneerID',
+    'YearMade',
+    'MachineHoursCurrentMeter',
+    'UsageBand',
+    'fiModelDesc',
+    'fiBaseModel',
+    'fiSecondaryDesc',
+    'fiModelSeries',
+    'fiModelDescriptor',
+    'ProductSize',
+    'fiProductClassDesc',
+    'state',
+    'ProductGroup',
+    'ProductGroupDesc',
+    'Drive_System',
+    'Enclosure',
+    'Forks',
+    'Pad_Type',
+    'Ride_Control',
+    'Stick',
+    'Transmission',
+    'Turbocharged',
+    'Blade_Extension',
+    'Blade_Width',
+    'Enclosure_Type',
+    'Engine_Horsepower',
+    'Hydraulics',
+    'Pushblock',
+    'Ripper',
+    'Scarifier',
+    'Tip_Control',
+    'Tire_Size',
+    'Coupler',
+    'Coupler_System',
+    'Grouser_Tracks',
+    'Hydraulics_Flow',
+    'Track_Type',
+    'Undercarriage_Pad_Width',
+    'Stick_Length',
+    'Thumb',
+    'Pattern_Changer',
+    'Grouser_Type',
+    'Backhoe_Mounting',
+    'Blade_Type',
+    'Travel_Controls',
+    'Differential_Type',
+    'Steering_Controls',
+    'saledate_year',
+    'saledate_month',
+    'saledate_day',
+    'saledate_quarter',
+    'saledate_day_of_year',
+    'saledate_day_of_week',
+    'saledate_days_since_epoch'
+]
+
+target = 'logSalePrice'
+
+
+
+

Split the data into training and validation sets

+

Next we split the dataset into a training set and a validation set. Of course since we’re going to evaluate against the validation set a number of times as we iterate, it’s best practice to keep a separate test set reserved to check our final model to ensure it generalizes well. Assuming that final test set is hidden away, we can use the rest of the data for training and validation.

+

There are two main ways we might want to select the validation set. If there isn’t a temporal ordering of the observations, we might be able to randomly sample. In practice, it’s much more common that observations have a temporal ordering, and that models are trained on observations up to a certain time and used to predict on observations occurring after that time. Since this data is temporal, we don’t want to split randomly; instead we’ll split on observation date, reserving the latest observations for the validation set.

+
+
# Temporal Validation Set
+def train_test_split_temporal(df, datetime_column, n_test):
+    idx_sort = np.argsort(df[datetime_column])
+    idx_train, idx_test = idx_sort[:-n_test], idx_sort[-n_test:]
+    return df.iloc[idx_train, :], df.iloc[idx_test, :]
+
+
+# Random Validation Set
+def train_test_split_random(df, n_test):
+    np.random.seed(42)
+    idx_sort = np.random.permutation(len(df))
+    idx_train, idx_test = idx_sort[:-n_test], idx_sort[-n_test:]
+    return df.iloc[idx_train, :], df.iloc[idx_test, :]
+
+my_train_test_split = lambda d, n_valid: train_test_split_temporal(d, 'saledate', n_valid)
+# my_train_test_split = lambda d, n_valid: train_test_split_random(d, n_valid)
+
+
+
n_valid = 12000
+train_df, valid_df = my_train_test_split(df, n_valid)
+
+train_df.shape, valid_df.shape
+
+
((389125, 61), (12000, 61))
+
+
+
+
+

Create DMatrix data objects

+

XGBoost uses its own data structure called DMatrix, which is optimized for memory efficiency and training speed, so next we need to create DMatrix objects for our training and validation datasets.

+
+

If you prefer to use the scikit-learn interface to XGBoost, you don’t need to create these DMatrix objects. More on that below.

+
+
+
dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+
+
+

Set the XGBoost parameters

+

XGBoost has numerous hyperparameters. Fortunately, just a handful of them tend to be the most influential; furthermore, the default values are not bad in most situations. I like to start out with a dictionary containing the default parameter values for just the ones I think are most important. For training there is one more key setting, the number of boosting rounds num_boost_round, which I set to 50 as a starting point; you can make this smaller initially if training takes too long.

+
+
# default values for important parameters
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+
+
+

Train the XGBoost model

+

Check out the documentation on the learning API to see all the training options. During training, I like to have XGBoost print out the evaluation metric on the train and validation set after every few boosting rounds and again at the end of training; that can be done by setting evals and verbose_eval. You can also save the evaluation results in a dictionary passed into evals_result to inspect and plot the objective curve over the training iterations.

+
+
evals_result = {}
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')],
+              verbose_eval=10,
+              evals_result=evals_result)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37158
+[20]    train-rmse:0.26289  valid-rmse:0.28239
+[30]    train-rmse:0.25148  valid-rmse:0.27028
+[40]    train-rmse:0.24375  valid-rmse:0.26420
+[49]    train-rmse:0.23738  valid-rmse:0.25855
+
+
+
+
+

Train the XGBoost model using the sklearn interface

+

You can optionally use the sklearn estimator interface to XGBoost. This will bypass the need to use the DMatrix data objects for training and prediction, and it will allow you to leverage many of the other scikit-learn ecosystem tools like pipelines, parameter search, partial dependence plots, etc. The XGBRegressor is available in the xgboost library that we’ve already imported.

+
+
# scikit-learn interface
+reg = xgb.XGBRegressor(n_estimators=num_boost_round, **params)
+reg.fit(train_df[features], train_df[target], 
+        eval_set=[(train_df[features], train_df[target]), (valid_df[features], valid_df[target])], 
+        verbose=10);
+
+
[0] validation_0-rmse:6.74422   validation_1-rmse:6.79733
+[10]    validation_0-rmse:0.34798   validation_1-rmse:0.37158
+[20]    validation_0-rmse:0.26289   validation_1-rmse:0.28239
+[30]    validation_0-rmse:0.25148   validation_1-rmse:0.27028
+[40]    validation_0-rmse:0.24375   validation_1-rmse:0.26420
+[49]    validation_0-rmse:0.23738   validation_1-rmse:0.25855
+
+
+
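Just to show what that ecosystem interoperability buys us, here’s a sketch of dropping the estimator into scikit-learn’s cross-validation utilities (purely an API illustration; random folds ignore the temporal structure of this data):

from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(xgb.XGBRegressor(n_estimators=num_boost_round, **params),
                            train_df[features], train_df[target],
                            scoring='neg_root_mean_squared_error', cv=3)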

Since not all features of XGBoost are available through the scikit-learn estimator interface, you might want to get the native booster object back out of the sklearn wrapper.

+
+
m = reg.get_booster()
+
+
+
+

Evaluate the model and check for overfitting

+

We get the model evaluation metrics on the training and validation sets printed to stdout when we use the evals argument to the training API. Typically I just look at those printed metrics, but let’s double check by hand.

+
+
def root_mean_squared_error(y_true, y_pred):
+    return np.sqrt(np.mean((y_true - y_pred)**2))
+
+root_mean_squared_error(dvalid.get_label(), m.predict(dvalid))
+
+
0.25855368
+
+
+
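If you went the scikit-learn interface route, the same check works without a DMatrix; a quick sketch using the reg estimator fitted above:

root_mean_squared_error(valid_df[target].values, reg.predict(valid_df[features]))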

So, how good is that RMSLE of 0.259? Well, checking the Kaggle leaderboard for this competition, we would have come in 53rd out of 474, which is in the top 12% of submissions. That’s not bad for 10 minutes of work doing the bare minimum necessary to transform the raw data into a format consumable by XGBoost and then training a model using default hyperparameter values.

+
+

Note that we’re using a different validation set from that used for the final leaderboard (which is long closed), but our score is likely still a decent approximation for how we would have done in the competition.

+
+

It can be helpful to take a look at objective curves for training and validation data to get a sense for the extent of overfitting. A huge difference between training and validation performance indicates overfitting. In the below curve, there is very little overfitting, indicating we can be aggressive with hyperparameters that increase model flexibility. More on that soon.

+
+
pd.DataFrame({
+    'train': evals_result['train']['rmse'],
+    'valid': evals_result['valid']['rmse']
+}).plot(); plt.xlabel('boosting round'); plt.ylabel('objective');
+
+

line plot showing objective function versus training iteration for training and validation sets

+
+
+
+
+

Check feature importance

+

It’s helpful to get an idea of how much the model is using each feature. In following iterations we might want to try dropping low-signal features or examining the important ones more closely for feature engineering ideas. The gigantic caveat to keep in mind here is that there are different measures of feature importance, and each one will give different importances. XGBoost provides several importance measures, chief among them weight, gain, and cover; I tend to prefer looking at the weight measure because its rankings usually seem most intuitive.

+
+
fig, ax = plt.subplots(figsize=(5,10))
+feature_importances = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)
+feature_importances.plot.barh(ax=ax)
+plt.title('Feature Importance');
+
+

feature importance plot showing a few high importance features and many low importance ones

+
+
+
+
+
+

Improve performance using a model iteration loop

+

At this point we have a half-decent prototype model. Now we enter the model iteration loop in which we adjust features and model parameters to find configurations that have better and better performance.

+

Let’s start by putting the feature and target specification, the training/validation split, the model training, and the evaluation all together in one code block that we can copy paste for easy model iteration.

+
+

Note that for this process to be effective, model training needs to take less than 10 seconds. Otherwise you’ll be sitting around waiting way too long. If training takes too long, try training on a sample of the training data, or try reducing the number of boosting rounds.

+
+
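For example, one easy way to speed things up (a hypothetical shortcut, not used in the runs below) is to iterate on a random subsample of the training data and scale back up once you’ve settled on a configuration.

train_sample_df = train_df.sample(frac=0.25, random_state=42)  # swap in for train_df while iterating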
+
features = [
+    'SalesID',
+    'MachineID',
+    'ModelID',
+    'datasource',
+    'auctioneerID',
+    'YearMade',
+    'MachineHoursCurrentMeter',
+    'UsageBand',
+    'fiModelDesc',
+    'fiBaseModel',
+    'fiSecondaryDesc',
+    'fiModelSeries',
+    'fiModelDescriptor',
+    'ProductSize',
+    'fiProductClassDesc',
+    'state',
+    'ProductGroup',
+    'ProductGroupDesc',
+    'Drive_System',
+    'Enclosure',
+    'Forks',
+    'Pad_Type',
+    'Ride_Control',
+    'Stick',
+    'Transmission',
+    'Turbocharged',
+    'Blade_Extension',
+    'Blade_Width',
+    'Enclosure_Type',
+    'Engine_Horsepower',
+    'Hydraulics',
+    'Pushblock',
+    'Ripper',
+    'Scarifier',
+    'Tip_Control',
+    'Tire_Size',
+    'Coupler',
+    'Coupler_System',
+    'Grouser_Tracks',
+    'Hydraulics_Flow',
+    'Track_Type',
+    'Undercarriage_Pad_Width',
+    'Stick_Length',
+    'Thumb',
+    'Pattern_Changer',
+    'Grouser_Type',
+    'Backhoe_Mounting',
+    'Blade_Type',
+    'Travel_Controls',
+    'Differential_Type',
+    'Steering_Controls',
+    'saledate_year',
+    'saledate_month',
+    'saledate_day',
+    'saledate_quarter',
+    'saledate_day_of_year',
+    'saledate_day_of_week',
+    'saledate_days_since_epoch',
+]
+
+target = 'logSalePrice'
+
+train_df, valid_df = train_test_split_temporal(df, 'saledate', 12000)
+dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')],verbose_eval=10)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37158
+[20]    train-rmse:0.26289  valid-rmse:0.28239
+[30]    train-rmse:0.25148  valid-rmse:0.27028
+[40]    train-rmse:0.24375  valid-rmse:0.26420
+[49]    train-rmse:0.23738  valid-rmse:0.25855
+
+
+
+

Feature selection

+
+

Drop low-importance features

+

Let’s try training a model on only the top k most important features. You can try different values of k for the rankings created from each of the three importance measures. You can play with how many to keep, looking for the optimal number manually.

+
+
feature_importances_weight = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)
+feature_importances_cover = pd.Series(m.get_score(importance_type='cover')).sort_values(ascending=False)
+feature_importances_gain = pd.Series(m.get_score(importance_type='gain')).sort_values(ascending=False)
+
+
+
# features = list(feature_importances_weight[:30].index)
+# features = list(feature_importances_cover[:35].index)
+features = list(feature_importances_gain[:30].index)
+
+dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37150
+[20]    train-rmse:0.26182  valid-rmse:0.27986
+[30]    train-rmse:0.24974  valid-rmse:0.26896
+[40]    train-rmse:0.24282  valid-rmse:0.26043
+[49]    train-rmse:0.23768  valid-rmse:0.25664
+
+
+

Looks like keeping the top 30 from the gain importance type gives a slight performance improvement.

+
+
+

Drop one feature at a time

+

Next try dropping each feature out of the model one-at-a-time to see if there are any more features that you can drop. For each feature, drop it from the feature set, then train a new model, then record the evaluation score. At the end, sort the scores to see which features are the best candidates for removal.

+
+
features = [
+    'Coupler_System',
+     'Tire_Size',
+     'Scarifier',
+     'ProductSize',
+     'Ride_Control',
+     'fiBaseModel',
+     'Enclosure',
+     'Pad_Type',
+     'YearMade',
+     'fiSecondaryDesc',
+     'ProductGroup',
+     'Drive_System',
+     'Ripper',
+     'saledate_days_since_epoch',
+     'fiModelDescriptor',
+     'fiProductClassDesc',
+     'MachineID',
+     'Hydraulics',
+     'SalesID',
+     'Track_Type',
+     'ModelID',
+     'fiModelDesc',
+     'Travel_Controls',
+     'Transmission',
+     'Blade_Extension',
+     'fiModelSeries',
+     'Grouser_Tracks',
+     'Undercarriage_Pad_Width',
+     'Stick',
+     'Thumb'
+]
+
+# drop each feature one-at-a-time
+scores = []
+for i, feature in enumerate(features):
+    drop_one_features = features[:i] + features[i+1:]
+
+    dtrain = xgb.DMatrix(data=train_df[drop_one_features], label=train_df[target], enable_categorical=True)
+    dvalid = xgb.DMatrix(data=valid_df[drop_one_features], label=valid_df[target], enable_categorical=True)
+
+    params = {
+        'learning_rate': 0.3,
+        'max_depth': 6,
+        'min_child_weight': 1,
+        'subsample': 1,
+        'colsample_bynode': 1,
+        'objective': 'reg:squarederror',
+    }
+    num_boost_round = 50
+
+    m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+                evals=[(dtrain, 'train'), (dvalid, 'valid')],
+                verbose_eval=False)
+    score = root_mean_squared_error(dvalid.get_label(), m.predict(dvalid))
+    scores.append(score)
+
+results_df = pd.DataFrame({
+    'feature': features,
+    'score': scores
+})
+results_df.sort_values(by='score')
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
featurescore
18SalesID0.252617
5fiBaseModel0.253710
27Undercarriage_Pad_Width0.254032
17Hydraulics0.254114
20ModelID0.254169
4Ride_Control0.254278
16MachineID0.254413
19Track_Type0.254825
6Enclosure0.254958
28Stick0.255164
1Tire_Size0.255365
10ProductGroup0.255404
22Travel_Controls0.255895
29Thumb0.256300
23Transmission0.256380
26Grouser_Tracks0.256395
11Drive_System0.256652
24Blade_Extension0.256698
7Pad_Type0.256952
25fiModelSeries0.257073
2Scarifier0.257590
12Ripper0.257848
0Coupler_System0.258074
21fiModelDesc0.258712
13saledate_days_since_epoch0.259856
14fiModelDescriptor0.260439
9fiSecondaryDesc0.260782
15fiProductClassDesc0.263790
3ProductSize0.268068
8YearMade0.313105
+ +
+
+
+

Next try removing the feature with the best removal score. Then with that feature still removed, also try removing the feature with the next best removal score and so on. Repeat this process until the model evaluation metric is no longer improving. I think this could be considered a faster version of backward stepwise feature selection.

+
+
features = [
+    'Coupler_System',
+     'Tire_Size',
+     'Scarifier',
+     'ProductSize',
+     'Ride_Control',
+#      'fiBaseModel',
+     'Enclosure',
+     'Pad_Type',
+     'YearMade',
+     'fiSecondaryDesc',
+     'ProductGroup',
+     'Drive_System',
+     'Ripper',
+     'saledate_days_since_epoch',
+     'fiModelDescriptor',
+     'fiProductClassDesc',
+     'MachineID',
+#      'Hydraulics',
+#      'SalesID',
+     'Track_Type',
+     'ModelID',
+     'fiModelDesc',
+     'Travel_Controls',
+     'Transmission',
+     'Blade_Extension',
+     'fiModelSeries',
+     'Grouser_Tracks',
+#      'Undercarriage_Pad_Width',
+     'Stick',
+     'Thumb'
+]
+
+dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79145
+[10]    train-rmse:0.34882  valid-rmse:0.37201
+[20]    train-rmse:0.26050  valid-rmse:0.27386
+[30]    train-rmse:0.24844  valid-rmse:0.26205
+[40]    train-rmse:0.24042  valid-rmse:0.25426
+[49]    train-rmse:0.23549  valid-rmse:0.25004
+
+
+

So here I was able to remove four more features before the score started getting worse. With our reduced feature set, we’re now ranking 39th on that Kaggle leaderboard. Let’s see how far we can get with some hyperparameter tuning.

+
+
+
+

Tune the XGBoost hyperparameters

+

This is a topic which deserves its own full-length post, but just for fun, here I’ll do a quick and dirty hand tuning without a ton of explanation.

+

Broadly speaking, my process is to increase model expressiveness by increasing the maximum tree depth until it looks like I’m overfitting. At that point, I start pushing tree pruning parameters like min child weight and regularization parameters like lambda to counteract the overfitting. That process led me to the following parameters.

+
+
params = {
+    'learning_rate': 0.3,
+    'max_depth': 10,
+    'min_child_weight': 14,
+    'lambda': 5,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:6.74473  valid-rmse:6.80196
+[10]    train-rmse:0.31833  valid-rmse:0.34151
+[20]    train-rmse:0.22651  valid-rmse:0.24885
+[30]    train-rmse:0.21501  valid-rmse:0.23904
+[40]    train-rmse:0.20897  valid-rmse:0.23645
+[49]    train-rmse:0.20418  valid-rmse:0.23412
+
+
+

That gets us up to 12th place. Next I start reducing the learning rate and increasing the boosting rounds in proportion to one another.

+
+
params = {
+    'learning_rate': 0.3/5,
+    'max_depth': 10,
+    'min_child_weight': 14,
+    'lambda': 5,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',}
+num_boost_round = 50*5
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:9.04930  valid-rmse:9.12743
+[10]    train-rmse:4.88505  valid-rmse:4.93769
+[20]    train-rmse:2.64630  valid-rmse:2.68501
+[30]    train-rmse:1.44703  valid-rmse:1.47923
+[40]    train-rmse:0.81123  valid-rmse:0.84079
+[50]    train-rmse:0.48441  valid-rmse:0.51272
+[60]    train-rmse:0.32887  valid-rmse:0.35434
+[70]    train-rmse:0.26276  valid-rmse:0.28630
+[80]    train-rmse:0.23720  valid-rmse:0.26026
+[90]    train-rmse:0.22658  valid-rmse:0.24932
+[100]   train-rmse:0.22119  valid-rmse:0.24441
+[110]   train-rmse:0.21747  valid-rmse:0.24114
+[120]   train-rmse:0.21479  valid-rmse:0.23923
+[130]   train-rmse:0.21250  valid-rmse:0.23768
+[140]   train-rmse:0.21099  valid-rmse:0.23618
+[150]   train-rmse:0.20928  valid-rmse:0.23524
+[160]   train-rmse:0.20767  valid-rmse:0.23445
+[170]   train-rmse:0.20658  valid-rmse:0.23375
+[180]   train-rmse:0.20558  valid-rmse:0.23307
+[190]   train-rmse:0.20431  valid-rmse:0.23252
+[200]   train-rmse:0.20316  valid-rmse:0.23181
+[210]   train-rmse:0.20226  valid-rmse:0.23145
+[220]   train-rmse:0.20133  valid-rmse:0.23087
+[230]   train-rmse:0.20045  valid-rmse:0.23048
+[240]   train-rmse:0.19976  valid-rmse:0.23023
+[249]   train-rmse:0.19902  valid-rmse:0.23009
+
+
+

Decreasing the learning rate and increasing the boosting rounds got us up to a 2nd place score. Notice that the score is still decreasing on the validation set. We can actually continue boosting on this model by passing it to the xgb_model argument in the train function. We want to go very very slowly here to avoid overshooting the minimum of the objective function. To do that I ramp up the lambda regularization parameter and boost a few more rounds from where we left off.

+
+
# second stage
+params = {
+    'learning_rate': 0.3/10,
+    'max_depth': 10,
+    'min_child_weight': 14,
+    'lambda': 60,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',}
+num_boost_round = 50*3
+
+m1 = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10,
+              xgb_model=m)
+
+
[0] train-rmse:0.19900  valid-rmse:0.23007
+[10]    train-rmse:0.19862  valid-rmse:0.22990
+[20]    train-rmse:0.19831  valid-rmse:0.22975
+[30]    train-rmse:0.19796  valid-rmse:0.22964
+[40]    train-rmse:0.19768  valid-rmse:0.22955
+[50]    train-rmse:0.19739  valid-rmse:0.22940
+[60]    train-rmse:0.19714  valid-rmse:0.22935
+[70]    train-rmse:0.19689  valid-rmse:0.22927
+[80]    train-rmse:0.19664  valid-rmse:0.22915
+[90]    train-rmse:0.19646  valid-rmse:0.22915
+[100]   train-rmse:0.19620  valid-rmse:0.22910
+[110]   train-rmse:0.19604  valid-rmse:0.22907
+[120]   train-rmse:0.19583  valid-rmse:0.22901
+[130]   train-rmse:0.19562  valid-rmse:0.22899
+[140]   train-rmse:0.19546  valid-rmse:0.22898
+[149]   train-rmse:0.19520  valid-rmse:0.22886
+
+
+
+
root_mean_squared_error(dvalid.get_label(), m1.predict(dvalid))
+
+
0.22885828
+
+
+

And that gets us to 1st place on the leaderboard.

+
+
+
+

Wrapping Up

+

There you have it, how to use XGBoost to solve a regression problem in python with world class performance. Remember you can use the XGBoost regression notebook from my ds-templates repo to make it easy to follow this flow on your own problems. If you found this helpful, or if you have additional ideas about solving regression problems with XGBoost, let me know down in the comments.

+
+ + ]]>
+ python + tutorial + gradient boosting + xgboost + https://randomrealizations.com/posts/xgboost-for-regression-in-python/index.html + Tue, 24 Oct 2023 22:00:00 GMT + +
Blogging with Quarto and Jupyter: The Complete Guide Matt Bowers @@ -479,7 +2707,7 @@ image-alt: "A London Underground train emerging from a tunnel" tutorial blogging https://randomrealizations.com/posts/blogging-with-quarto-and-jupyter/index.html - Tue, 05 Sep 2023 21:00:00 GMT + Tue, 05 Sep 2023 22:00:00 GMT @@ -2431,7 +4659,7 @@ xgboost score: 0.24123239765807963 gradient boosting from scratch https://randomrealizations.com/posts/xgboost-from-scratch/index.html - Fri, 06 May 2022 21:00:00 GMT + Fri, 06 May 2022 22:00:00 GMT @@ -3525,2303 +5753,1611 @@ font-style: inherit;">= np.argsort(x) sort_y, sort_x = y[sort_idx], x[sort_idx] - sum_y, n = y.sum(), len(y) - sum_y_right, n_right = sum_y, n - sum_y_left, n_left = 0., 0 - - for i in range(0, self.n - self.min_samples_leaf): - y_i, x_i, x_i_next = sort_y[i], sort_x[i], sort_x[i + 1] - sum_y_left += y_i; sum_y_right -= y_i - n_left += 1; n_right -= 1 - if n_left < self.min_samples_leaf or x_i == x_i_next: - continue - score = - sum_y_left**2 / n_left - sum_y_right**2 / n_right + sum_y**2 / n - if score < self.best_score_so_far: - self.best_score_so_far = score - self.split_feature_idx = feature_idx - self.threshold = (x_i + x_i_next) / 2 - - def __repr__(self): - s = f'n: {self.n}' - s += f'; value:{self.value:0.2f}' - if not self.is_leaf: - split_feature_name = self.X.columns[self.split_feature_idx] - s += f'; split: {split_feature_name} <= {self.threshold:0.3f}' - return s - - def predict(self, X): - return np.array([self._predict_row(row) for i, row in X.iterrows()]) - - def _predict_row(self, row): - if self.is_leaf: - return self.value - child = self.left if row[self.split_feature_idx] <= self.threshold \ - else sum_y, n self.right - = y.return child._predict_row(row)
- - -
-

From Scratch versus Scikit-Learn

-

As usual, we’ll test our homegrown handiwork by comparing it to the existing implementation in scikit-learn. First let’s train both models on the California Housing dataset which gives us 20k instances and 8 features to predict median house price by district.

-
-
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=43)

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

max_depth = 8
min_samples_leaf = 16

tree = DecisionTree(X_train, y_train, max_depth=max_depth, min_samples_leaf=min_samples_leaf)
pred = tree.predict(X_test)

sk_tree = DecisionTreeRegressor(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
sk_tree.fit(X_train, y_train)
sk_pred = sk_tree.predict(X_test)

print(f'from scratch MSE: {mean_squared_error(y_test, pred):0.4f}')
print(f'scikit-learn MSE: {mean_squared_error(y_test, sk_pred):0.4f}')

from scratch MSE: 0.3988
scikit-learn MSE: 0.3988

We get similar accuracy on a held-out test dataset.

Let’s benchmark the two implementations on training time.

%%time
sk_tree = DecisionTreeRegressor(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
sk_tree.fit(X_train, y_train);

CPU times: user 45.3 ms, sys: 555 µs, total: 45.8 ms
Wall time: 45.3 ms

DecisionTreeRegressor(max_depth=8, min_samples_leaf=16)

%%time
tree = DecisionTree(X_train, y_train, max_depth=max_depth, min_samples_leaf=min_samples_leaf)

CPU times: user 624 ms, sys: 1.65 ms, total: 625 ms
Wall time: 625 ms
-
-
-

Wow, the scikit-learn implementation absolutely smoked us, training an order of magnitude faster. This is to be expected, since they implement split finding in cython, which generates compiled C code that can run much faster than our native python code. Maybe we can take a look at how to optimize python code with cython here on the blog one of these days.

-
-
-

Wrapping Up

-

Holy cow, we just implemented a decision tree using nothing but numpy. I hope you enjoyed the scratch build as much as I did, and I hope you got a little bit better at coding (I certainly did). That was actually way harder than I expected, but looking back at the finished product, it doesn’t seem so bad right? I almost thought we were going to get away with not implementing our own decision tree, but it turns out that this will be super helpful for us when it comes time to implement XGBoost from scratch.

-
-
-

References

-

This implementation is inspired and partially adapted from Jeremy Howard’s live coding of a Random Forest as part of the fastai ML course.

-
- - ]]> - python - gradient boosting - from scratch - https://randomrealizations.com/posts/decision-tree-from-scratch/index.html - Sun, 12 Dec 2021 21:00:00 GMT - - - - How to Implement a Gradient Boosting Machine that Works with Any Loss Function - Matt Bowers - https://randomrealizations.com/posts/gradient-boosting-machine-with-any-loss-function/index.html - -
-

-
Cold water cascades over the rocks in Erwin, Tennessee.
-
- -

Friends, this is going to be an epic post! Today, we bring together all the ideas we’ve built up over the past few posts to nail down our understanding of the key ideas in Jerome Friedman’s seminal 2001 paper: “Greedy Function Approximation: A Gradient Boosting Machine.” In particular, we’ll summarize the highlights from the paper, and we’ll build an in-house python implementation of his generic gradient boosting algorithm which can train with any differentiable loss function. What’s more, we’ll go ahead and take our generic gradient boosting machine for a spin by training it with several of the most popular loss functions used in practice.

-

Are you freaking stoked or what?

-

Sweet. Let’s do this.

-
-

Friedman 2001: TL;DR

-

I’ve mentioned this paper a couple of times before, but as far as I can tell, this is the origin of gradient boosting; it is therefore, a seminal work worth reading. You know what, I think you might like to pick up the paper and read it yourself. Like many papers, there is a lot of scary looking math in the first few pages, but if you’ve been following along on this blog, you’ll find that it’s actually totally approachable. This is the kind of thing that cures imposter syndrome, so give it a shot. That said, here’s the TL;DR as I see it.

-

The first part of the paper introduces the idea of fitting models by doing gradient descent in function space, an ingenious idea we spent an entire post demystifying earlier. Friedman goes on to introduce the generic gradient boost algorithm, which works with any differentiable loss function, as well as specific variants for minimizing absolute error, Huber loss, and binary deviance. In terms of hyperparameters, he points out that the learning rate can be used to reduce overfitting, while increased tree depth can help capture more complex interactions among features. He even discusses feature importance and partial dependence methods for interpreting fitted gradient boosting models.

-

Friedman concludes by musing about the advantages of gradient boosting with trees. He notes some key advantages afforded by the use of decision trees including no need to rescale input data, robustness against irrelevant input features, and elegant handling of missing feature values. He points out that gradient boosting manages to capitalize on the benefits of decision trees while minimizing their key weakness (crappy accuracy). I think this offers a great insight into why gradient boosting models have become so widespread and successful in practical ML applications.

-
-
-

Friedman’s Generic Gradient Boosting Algorithm

-

Let’s take a closer look at Friedman’s original gradient boost algorithm, Alg. 1 in Section 3 of the paper (translated into the notation we’ve been using so far).

-

Like last time, we have training data where is a length- vector of target values, and is an matrix with observations of features. We also have a differentiable loss function , a “learning rate” hyperparameter , and a fixed number of model iterations .

-

Algorithm: gradient_boost returns: model

-
    -
  1. Let base model , where

  2. -
  3. for = to :

  4. -
  5.      Let “pseudo-residual” vector

  6. -
  7.      Train decision tree regressor to predict (minimizing squared error)

  8. -
  9.      foreach terminal leaf node :

  10. -
  11.           Let

  12. -
  13.           Set terminal leaf node to predict value

  14. -
  15.     

  16. -
  17. Return composite model

  18. -
-

By now, most of this is already familiar to us. We begin by setting the base model equal to the constant prediction value that minimizes the loss over all examples in the training dataset (line 1). Then we begin the boosting iterations (line 2), each time computing the negative gradients of the loss with respect to the current model predictions (known as the pseudo residuals) (line 3). We then fit our next decision tree regressor to predict the pseudo residuals (line 4).

-

Then we encounter something new on lines 5-7. When we fit a vanilla decision tree regressor to predict pseudo residuals, we’re using mean squared error as the loss function to train the tree. As you might imagine, this works well when the global loss function is also squared error. But if we want to use a global loss other than squared error, there is an additional trick we can use to further increase the composite model’s accuracy. The idea is to continue using squared error to train each decision tree, keeping its structure and split conditions but altering the predicted value in each leaf to help minimize the global loss function. Instead of using the mean target value as the prediction for each node (as we would do when minimizing squared error), we use a numerical optimization method like line search to choose the constant value for that leaf that leads to the best overall loss. This is the same thing we did in line 1 of the algorithm to set the base prediction, but here we choose the optimal prediction for each terminal node of the newly trained decision tree.

-
-
-

Implementation

-

I did some (half-assed) searching on the interweb for an implementation of GBM that allows the user to provide a custom loss function, and you know what? I couldn’t find anything. If you find another implementation, post in the comments so we can learn from it too.

-

Since we need to modify the values predicted by our decision trees’ terminal nodes, we’ll want to brush up on the scikit-learn decision tree structure before we get going. You can see explanations of all the necessary decision tree hacks in this notebook.

-
-
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from scipy.optimize import minimize

class GradientBoostingMachine():
    '''Gradient Boosting Machine supporting any user-supplied loss function.

    Parameters
    ----------
    n_trees : int
        number of boosting rounds

    learning_rate : float
        learning rate hyperparameter

    max_depth : int
        maximum tree depth
    '''

    def __init__(self, n_trees, learning_rate=0.1, max_depth=1):
        self.n_trees=n_trees;
        self.learning_rate=learning_rate
        self.max_depth=max_depth;

    def fit(self, X, y, objective):
        '''Fit the GBM using the specified loss function.

        Parameters
        ----------
        X : ndarray of size (number observations, number features)
            design matrix

        y : ndarray of size (number observations,)
            target values

        objective : loss function class instance
            Class specifying the loss function for training.
            Should implement two methods:
                loss(labels: ndarray, predictions: ndarray) -> float
                negative_gradient(labels: ndarray, predictions: ndarray) -> ndarray
        '''

        self.trees = []
        self.base_prediction = self._get_optimal_base_value(y, objective.loss)
        current_predictions = self.base_prediction * np.ones(shape=y.shape)
        for _ in range(self.n_trees):
            pseudo_residuals = objective.negative_gradient(y, current_predictions)
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, pseudo_residuals)
            self._update_terminal_nodes(tree, X, y, current_predictions, objective.loss)
            current_predictions += self.learning_rate * tree.predict(X)
            self.trees.append(tree)

    def _get_optimal_base_value(self, y, loss):
        '''Find the optimal initial prediction for the base model.'''
        fun = lambda c: loss(y, c)
        c0 = y.mean()
        return minimize(fun=fun, x0=c0).x[0]

    def _update_terminal_nodes(self, tree, X, y, current_predictions, loss):
        '''Update the tree's predictions according to the loss function.'''
        # terminal node id's
        leaf_nodes = np.nonzero(tree.tree_.children_left == -1)[0]
        # compute leaf for each sample in ``X``.
        leaf_node_for_each_sample = tree.apply(X)
        for leaf in leaf_nodes:
            samples_in_this_leaf = np.where(leaf_node_for_each_sample == leaf)[0]
            y_in_leaf = y.take(samples_in_this_leaf, axis=0)
            preds_in_leaf = current_predictions.take(samples_in_this_leaf, axis=0)
            val =


From Scratch versus Scikit-Learn

As usual, we’ll test our homegrown handiwork by comparing it to the existing implementation in scikit-learn. First let’s train both models on the California Housing dataset which gives us 20k instances and 8 features to predict median house price by district.

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=43)

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

max_depth = 8
min_samples_leaf = 16

tree = DecisionTree(X_train, y_train, max_depth=max_depth, min_samples_leaf=min_samples_leaf)
pred = tree.predict(X_test)

sk_tree = DecisionTreeRegressor(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
sk_tree.fit(X_train, y_train)
sk_pred = sk_tree.predict(X_test)

print(f'from scratch MSE: {mean_squared_error(y_test, pred):0.4f}')
print(f'scikit-learn MSE: {mean_squared_error(y_test, sk_pred):0.4f}')

from scratch MSE: 0.3988
scikit-learn MSE: 0.3988

We get similar accuracy on a held-out test dataset.

Let’s benchmark the two implementations on training time.

%%time
sk_tree = DecisionTreeRegressor(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
sk_tree.fit(X_train, y_train);

CPU times: user 45.3 ms, sys: 555 µs, total: 45.8 ms
Wall time: 45.3 ms

DecisionTreeRegressor(max_depth=8, min_samples_leaf=16)

%%time
tree = DecisionTree(X_train, y_train, max_depth=max_depth, min_samples_leaf=min_samples_leaf)

CPU times: user 624 ms, sys: 1.65 ms, total: 625 ms
Wall time: 625 ms
+
+
+

Wow, the scikit-learn implementation absolutely smoked us, training an order of magnitude faster. This is to be expected, since they implement split finding in cython, which generates compiled C code that can run much faster than our native python code. Maybe we can take a look at how to optimize python code with cython here on the blog one of these days.

+
+
+

Wrapping Up

+

Holy cow, we just implemented a decision tree using nothing but numpy. I hope you enjoyed the scratch build as much as I did, and I hope you got a little bit better at coding (I certainly did). That was actually way harder than I expected, but looking back at the finished product, it doesn’t seem so bad right? I almost thought we were going to get away with not implementing our own decision tree, but it turns out that this will be super helpful for us when it comes time to implement XGBoost from scratch.

+
+
+

References

+

This implementation is inspired and partially adapted from Jeremy Howard’s live coding of a Random Forest as part of the fastai ML course.

+
+ + ]]>
+ python + gradient boosting + from scratch + https://randomrealizations.com/posts/decision-tree-from-scratch/index.html + Sun, 12 Dec 2021 22:00:00 GMT + +
+ + How to Implement a Gradient Boosting Machine that Works with Any Loss Function + Matt Bowers + https://randomrealizations.com/posts/gradient-boosting-machine-with-any-loss-function/index.html + +
+

+
Cold water cascades over the rocks in Erwin, Tennessee.
+
+ +

Friends, this is going to be an epic post! Today, we bring together all the ideas we’ve built up over the past few posts to nail down our understanding of the key ideas in Jerome Friedman’s seminal 2001 paper: “Greedy Function Approximation: A Gradient Boosting Machine.” In particular, we’ll summarize the highlights from the paper, and we’ll build an in-house python implementation of his generic gradient boosting algorithm which can train with any differentiable loss function. What’s more, we’ll go ahead and take our generic gradient boosting machine for a spin by training it with several of the most popular loss functions used in practice.

+

Are you freaking stoked or what?

+

Sweet. Let’s do this.

+
+

Friedman 2001: TL;DR

+

I’ve mentioned this paper a couple of times before, but as far as I can tell, this is the origin of gradient boosting; it is therefore, a seminal work worth reading. You know what, I think you might like to pick up the paper and read it yourself. Like many papers, there is a lot of scary looking math in the first few pages, but if you’ve been following along on this blog, you’ll find that it’s actually totally approachable. This is the kind of thing that cures imposter syndrome, so give it a shot. That said, here’s the TL;DR as I see it.

+

The first part of the paper introduces the idea of fitting models by doing gradient descent in function space, an ingenious idea we spent an entire post demystifying earlier. Friedman goes on to introduce the generic gradient boost algorithm, which works with any differentiable loss function, as well as specific variants for minimizing absolute error, Huber loss, and binary deviance. In terms of hyperparameters, he points out that the learning rate can be used to reduce overfitting, while increased tree depth can help capture more complex interactions among features. He even discusses feature importance and partial dependence methods for interpreting fitted gradient boosting models.

+

Friedman concludes by musing about the advantages of gradient boosting with trees. He notes some key advantages afforded by the use of decision trees including no need to rescale input data, robustness against irrelevant input features, and elegant handling of missing feature values. He points out that gradient boosting manages to capitalize on the benefits of decision trees while minimizing their key weakness (crappy accuracy). I think this offers a great insight into why gradient boosting models have become so widespread and successful in practical ML applications.

+
+
+

Friedman’s Generic Gradient Boosting Algorithm

+

Let’s take a closer look at Friedman’s original gradient boost algorithm, Alg. 1 in Section 3 of the paper (translated into the notation we’ve been using so far).

+

Like last time, we have training data $(\mathbf{X}, \mathbf{y})$ where $\mathbf{y}$ is a length-$n$ vector of target values, and $\mathbf{X}$ is an $n \times p$ matrix with $n$ observations of $p$ features. We also have a differentiable loss function $L(\mathbf{y}, \hat{\mathbf{y}})$, a “learning rate” hyperparameter $\eta$, and a fixed number of model iterations $M$.

+

Algorithm: gradient_boost($\mathbf{X}$, $\mathbf{y}$, $L$, $\eta$, $M$) returns: model $F_M$

  1. Let base model $F_0(\mathbf{x}) = c_0$, where $c_0 = \text{argmin}_c \sum_i L(y_i, c)$
  2. for $m$ = $1$ to $M$:
  3.      Let “pseudo-residual” vector $\mathbf{r}_m = -\nabla_{\hat{\mathbf{y}}} L(\mathbf{y}, \hat{\mathbf{y}}) \big|_{\hat{\mathbf{y}} = F_{m-1}(\mathbf{X})}$
  4.      Train decision tree regressor $h_m(\mathbf{x})$ to predict $\mathbf{r}_m$ (minimizing squared error)
  5.      foreach terminal leaf node $t \in h_m$:
  6.           Let $v_t = \text{argmin}_c \sum_{i \in t} L(y_i, F_{m-1}(\mathbf{x}_i) + c)$
  7.           Set terminal leaf node $t$ to predict value $v_t$
  8.      Let $F_m(\mathbf{x}) = F_{m-1}(\mathbf{x}) + \eta h_m(\mathbf{x})$
  9. Return composite model $F_M(\mathbf{x})$

By now, most of this is already familiar to us. We begin by setting the base model equal to the constant prediction value that minimizes the loss over all examples in the training dataset (line 1). Then we begin the boosting iterations (line 2), each time computing the negative gradients of the loss with respect to the current model predictions (known as the pseudo residuals) (line 3). We then fit our next decision tree regressor to predict the pseudo residuals (line 4).

+

Then we encounter something new on lines 5-7. When we fit a vanilla decision tree regressor to predict pseudo residuals, we’re using mean squared error as the loss function to train the tree. As you might imagine, this works well when the global loss function is also squared error. But if we want to use a global loss other than squared error, there is an additional trick we can use to further increase the composite model’s accuracy. The idea is to continue using squared error to train each decision tree, keeping its structure and split conditions but altering the predicted value in each leaf to help minimize the global loss function. Instead of using the mean target value as the prediction for each node (as we would do when minimizing squared error), we use a numerical optimization method like line search to choose the constant value for that leaf that leads to the best overall loss. This is the same thing we did in line 1 of the algorithm to set the base prediction, but here we choose the optimal prediction for each terminal node of the newly trained decision tree.
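To make that idea concrete, here is a tiny standalone sketch (my own toy numbers, not from the paper or the post) of what the leaf-value line search does under absolute error loss, where the optimal constant for a leaf works out to be the median of its targets rather than the mean.

# toy illustration of choosing a leaf's constant prediction by numerical line search
import numpy as np
from scipy.optimize import minimize

y_in_leaf = np.array([1.0, 2.0, 7.0])      # made-up targets falling in one leaf
current_preds = np.zeros_like(y_in_leaf)   # predictions from the model so far
leaf_loss = lambda c: np.mean(np.abs(y_in_leaf - (current_preds + c)))
best_c = minimize(leaf_loss, x0=y_in_leaf.mean(), method='Nelder-Mead').x[0]
print(best_c)  # lands near 2.0 (the median), not the mean of 3.33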

+
+
+

Implementation

+

I did some (half-assed) searching on the interweb for an implementation of GBM that allows the user to provide a custom loss function, and you know what? I couldn’t find anything. If you find another implementation, post in the comments so we can learn from it too.

+

Since we need to modify the values predicted by our decision trees’ terminal nodes, we’ll want to brush up on the scikit-learn decision tree structure before we get going. You can see explanations of all the necessary decision tree hacks in this notebook.

+
+
import numpy as np 
from sklearn.tree import DecisionTreeRegressor 
from scipy.optimize import minimize

class GradientBoostingMachine():
    '''Gradient Boosting Machine supporting any user-supplied loss function.
    
    Parameters
    ----------
    n_trees : int
        number of boosting rounds
        
    learning_rate : float
        learning rate hyperparameter
        
    max_depth : int
        maximum tree depth
    '''
    
    def __init__(self, n_trees, learning_rate=0.1, max_depth=1):
        self.n_trees = n_trees
        self.learning_rate = learning_rate
        self.max_depth = max_depth
    
    def fit(self, X, y, objective):
        '''Fit the GBM using the specified loss function.
        
        Parameters
        ----------
        X : ndarray of size (number observations, number features)
            design matrix
            
        y : ndarray of size (number observations,)
            target values
            
        objective : loss function class instance
            Class specifying the loss function for training.
            Should implement two methods:
                loss(labels: ndarray, predictions: ndarray) -> float
                negative_gradient(labels: ndarray, predictions: ndarray) -> ndarray
        '''
        
        self.trees = []
        self.base_prediction = self._get_optimal_base_value(y, objective.loss)
        current_predictions = self.base_prediction * np.ones(shape=y.shape)
        for _ in range(self.n_trees):
            pseudo_residuals = objective.negative_gradient(y, current_predictions)
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, pseudo_residuals)
            self._update_terminal_nodes(tree, X, y, current_predictions, objective.loss)
            current_predictions += self.learning_rate * tree.predict(X)
            self.trees.append(tree)
     
    def _get_optimal_base_value(self, y, loss):
        '''Find the optimal initial prediction for the base model.'''
        fun = lambda c: loss(y, c)
        c0 = y.mean()
        return minimize(fun=fun, x0=c0).x[0]
        
    def _update_terminal_nodes(self, tree, X, y, current_predictions, loss):
        '''Update the tree's predictions according to the loss function.'''
        # terminal node id's
        leaf_nodes = np.nonzero(tree.tree_.children_left == -1)[0]
        # compute leaf for each sample in ``X``.
        leaf_node_for_each_sample = tree.apply(X)
        for leaf in leaf_nodes:
            samples_in_this_leaf = np.where(leaf_node_for_each_sample == leaf)[0]
            y_in_leaf = y.take(samples_in_this_leaf, axis=0)
            preds_in_leaf = current_predictions.take(samples_in_this_leaf, axis=0)
            val = self._get_optimal_leaf_value(y_in_leaf, 
                                               preds_in_leaf,
                                               loss)
            tree.tree_.value[leaf, 0, 0] = val
            
    def _get_optimal_leaf_value(self, y, current_predictions, loss):
        '''Find the optimal prediction value for a given leaf.'''
        fun = lambda c: loss(y, current_predictions + c)
        c0 = y.mean()
        return minimize(fun=fun, x0=c0).x[0]
          
    def predict(self, X):
        '''Generate predictions for the given input data.'''
        return (self.base_prediction 
                + self.learning_rate 
                * np.sum([tree.predict(X) for tree in self.trees], axis=0))

In terms of design, we implement a class for the GBM with scikit-like fit and predict methods. Notice that the fit method is only 10 lines long, and it corresponds very closely to Friedman’s gradient boost algorithm from above. Most of the complexity comes from the helper methods for updating the leaf values according to the specified loss function.

-

When the user wants to call the fit method, they’ll need to supply the loss function they want to use for boosting. We’ll make the user implement their loss (a.k.a. objective) function as a class with two methods: (1) a loss method taking the labels and the predictions and returning the loss score and (2) a negative_gradient method taking the labels and the predictions and returning an array of negative gradients.
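Schematically, a user-supplied objective just needs the following shape (the class name here is purely illustrative):

# skeleton of the interface our GBM expects from an objective (illustrative only)
class MyCustomLoss():
    def loss(self, y, preds):
        '''Return a single float scoring predictions against labels.'''
        ...
    def negative_gradient(self, y, preds):
        '''Return an ndarray of -dLoss/dpreds, evaluated elementwise.'''
        ...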

-
-
-

Testing our Model

-

Let’s test drive our custom-loss-ready GBM with a few different loss functions! We’ll compare it to the scikit-learn GBM to sanity check our implementation.

-
-
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier

rng = np.random.default_rng()

# test data
def make_test_data(n, noise_scale):
    x = np.linspace(0, 10, 500).reshape(-1,1)
    y = (np.where(x < 5, x, 5) + rng.normal(0, noise_scale, size=x.shape)).ravel()
    return x, y
    
# print model loss scores
def print_model_loss_scores(obj, y, preds, sk_preds):
    print(f'From Scratch Loss = {obj.loss(y, pred):0.4}')
    print(f'Scikit-Learn Loss = {obj.loss(y, sk_pred):0.4}')

Mean Squared Error

-

Mean Squared Error (a.k.a. Least Squares) loss produces estimates of the mean target value conditioned on the feature values. Here’s the implementation.

-
-
x, y = make_test_data(500, 0.4)

# from scratch GBM
class SquaredErrorLoss():
    '''User-Defined Squared Error Loss'''
    
    def loss(self, y, preds):
        return np.mean((y - preds)**2)
    
    def negative_gradient(self, y, preds):
        return y - preds
    

gbm = GradientBoostingMachine(n_trees=10,
                              learning_rate=0.5,
                              max_depth=1)
gbm.fit(x, y, SquaredErrorLoss())
pred = gbm.predict(x)

# scikit-learn GBM
sk_gbm = GradientBoostingRegressor(n_estimators=10,
                                   learning_rate=0.5,
                                   max_depth=1,
                                   loss='squared_error')
sk_gbm.fit(x, y)
sk_pred = sk_gbm.predict(x)
print_model_loss_scores(SquaredErrorLoss(), y, pred, sk_pred)
-
-
From Scratch Loss = 0.168
-Scikit-Learn Loss = 0.168
-
-
-
-
-

Scatterplot showing data and model prediction of y given x

-
-
-
-
-

Mean Absolute Error

-

Mean Absolute Error (a.k.a. Least Absolute Deviations) loss produces estimates of the median target value conditioned on the feature values. Here’s the implementation.

-
-
x, y = make_test_data(500, 0.4)

# from scratch GBM
class AbsoluteErrorLoss():
    '''User-Defined Absolute Error Loss'''
    
    def loss(self, y, preds):
        return np.mean(np.abs(y - preds))
    
    def negative_gradient(self, y, preds):
        return np.sign(y - preds)


gbm = GradientBoostingMachine(n_trees=10,
                              learning_rate=0.5,
                              max_depth=1)
gbm.fit(x, y, AbsoluteErrorLoss())
pred = gbm.predict(x)

# scikit-learn GBM
sk_gbm = GradientBoostingRegressor(n_estimators=10,
                                   learning_rate=0.5,
                                   max_depth=1,
                                   loss='absolute_error')
sk_gbm.fit(x, y)
sk_pred = sk_gbm.predict(x)
print_model_loss_scores(AbsoluteErrorLoss(), y, pred, sk_pred)
-
-
From Scratch Loss = 0.3225
-Scikit-Learn Loss = 0.3208
-
-
-
-
-

Figure showing scatterplot of data and model prediction of median of y given x

-
self.trees]">

Quantile Loss

-

Quantile loss yields estimates of a given quantile of the target variable conditioned on the features. Here’s my implementation.

-
-
x, y = make_test_data(500, 1)

# from scratch GBM
class QuantileLoss():
    '''Quantile Loss
    
    Parameters
    ----------
    alpha : float
        quantile to be estimated, 0 < alpha < 1
    '''
    
    def __init__(self, alpha):
        if alpha < 0 or alpha > 1:
            raise ValueError('alpha must be between 0 and 1')
        self.alpha = alpha
        
    def loss(self, y, preds):
        e = y - preds
        return np.mean(np.where(e > 0, self.alpha * e, (self.alpha - 1) * e))
    
    def negative_gradient(self, y, preds):
        e = y - preds 
        return np.where(e > 0, self.alpha, self.alpha - 1)

gbm = GradientBoostingMachine(n_trees=10,
                              learning_rate=0.5,
                              max_depth=1)
gbm.fit(x, y, QuantileLoss(alpha=0.9))
pred = gbm.predict(x)

# scikit-learn GBM
sk_gbm = GradientBoostingRegressor(n_estimators=10,
                                   learning_rate=0.5,
                                   max_depth=1,
                                   loss='quantile', alpha=0.9)
sk_gbm.fit(x, y)
sk_pred = sk_gbm.predict(x)

print_model_loss_scores(QuantileLoss(alpha=0.9), y, pred, sk_pred)

From Scratch Loss = 0.1853
Scikit-Learn Loss = 0.1856

Figure showing scatterplot of data and model prediction of 0.9 quantile of y given x
Binary Cross Entropy Loss

-

The previous losses are useful for regression problems, where the target is numeric. But we can also solve classification problems, simply by swapping in an appropriate loss function. Here we’ll implement binary cross entropy, a.k.a. binary deviance, a.k.a. negative binomial log likelihood (sometimes abusively called log loss). One thing to remember is that, as with logistic regression, our model is actually predicting the log odds ratio, not the probability of the positive class. Thus we use expit transformations (the inverse of logit) whenever probabilities are needed, e.g., when predicting the probability that an observation belongs to the positive class.

-
-
# make categorical test data

def expit(t):
    return np.exp(t) / (1 + np.exp(t))

x = np.linspace(-3, 3, 500)
p = expit(x)
y = rng.binomial(1, p, size=p.shape)
x = x.reshape(-1,1)

# from scratch GBM
class BinaryCrossEntropyLoss():
    '''Binary Cross Entropy Loss
    
    Note that the predictions should be log odds ratios.
    '''
    
    def __init__(self):
        self.expit = lambda t: np.exp(t) / (1 + np.exp(t))
    
    def loss(self, y, preds):
        p = self.expit(preds)
        return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))
    
    def negative_gradient(self, y, preds):
        p = self.expit(preds)
        return y / p - (1 - y) / (1 - p)

    
gbm = GradientBoostingMachine(n_trees=10,
                              learning_rate=0.5,
                              max_depth=1)
gbm.fit(x, y, BinaryCrossEntropyLoss())
pred = expit(gbm.predict(x))

# scikit-learn GBM
sk_gbm = GradientBoostingClassifier(n_estimators=10,
                                    learning_rate=0.5,
                                    max_depth=1,
                                    loss='log_loss')
sk_gbm.fit(x, y)
sk_pred = sk_gbm.predict_proba(x)[:, 1]

print_model_loss_scores(BinaryCrossEntropyLoss(), y, pred, sk_pred)

From Scratch Loss = 0.6379
Scikit-Learn Loss = 0.6403

Figure showing data and model prediction of probability that y equals one given x

Wrapping Up

-

Woohoo! We did it! We finally made it through Friedman’s paper in its entirety, and we implemented the generic gradient boosting algorithm which works with any differentiable loss function. If you made it this far, great job, gold star! By now you hopefully have a pretty solid grasp on gradient boosting, which is good, because soon we’re going to dive into the modern Newton descent gradient boosting frameworks like XGBoost. Onward!

-
-
-

References

-

Friedman’s 2001 paper: Greedy Function Approximation: A Gradient Boosting Machine

-
- - ]]>
- python - gradient boosting - from scratch - https://randomrealizations.com/posts/gradient-boosting-machine-with-any-loss-function/index.html - Fri, 22 Oct 2021 21:00:00 GMT - -
- - Hello PySpark! - Matt Bowers - https://randomrealizations.com/posts/hello-pyspark/index.html - -
-

-
A big day at Playa Guiones
-
- -

Well, you guessed it: it’s time for us to learn PySpark!

-

I know, I know, I can hear you screaming into your pillow. Indeed we just spent all that time converting from R and learning python and why the hell do we need yet another API for working with dataframes?

-

That’s a totally fair question.

-

So what happens when we’re working on something in the real world, where datasets get large in a hurry, and we suddenly have a dataframe that no longer fits into memory? We need a way for our computations and datasets to scale across multiple nodes in a distributed system without having to get too fussy about all the distributed compute details.

-

Enter PySpark.

-

I think it’s fair to think of PySpark as a python package for working with arbitrarily large dataframes, i.e., it’s like pandas but scalable. It’s built on top of Apache Spark, a unified analytics engine for large-scale data processing. PySpark is essentially a way to access the functionality of spark via python code. While there are other high-level interfaces to Spark (such as Java, Scala, and R), for data scientists who are already working extensively with python, PySpark will be the natural interface of choice. PySpark also has great integration with SQL, and it has a companion machine learning library called MLlib that’s more or less a scalable scikit-learn (maybe we can cover it in a future post).

-

So, here’s the plan. First we’re going to get set up to run PySpark locally in a jupyter notebook on our laptop. This is my preferred environment for interactively playing with PySpark and learning the ropes. Then we’re going to get up and running in PySpark as quickly as possible by reviewing the most essential functionality for working with dataframes and comparing it to how we would do things in pandas. Once we’re comfortable running PySpark on the laptop, it’s going to be much easier to jump onto a distributed cluster and run PySpark at scale.

-

Let’s do this.

-
-

How to Run PySpark in a Jupyter Notebook on Your Laptop

-

Ok, I’m going to walk us through how to get things installed on a Mac or Linux machine where we’re using homebrew and conda to manage virtual environments. If you have a different setup, your favorite search engine will help you get PySpark set up locally.

-
-
-
- -
-
-Note -
-
-
-

It’s possible for Homebrew and Anaconda to interfere with one another. The simple rule of thumb is that whenever you want to use the brew command, first deactivate your conda environment by running conda deactivate. See this Stack Overflow question for more details.

-
-
-
-

Install Spark

-

Install Spark with homebrew.

-

+

Quantile Loss

+

Quantile loss yields estimates of a given quantile of the target variable conditioned on the features. Here’s my implementation.

+
+
brew install apache-spark

Next we need to set up a SPARK_HOME environment variable in the shell. Check where Spark is installed.

-
brew info apache-spark

You should see something like

-
==> apache-spark: stable 3.3.2 (bottled), HEAD
-Engine for large-scale data processing
-https://spark.apache.org/
-/opt/homebrew/Cellar/apache-spark/3.3.2 (1,453 files, 320.9MB) *
-...
-

Set the SPARK_HOME environment variable to your spark installation path with /libexec appended to the end. To do this I added the following line to my .zshrc file.

-
export SPARK_HOME=/opt/homebrew/Cellar/apache-spark/3.3.2/libexec

Restart your shell, and test the installation by starting the Spark shell.

-
spark-shell
-
...
-Welcome to
-      ____              __
-     / __/__  ___ _____/ /__
-    _\ \/ _ \/ _ `/ __/  '_/
-   /___/ .__/\_,_/_/ /_/\_\   version 3.3.2
-      /_/
-         
-Using Scala version 2.12.15 (OpenJDK 64-Bit Server VM, Java 19.0.2)
-Type in expressions to have them evaluated.
-Type :help for more information.
-
-scala> 
-

If you get the scala> prompt, then you’ve successfully installed Spark on your laptop!

-
-
-

Install PySpark

-

Use conda to install the PySpark python package. As usual, it’s advisable to do this in a new virtual environment.

-
$ conda install pyspark
-

You should be able to launch an interactive PySpark REPL by saying pyspark.

-
$ pyspark
-...
-Welcome to
-      ____              __
-     / __/__  ___ _____/ /__
-    _\ \/ _ \/ _ `/ __/  '_/
-   /__ / .__/\_,_/_/ /_/\_\   version 3.1.2
-      /_/
-
-Using Python version 3.8.3 (default, Jul  2 2020 11:26:31)
-Spark context Web UI available at http://192.168.100.47:4041
-Spark context available as 'sc' (master = local[*], app id = local-1624127229929).
-SparkSession available as 'spark'.
->>> 
-

This time we get a familiar python >>> prompt. This is an interactive shell where we can easily experiment with PySpark. Feel free to run the example code in this post here in the PySpark shell, or, if you prefer a notebook, read on and we’ll get set up to run PySpark in a jupyter notebook.

-
-
-
- -
-
-Note -
-
-
-

When I tried following this setup on a new Mac, I hit an error about being unable to find the Java Runtime. This stack overflow question lead me to the fix.

-
1)">
-
-
-

The Spark Session Object

-

You may have noticed that when we launched that PySpark interactive shell, it told us that something called SparkSession was available as 'spark'. So basically, what’s happening here is that when we launch the pyspark shell, it instantiates an object called spark which is an instance of class pyspark.sql.session.SparkSession. The spark session object is going to be our entry point for all kinds of PySpark functionality, i.e., we’re going to be saying things like spark.this() and spark.that() to make stuff happen.

-

The PySpark interactive shell is kind enough to instantiate one of these spark session objects for us automatically. However, when we’re using another interface to PySpark (like say a jupyter notebook running a python kernal), we’ll have to make a spark session object for ourselves.

-
-
-

Create a PySpark Session in a Jupyter Notebook

-

There are a few ways to run PySpark in jupyter which you can read about here.

-

For derping around with PySpark on your laptop, I think the best way is to instantiate a spark session from a jupyter notebook running on a regular python kernel. The method we’ll use involves running a standard jupyter notebook session with a python kernel and using the findspark package to initialize the spark session. So, first install the findspark package.

-

+

conda install -c conda-forge findspark
-

Launch jupyter as usual.

-
jupyter notebook

Go ahead and fire up a new notebook using a regular python 3 kernel. Once you land inside the notebook, there are a couple things we need to do to get a spark session instantiated. You can think of this as boilerplate code that we need to run in the first cell of a notebook where we’re going to use PySpark.

-
-
import pyspark
import findspark
from pyspark.sql import SparkSession

findspark.init()
spark = SparkSession.builder.appName('My Spark App').getOrCreate()
-
-

First we’re running findspark’s init() method to find our Spark installation. If you run into errors here, make sure you got the SPARK_HOME environment variable correctly set in the install instructions above. Then we instantiate a spark session as spark. Once you run this, you’re ready to rock and roll with PySpark in your jupyter notebook.

-
-
-
- -
-
-Note -
-
-
-

Spark provides a handy web UI that you can use for monitoring and debugging. Once you instantiate the spark session You can open the UI in your web browser at http://localhost:4040/jobs/.

-
-
-
-
-
-

PySpark Concepts

-

PySpark provides two main abstractions for data: the RDD and the dataframe. RDD’s are just a distributed list of objects; we won’t go into details about them in this post. For us, the key object in PySpark is the dataframe.

-

While PySpark dataframes expose much of the functionality you would expect from a library for tabular data manipulation, they behave a little differently from pandas dataframes, both syntactically and under-the-hood. There are a couple of key concepts that will help explain these idiosyncracies.

-

Immutability - Pyspark RDD’s and dataframes are immutable. This means that if you change an object, e.g. by adding a column to a dataframe, PySpark returns a reference to a new dataframe; it does not modify the existing dataframe. This is kind of nice, because we don’t have to worry about that whole view versus copy nonsense that happens in pandas.
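Here’s a quick toy demonstration of that point (this snippet is mine, not part of the original walkthrough, and assumes the spark session we created above):

# adding a column returns a brand new dataframe; the original is untouched
toy_df = spark.createDataFrame([[1, 'a'], [2, 'b']], schema=['n', 'letter'])
toy_df2 = toy_df.withColumn('n_plus_one', toy_df.n + 1)
print(toy_df.columns)   # ['n', 'letter']
print(toy_df2.columns)  # ['n', 'letter', 'n_plus_one']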

-

Lazy Evaluation - Lazy evaluation means that when we start manipulating a dataframe, PySpark won’t actually perform any of the computations until we explicitly ask for the result. This is nice because it potentially allows PySpark to do fancy optimizations before executing a sequence of operations. It’s also confusing at first, because PySpark will seem to blaze through complex operations and then take forever to print a few rows of the dataframe.
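And a similarly tiny illustration of lazy evaluation (again my own toy example, assuming the spark session from above):

# chaining transformations returns immediately without doing any work...
lazy_df = (spark.createDataFrame([[1, 'a'], [2, 'b']], schema=['n', 'letter'])
           .filter('n > 1'))
# ...computation only happens when we call an action like show(), count(), or toPandas()
lazy_df.show()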

-
-
-

PySpark Dataframe Essentials

-
-

Creating a PySpark dataframe with createDataFrame()

-

The first thing we’ll need is a way to make dataframes. createDataFrame() allows us to create PySpark dataframes from python objects like nested lists or pandas dataframes. Notice that createDataFrame() is a method of the spark session class, so we’ll call it from our spark session sparkby saying spark.createDataFrame().

-
-
# create pyspark dataframe from nested lists
my_df = spark.createDataFrame(
    data=[
        [2022, "tiger"],
        [2023, "rabbit"],
        [2024, "dragon"]
    ],
    schema=['year', 'animal']
)
-
-

Let’s read the seaborn tips dataset into a pandas dataframe and then use it to create a PySpark dataframe.

-
-
import pandas as pd

# load tips dataset into a pandas dataframe
pandas_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')

# create pyspark dataframe from a pandas dataframe
pyspark_df = spark.createDataFrame(pandas_df)
-
-
-
-
- -
-
-Note -
-
-
-

In real life when we’re running PySpark on a large-scale distributed system, we would not generally want to use python lists or pandas dataframes to load data into PySpark. Ideally we would want to read data directly from where it is stored on HDFS, e.g. by reading parquet files, or by querying directly from a hive database using spark sql.

-
-
-
-
-

Peeking at a dataframe’s contents

-

The default print method for the PySpark dataframe will just give you the schema.

-
-
pyspark_df
-
-
DataFrame[total_bill: double, tip: double, sex: string, smoker: string, day: string, time: string, size: bigint]
-
-
-

If we want to peek at some of the data, we’ll need to use the show() method, which is analogous to the pandas head(). Remember that show() will cause PySpark to execute any operations that it’s been lazily waiting to evaluate, so sometimes it can take a while to run.

-
-
# show the first few rows of the dataframe
pyspark_df.show(5)
-
-
+----------+----+------+------+---+------+----+
-|total_bill| tip|   sex|smoker|day|  time|size|
-+----------+----+------+------+---+------+----+
-|     16.99|1.01|Female|    No|Sun|Dinner|   2|
-|     10.34|1.66|  Male|    No|Sun|Dinner|   3|
-|     21.01| 3.5|  Male|    No|Sun|Dinner|   3|
-|     23.68|3.31|  Male|    No|Sun|Dinner|   2|
-|     24.59|3.61|Female|    No|Sun|Dinner|   4|
-+----------+----+------+------+---+------+----+
-only showing top 5 rows
-
-
-
-

-
-
-

We thus encounter our first rude awakening. PySpark’s default representation of dataframes in the notebook isn’t as pretty as that of pandas. But no one ever said it would be pretty, they just said it would be scalable.

-

You can also use the printSchema() method for a nice vertical representation of the schema.

-
-
# show the dataframe schema
pyspark_df.printSchema()
-
-
root
- |-- total_bill: double (nullable = true)
- |-- tip: double (nullable = true)
- |-- sex: string (nullable = true)
- |-- smoker: string (nullable = true)
- |-- day: string (nullable = true)
- |-- time: string (nullable = true)
- |-- size: long (nullable = true)
-
-
-
-
-
-

Select columns by name

-

You can select specific columns from a dataframe using the select() method. You can pass either a list of names, or pass names as arguments.

-
-
# select some of the columns
pyspark_df.select('total_bill', 'tip')

# select columns in a list
pyspark_df.select(['day', 'time', 'total_bill'])
-
-
-

Filter rows based on column values

-

Analogous to the WHERE clause in SQL, and the query() method in pandas, PySpark provides a filter() method which returns only the rows that meet the specified conditions. Its argument is a string specifying the condition to be met for rows to be included in the result. You specify the condition as an expression involving the column names and comparison operators like <, >, <=, >=, == (equal), and != (not equal). You can specify compound expressions using and and or, and you can even do a SQL-like in to check if the column value matches any items in a list.

-
-

+
# compare a column to a value
pyspark_df.filter('total_bill > 20')

# compare two columns with arithmetic
pyspark_df.filter('tip > 0.15 * total_bill')

# check equality with a string value
pyspark_df.filter('sex == "Male"')

# check equality with any of several possible values
pyspark_df.filter('day in ("Sat", "Sun")')

# use "and" 
pyspark_df.filter('day == "Fri" and time == "Lunch"')
-

If you’re into boolean indexing with the brackets, PySpark does support that too, but I encourage you to use filter() instead. Check out my rant about why you shouldn’t use boolean indexing for the details. The TLDR is that filter() requires less typing, makes your code more readable and portable, and it allows you to chain method calls together using dot chains.

-

Here’s the boolean indexing equivalent of the last example from above.

-
-

+
+


+
+
# using boolean indexing
pyspark_df[(pyspark_df.day == 'Fri') & (pyspark_df.time == 'Lunch')]
-
-

I know, it looks horrendous, but not as horrendous as the error message you’ll get if you forget the parentheses.

-
-
-

Add new columns to a dataframe

-

You can add new columns which are functions of the existing columns with the withColumn() method.

-
-
import pyspark.sql.functions as f

# add a new column using col() to reference other columns
pyspark_df.withColumn('tip_percent', f.col('tip') / f.col('total_bill'))
-
-

Notice that we’ve imported the pyspark.sql.functions module. This module contains lots of useful functions that we’ll be using all over the place, so it’s probably a good idea to go ahead and import it whenever you’re using PySpark. BTW, it seems like folks usually import this module as f or F. In this example we’re using the col() function, which allows us to refer to columns in our dataframe using string representations of the column names.

-

You could also achieve the same result using the dot to reference the other columns, but this requires us to type the dataframe name over and over again, which makes it harder to reuse this code on different dataframes or in dot chains.

-
-
# add a new column using the dot to reference other columns (less recommended)
pyspark_df.withColumn('tip_percent', pyspark_df.tip / pyspark_df.total_bill)
-
-

If you want to apply numerical transformations like exponents or logs, use the built-in functions in the pyspark.sql.functions module.

-
-
# log 
pyspark_df.withColumn('log_bill', f.log(f.col('total_bill')))

# exponent
pyspark_df.withColumn('bill_squared', f.pow(f.col('total_bill'), 2))
-
-

You can implement conditional assignment like SQL’s CASE WHEN construct using the when() function and the otherwise() method.

-
-
# conditional assignment (like CASE WHEN)
pyspark_df.withColumn('is_male', f.when(f.col('sex') == 'Male', True).otherwise(False))

# using multiple when conditions and values
pyspark_df.withColumn('bill_size', 
    f.when(f.col('total_bill') < 10, 'small')
    .when(f.col('total_bill') < 20, 'medium')
    .otherwise('large')
)
-
-

Remember that since PySpark dataframes are immutable, calling withColumns() on a dataframe returns a new dataframe. If you want to persist the result, you’ll need to make an assignment.

-
pyspark_df = pyspark_df.withColumns(...)
-
-
-

Group by and aggregate

-

PySpark provides a groupBy() method similar to the pandas groupby(). Just like in pandas, we can call methods like count() and mean() on our grouped dataframe, and we also have a more flexible agg() method that allows us to specify column-aggregation mappings.

-
-

# group by and count
pyspark_df.groupBy('time').count().show()
-
-
+------+-----+
-|  time|count|
-+------+-----+
-|Dinner|  176|
-| Lunch|   68|
-+------+-----+
-
-
-
-
-

# group by and specify column-aggregation mappings with agg()
pyspark_df.groupBy('time').agg({'total_bill': 'mean', 'tip': 'max'}).show()
-
-
+------+--------+------------------+
-|  time|max(tip)|   avg(total_bill)|
-+------+--------+------------------+
-|Dinner|    10.0| 20.79715909090909|
-| Lunch|     6.7|17.168676470588235|
-+------+--------+------------------+
-
-
-
-

If you want to get fancier with your aggregations, it might just be easier to express them using hive syntax. Read on to find out how.

-
-
-

Run Hive SQL on dataframes

-

One of the mind-blowing features of PySpark is that it allows you to write hive SQL queries on your dataframes. To take a PySpark dataframe into the SQL world, use the createOrReplaceTempView() method. This method takes one string argument which will be the dataframes name in the SQL world. Then you can use spark.sql() to run a query. The result is returned as a PySpark dataframe.

-
-

# put pyspark dataframe in SQL world and query it
pyspark_df.createOrReplaceTempView('tips')
spark.sql('select * from tips').show(5)
-
-
+----------+----+------+------+---+------+----+
-|total_bill| tip|   sex|smoker|day|  time|size|
-+----------+----+------+------+---+------+----+
-|     16.99|1.01|Female|    No|Sun|Dinner|   2|
-|     10.34|1.66|  Male|    No|Sun|Dinner|   3|
-|     21.01| 3.5|  Male|    No|Sun|Dinner|   3|
-|     23.68|3.31|  Male|    No|Sun|Dinner|   2|
-|     24.59|3.61|Female|    No|Sun|Dinner|   4|
-+----------+----+------+------+---+------+----+
-only showing top 5 rows
-
-
-
-

This is awesome for a couple of reasons. First, it allows us to easily express any transformations in hive syntax. If you’re like me and you’ve already been using hive, this will dramatically reduce the PySpark learning curve, because when in doubt, you can always bump a dataframe into the SQL world and simply use hive to do what you need. Second, if you have a hive deployment, PySpark’s SQL world also has access to all of your hive tables. This means you can write queries involving both hive tables and your PySpark dataframes. It also means you can run hive commands, like inserting into a table, directly from PySpark.
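For instance, something along these lines (the database and table names here are made up for illustration) lets you persist a query result as a hive table straight from PySpark:

# write a query result out to a hive table directly from pyspark (illustrative names;
# assumes a hive metastore and a database called some_database exist)
spark.sql("""
    create table if not exists some_database.tip_summary as
    select time, avg(tip) as avg_tip from tips group by time
""")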

-

Let’s do some aggregations that might be a little trickier to do using the PySpark built-in functions.

-
-

# run hive query and save result to dataframe
tip_stats_by_time = spark.sql("""
    select
        time
        , count(*) as n 
        , avg(tip) as avg_tip
        , percentile_approx(tip, 0.5) as med_tip
        , avg(case when tip > 3 then 1 else 0 end) as pct_tip_gt_3
    from 
        tips
    group by 1
""")

tip_stats_by_time.show()
-
-
+------+---+------------------+-------+-------------------+
-|  time|  n|           avg_tip|med_tip|       pct_tip_gt_3|
-+------+---+------------------+-------+-------------------+
-|Dinner|176| 3.102670454545455|    3.0|0.44886363636363635|
-| Lunch| 68|2.7280882352941176|    2.2|0.27941176470588236|
-+------+---+------------------+-------+-------------------+
-
-
+font-style: inherit;">= expit(gbm.predict(x))
-
-
-
-

Visualization with PySpark

-

There aren’t any tools for visualization included in PySpark. But that’s no problem, because we can just use the toPandas() method on a PySpark dataframe to pull data back into pandas. Once we have a pandas dataframe, we can happily build visualizations as usual. Of course, if your PySpark dataframe is huge, you wouldn’t want to use toPandas() directly, because PySpark will attempt to read the entire contents of its huge dataframe into memory. Instead, it’s best to use PySpark to generate aggregations of your data for plotting or to pull only a sample of your full data into pandas.

-
-

+
# read aggregated pyspark dataframe into pandas for plotting
plot_pdf = tip_stats_by_time.toPandas()
plot_pdf.plot.bar(x='time', y=['avg_tip', 'med_tip']);
-

Figure showing a bar plot of average and median tips by time

+


+

Wrapping Up

-

So that’s a wrap on our crash course in working with PySpark. You now have a good idea of what pyspark is and how to get started manipulating dataframes with it. Stay tuned for a future post on PySpark’s companion ML library MLlib. In the meantime, may no dataframe be too large for you ever again.

+


]]>
python - PySpark - tutorial - https://randomrealizations.com/posts/hello-pyspark/index.html - Mon, 21 Jun 2021 21:00:00 GMT - + gradient boosting + from scratch + https://randomrealizations.com/posts/gradient-boosting-machine-with-any-loss-function/index.html + Fri, 22 Oct 2021 22:00:00 GMT +
diff --git a/archive.html b/archive.html index afa80e1..f7afd72 100644 --- a/archive.html +++ b/archive.html @@ -168,7 +168,7 @@
Subscribe
- + @@ -202,7 +202,26 @@

Archive

-
+
+ + +
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+

diff --git a/archive.xml b/archive.xml index f1eeab2..5119f35 100644 --- a/archive.xml +++ b/archive.xml @@ -10,3505 +10,4035 @@ A blog about data science, statistics, machine learning, and the scientific method quarto-1.3.433 -Tue, 05 Sep 2023 21:00:00 GMT +Tue, 24 Oct 2023 22:00:00 GMT - Blogging with Quarto and Jupyter: The Complete Guide + XGBoost for Regression in Python Matt Bowers - https://randomrealizations.com/posts/blogging-with-quarto-and-jupyter/index.html + https://randomrealizations.com/posts/xgboost-for-regression-in-python/index.html -

Ahh, blogging. I think we can all agree it’s probably one of the greatest forms of written communication to have ever existed.

-

What’s that you say? You’d like to set up your own blog? And you say you want to use a dead simple, data science friendly tech stack? And you wouldn’t be caught dead handing over your painstakingly crafted content to Medium? No worries, friend, I know exactly what you need.

-

Enter Quarto.

-

In this post we’ll set up a blog using a lightweight tech stack consisting of a terminal running quarto, git, and jupyter, and we’ll use Github Pages to host our website for free. Optionally, for a few dollars a year, we can even host our website at our own custom domain.

-

A quick note on how to use this post. Quarto’s documentation on blogging provides a nice high-level overview of the blogging workflow, and I refer to it and many other bits of Quarto documentation here. At the time of writing, the handful of other blog posts about setting up quarto blogs are aimed at the RStudio user. This post exists to provide a jupyter and python-centric path for you to follow through the entire setup of your new quarto blog, and to impart my opinionated recommendations about best practices.

-

Let’s get into it!

-
-

What is Quarto?

-

Quarto is a way to render plain text source files containing markdown and code in python, R, and other languages into published formats like websites, books, slides, journal articles, etc. There is clearly a lot that we can do with it, but today, we’ll use it to make a nice looking blog out of some jupyter notebook files.

-

Quarto follows the familiar convention of using a project directory to house all material for a given project. The directory will include source files like jupyter notebooks or Rmarkdown files, as well as configuration files that control how output files are rendered. We can then use the quarto command line utility to perform actions like previewing and rendering within the project directory.

-
-
-

Instantiate your blog

-
-

Create a new Quarto project

-

After installing quarto fire up a new terminal and check that the install was successful by running

-
ds-templates repo on GitHub, so go ahead and download the notebook and follow along with your own data.

+

If you’re not already comfortable with the ideas behind gradient boosting and XGBoost, you’ll find it helpful to read some of my previous posts to get up to speed. I’d start with this introduction to gradient boosting, and then read this explanation of how XGBoost works.

+

Let’s get into it! 🚀

+
+

Install and import the xgboost library

+

If you don’t already have it, go ahead and use conda to install the xgboost library, e.g.

+
$ conda install -c conda-forge xgboost
+

Then import it along with the usual suspects.

+
+
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb


quarto --version

Now think of a name for your blog’s project directory; this will also be the name of its git repository. The name will have no effect on your website’s name or URL, so don’t think too hard. The quarto documentation calls it myblog, so we’ll one-up them and call ours pirate-ninja-blog. Run the following command to create it in the current directory.

quarto create-project pirate-ninja-blog --type website:blog

That command creates a directory called pirate-ninja-blog containing everything you need to render your new blog. You can preview your website by running

quarto preview pirate-ninja-blog

Your local website will open in a new browser window. As you edit various aspects of your blog, the preview will update with your changes. This preview feature is so simple and so great.

Previewing your blog with quarto preview command

-
-

Set up a git repo

-

Change into your project directory and we’ll start setting up your git repo.

-

+

Read dataset into python

+

In this example we’ll work on the Kagle Bluebook for Bulldozers competition, which asks us to build a regression model to predict the sale price of heavy equipment. Amazingly, you can solve your own regression problem by swapping this data out with your organization’s data before proceeding with the tutorial.

+

Go ahead and download the Train.zip file from Kaggle and extract it into Train.csv. Then read the data into a pandas dataframe.

+
+
df = pd.read_csv('Train.csv', parse_dates=['saledate']);

cd pirate-ninja-blog

initialize a new git repo.

git init -b main

The _site/ directory is where quarto puts the rendered output files, so you’ll want to ignore it in git. I also like to just ignore any hidden files too, so add the following to your .gitignore file.

.gitignore
/.quarto/
/_site/
.*

df.info()
<class 'pandas.core.frame.DataFrame'>
+RangeIndex: 401125 entries, 0 to 401124
+Data columns (total 53 columns):
+ #   Column                    Non-Null Count   Dtype         
+---  ------                    --------------   -----         
+ 0   SalesID                   401125 non-null  int64         
+ 1   SalePrice                 401125 non-null  int64         
+ 2   MachineID                 401125 non-null  int64         
+ 3   ModelID                   401125 non-null  int64         
+ 4   datasource                401125 non-null  int64         
+ 5   auctioneerID              380989 non-null  float64       
+ 6   YearMade                  401125 non-null  int64         
+ 7   MachineHoursCurrentMeter  142765 non-null  float64       
+ 8   UsageBand                 69639 non-null   object        
+ 9   saledate                  401125 non-null  datetime64[ns]
+ 10  fiModelDesc               401125 non-null  object        
+ 11  fiBaseModel               401125 non-null  object        
+ 12  fiSecondaryDesc           263934 non-null  object        
+ 13  fiModelSeries             56908 non-null   object        
+ 14  fiModelDescriptor         71919 non-null   object        
+ 15  ProductSize               190350 non-null  object        
+ 16  fiProductClassDesc        401125 non-null  object        
+ 17  state                     401125 non-null  object        
+ 18  ProductGroup              401125 non-null  object        
+ 19  ProductGroupDesc          401125 non-null  object        
+ 20  Drive_System              104361 non-null  object        
+ 21  Enclosure                 400800 non-null  object        
+ 22  Forks                     192077 non-null  object        
+ 23  Pad_Type                  79134 non-null   object        
+ 24  Ride_Control              148606 non-null  object        
+ 25  Stick                     79134 non-null   object        
+ 26  Transmission              183230 non-null  object        
+ 27  Turbocharged              79134 non-null   object        
+ 28  Blade_Extension           25219 non-null   object        
+ 29  Blade_Width               25219 non-null   object        
+ 30  Enclosure_Type            25219 non-null   object        
+ 31  Engine_Horsepower         25219 non-null   object        
+ 32  Hydraulics                320570 non-null  object        
+ 33  Pushblock                 25219 non-null   object        
+ 34  Ripper                    104137 non-null  object        
+ 35  Scarifier                 25230 non-null   object        
+ 36  Tip_Control               25219 non-null   object        
+ 37  Tire_Size                 94718 non-null   object        
+ 38  Coupler                   213952 non-null  object        
+ 39  Coupler_System            43458 non-null   object        
+ 40  Grouser_Tracks            43362 non-null   object        
+ 41  Hydraulics_Flow           43362 non-null   object        
+ 42  Track_Type                99153 non-null   object        
+ 43  Undercarriage_Pad_Width   99872 non-null   object        
+ 44  Stick_Length              99218 non-null   object        
+ 45  Thumb                     99288 non-null   object        
+ 46  Pattern_Changer           99218 non-null   object        
+ 47  Grouser_Type              99153 non-null   object        
+ 48  Backhoe_Mounting          78672 non-null   object        
+ 49  Blade_Type                79833 non-null   object        
+ 50  Travel_Controls           79834 non-null   object        
+ 51  Differential_Type         69411 non-null   object        
+ 52  Steering_Controls         69369 non-null   object        
+dtypes: datetime64[ns](1), float64(2), int64(6), object(44)
+memory usage: 162.2+ MB

Prepare raw data for XGBoost

+

When faced with a new tabular dataset for modeling, we have two format considerations: data types and missingness. From the call to df.info() above, we can see we have both mixed types and missing values.

+

When it comes to missing values, some models like the gradient booster or random forest in scikit-learn require purely non-missing inputs. One of the great strengths of XGBoost is that it relaxes this requirement, allowing us to pass in missing feature values, so we don’t have to worry about them.

+
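As a tiny illustration (my own sketch, not from the original post), a DMatrix built from data containing NaN works as-is; XGBoost treats NaN as missing by default and learns a default split direction for those rows:

import numpy as np
import pandas as pd
import xgboost as xgb

X_toy = pd.DataFrame({'x1': [1.0, np.nan, 3.0], 'x2': [0.5, 1.5, np.nan]})
y_toy = pd.Series([1.0, 2.0, 3.0])
dm_toy = xgb.DMatrix(X_toy, label=y_toy)  # no imputation required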

Regarding data types, all ML models for tabular data require inputs to be numeric, either integers or floats, so we’re going to have to deal with those object columns.

+
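A quick way to see which columns those are (a one-off sketch of my own, not part of the original flow):

df.select_dtypes(include='object').columns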
+

Encode string features

+

The simplest way to encode string variables is to map each unique string value to an integer; this is called integer encoding.

+
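As a toy illustration of integer encoding (my own sketch, not from the post):

import pandas as pd

s = pd.Series(['low', 'high', 'medium', 'high'])
s.astype('category').cat.codes  # maps each unique string value to an integer code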

We have a couple of options for how to implement this transformation: pandas categoricals or the scikit-learn label encoder. We can use the categorical type in pandas to generate mappings from string values to integers for each string feature. The category type is a bit like the factor type in R. Pandas stores the underlying data as integers, and it also keeps a mapping from the integers to the string values. XGBoost will be able to access the integers for model fitting. This is nice because we can still access the actual categories which can be helpful when we start taking a closer look at the data. If you prefer, you can also use the scikit-learn label encoder to replace the string columns with their integer-mapped counterparts.

+
+
def encode_string_features(df, use_cats=True):
    out_df = df.copy()
    for feature, feature_type in df.dtypes.items():
        if feature_type == 'object':
            if use_cats:
                out_df[feature] = out_df[feature].astype('category')
            else:
                from sklearn.preprocessing import LabelEncoder
                out_df[feature] = LabelEncoder() \
                    .fit_transform(out_df[feature].astype('str'))
    return out_df

df = encode_string_features(df, use_cats=False)

For now we’ll just stage the .gitignore file for the initial commit. Eventually you’ll want to commit the other files in your project too, either now or later as you edit them.

-
git add .gitignore
git commit -m "Initial commit."
-

Then follow GitHub’s instructions to add the local repo to GitHub using git. Basically just create a new blank repo on GitHub’s website, copy the remote repository url, then add the remote repo url to your local git repo.

-
git remote add origin <REMOTE_URL>
-

Then you’ll be able to push any commits you make to your remote repository on GitHub by saying git push.

-
-
-
-

Understand the components of a Quarto blog

-
-

Contents of the quarto project directory

-

Let’s have a quick look at what quarto put inside of the project directory.

-
_quarto.yml
-about.qmd
-index.qmd
-profile.jpg
-posts
-styles.css
-_site
-
    -
  • Quarto uses yaml files to specify configurations. The _quarto.yml file specifies project-wide configurations.
  • Quarto’s markdown file type uses the extension qmd. Each qmd file will correspond to a page in our website. index.qmd is the homepage and about.qmd is the About page.
  • profile.jpg is an image that is included on the about page.
  • styles.css defines css styles for the website.
  • posts is a directory where we can put qmd and other documents which will be rendered into blog posts.
  • posts/_metadata.yml contains configurations that apply to all documents in the posts directory.
  • _site is a directory that contains the rendered website. Whereas all the other files and directories constitute the source code for our blog, _site is the rendered output, i.e. the website itself.
-

Let’s take a closer look at these components and start to make the blog yours.

-
-
-

Project-wide Configurations

-

The _quarto.yml file controls project-wide configurations, website options, and HTML document options. Options in this file are specified in yaml in a key/value structure with three top level keys: project, website, and format. The quarto website options documentation has the full list of options that you can set here. It will be very helpful to take a look at some example _quarto.yml files in the wild, such as the one from quarto.org or even the one from this blog.

-

Under the website key, go ahead and set the title and description for your blog.

-
website:
-  title: "Pirate Ninja Blog"
-  description: "A blog about pirates, ninjas, and other things"
-

You can also customize your navbar which is visible at the top of all pages on your site. Also go ahead and set your github and twitter urls for the icons in the navbar.

-

Under the format key, you can also try changing the HTML theme to one of the other 25 built-in themes.

-
-
-

The About Page

-

The about.qmd file defines an About page for the blog. Go ahead and fill in your details in the about.qmd file; you can also replace the profile.jpg file with your own image. Have a look at the quarto documentation on About pages to explore more functionality. Notably, you can change the template option to change the page layout.

-
-
-

The Homepage

-

The index.qmd file defines the landing page for your website. It is a listing page which shows links to all the pages in the posts directory. For now we don’t need to change anything here.

-
-
-

The posts/ directory

-

The posts directory contains all your blog posts. There aren’t really requirements for subdirectory structure inside the posts directory, but it’s a best practice to create a new subdirectory for each new blog post. This just helps keep auxiliary files like images or conda environment files organized. Out of the box, the posts directory looks like this.

-
posts
-├── _metadata.yml
-├── post-with-code
-│   ├── image.jpg
-│   └── index.qmd
-└── welcome
-    ├── index.qmd
-    └── thumbnail.jpg
-

There are two reasons we want to be deliberate about how we organize and name things in the posts directory. First, the vast majority of our blog’s content will live here, so we don’t want it to be a big confusing mess. Second, the directory structure and file naming will be reflected in the URLs to our blog posts; if you prefer tidy-looking URLs, and I know you do, then you want to use tidy directory and file names in the posts directory.

-

You can check how the URLs look by navigating to one of the pre-populated posts in the site preview in your browser. For instance, the welcome post’s URL would be

-
https://example.com/posts/welcome/
-

When quarto renders the qmd file at posts/welcome/index.qmd it creates an output document in the website at posts/welcome/index.html. In fact the full URL to the post is,

-
https://example.com/posts/welcome/index.html
-

but the browser knows if you give it a URL with a path ending in a /, then it should look for the index.html file inside that directory.

-

So I think the best practice here is to name your new post subdirectory with the title of the post in all lower case with dashes for spaces, e.g. post-with-code. Then to force all output pages to be called index.html, you can set the output-file key in the posts/_metadata.yml file like this.

-
-
-
posts/_metadata.yml
output-file: index.html
-

Note that alternative naming conventions are possible; notably you might want to prefix each post name with the date in yyyy-mm-dd format, so the post subdirectories sort temporally and look nice in a list. That’s the convention used in Quarto’s own blog at quarto.org. As long as you keep everything for a given post inside its subdirectory, you should be good to go with nice-looking URLs.

-
-
-
-

Authoring posts with jupyter

-
-

Creating a new post

-

It turns out that quarto will render not only .qmd files, but also .ipynb files in the posts directory. So let’s create a new blog post from a notebook.

-

I think it’s a best practice to write draft posts in their own git branches; that way, if you need to deploy some kind of hotfix to main while you’re drafting a post, you won’t have to deploy a half-written post living on the main branch. To start a new post, create a new development branch, change into the posts directory, create a new subdirectory with your preferred naming convention, change into that new directory, and fire up jupyter.

-
git checkout -b new-post
cd posts
mkdir new-post
cd new-post
jupyter notebook
-

Now create a new notebook from the jupyter UI. In order for quarto to recognize the document, the first cell of the notebook must be a raw text cell (press r in command mode to change a cell to raw text), and it must contain the document’s yaml front matter. You can use the following as a frontmatter template.

-
---
-title: New Post
-date: 2023-07-12
-description: A nice new post
-categories: [nonsense, code]
----
-

Now to preview your post, open a new terminal, change into your blog’s project directory and run the quarto preview command. You’ll see a link to the new post in the listing on the homepage. I usually like to have the preview open in a browser while I’m editing the jupyter notebook, just to make sure things look the way I want in the rendered output. From here you can keep editing the notebook, and the preview will update in the browser dynamically.

-
-
-

Markdown and code cells

-

From here you can put text in markdown cells and you can write code in code cells. Let’s add a markdown cell with some markdown formatting.

-
## A nice heading
-
-Here is some lovely text and an equation.
-
-$$ a^2 + b^2 = c^2 $$
-
-Here's a list.
-
-- a link to an [external website](https://quarto.org).
-- a link to [another post in this blog](/posts/welcome/index.qmd).
-

This markdown will be rendered into the HTML page for the post. The last line in the above cell demonstrates the best practice for using relative urls to link to other resources within your website. Instead of providing the full url in the parentheses, just give the path to the qmd or ipynb file that you want to link to. Note that paths need to start with the / at the root of the quarto project, since without it, quarto will try to resolve paths relative to the location of the current document instead of the root of the project.

-

Then create a code cell with some code. Try something like this.

-
print('Hello, Quarto!')
-

By default, both code and cell output will be rendered into the HTML output. So far our jupyter notebook looks like this.

-
-
-

-
View of a new post being written in jupyter notebook
-
-
-

Back in the browser window running your blog preview, you can see the rendered page of the new post.

-
-
-

-
View of the preview of the rendered post
-
-
-

Figures

-

Let’s add a figure to our post. Add a new code cell with the following code.

# | fig-cap: This is my lovely line plot
# | fig-alt: A line plot extending up and to the right

import numpy as np
import matplotlib.pyplot as plt

x = np.arange(10)
y = 2 * x + 1
plt.plot(x, y);

Encode date and timestamp features

+

While dates feel sort of numeric, they are not numbers, so we need to transform them into numeric columns. Unfortunately, encoding timestamps isn’t as straightforward as encoding strings, so we actually might need to engage in a little bit of feature engineering. A single date has many different attributes, e.g. days since epoch, year, quarter, month, day, day of year, day of week, is holiday, etc. As a starting point, we can just add a few of these attributes as features. Once a feature is represented as a date or timestamp data type, you can access various attributes via the dt attribute.

+
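For example (a quick sketch of my own, not from the original post), once saledate is a datetime column its parts are available through the dt accessor:

df['saledate'].dt.year       # calendar year of each sale
df['saledate'].dt.quarter    # quarter 1 through 4
df['saledate'].dt.dayofweek  # Monday=0 through Sunday=6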
+
def encode_datetime_features(df, datetime_features, datetime_attributes):
    out_df = df.copy()
    for datetime_feature in datetime_features:
        for datetime_attribute in datetime_attributes:
            if datetime_attribute == 'days_since_epoch':
                out_df[f'{datetime_feature}_{datetime_attribute}'] = \
                    (out_df[datetime_feature]
                     - pd.Timestamp(year=1970, month=1, day=1)).dt.days
            else:
                out_df[f'{datetime_feature}_{datetime_attribute}'] = \
                    getattr(out_df[datetime_feature].dt, datetime_attribute)
    return out_df

datetime_features = [
    'saledate',
]
datetime_attributes = [
    'year',
    'month',
    'day',
    'quarter',
    'day_of_year',
    'day_of_week',
    'days_since_epoch',
]

df = encode_datetime_features(df, datetime_features, datetime_attributes)
-

Notice a couple of important details. First I placed a semicolon at the end of the last line. That suppresses the [<matplotlib.lines.Line2D at 0x1111d00a0>] text output, which would otherwise show up in your blog post too.

-

Second, I added a couple of special comments at the top of the cell. Quarto allows you to specify numerous code execution options, designated by the # | prefix, to control the behavior and appearance of the code and output at a cell level. I set two keys here, fig-cap and fig-alt which respectively set the figure caption text and the image alt tag text. The fig-alt key is particularly important to set on all your figures because it provides the non-visual description for screenreader users reading your post. The alt tag should be a simple description of what the plot is and possibly what it shows or means. Be a friend of the blind and visually impaired community and set fig-alt on all of your figures.

-
-
-

Version control

-

As you edit your new post, go ahead and commit your changes on your development branch. Once you’ve finished your new post, you can merge it into main like this.

-
git checkout main
git merge new-post
-

Then you can push to GitHub by running git push. You should also be sure to run a final quarto preview to check that everything looks good before publishing to the web.

-
- -
-

Publishing your blog to the web

-
-

Hosting with GitHub Pages

-

It’s likely that the easiest (read best) option for you is to host your blog on GitHub Pages. This is because GitHub pages is free, and since you already have your blog’s source code checked into a remote repository at GitHub, it’s very easy to set up. Quarto’s documentation on publishing to GitHub Pages outlines three ways to publish your website, but I recommend their option 2, using the quarto publish command. Once you set up your gh-pages branch as described in the documentation, you simply run quarto publish at the command line and your updates are deployed to your website.

-
-
-

Setting up your domain name

-

By default, if you choose to host with GitHub Pages, your website will be published to a url in the form https://username.github.io/reponame/. You can certainly do this; for example Jake VanderPlas’s awesome blog Pythonic Perambulations lives at http://jakevdp.github.io.

-

But, like me, you might want to get your own custom domain by buying, or really renting, one from a registrar. I use Namecheap. If you decide to go for a custom domain, refer to GitHub’s documentation on custom domains. You’ll also need to point your domain registrar to the IP address where GitHub Pages is hosting your website. For an example of how to do this at Namecheap, see Namecheap’s documentation about GitHub Pages

-

Whether you decide to use the standard github.io domain or your own custom domain, be sure to set the site-url key in your _quarto.yml file to ensure other quarto functionality works correctly. For example

-
-
-
_quarto.yml
website:
  site-url: https://example.com/
-
-

Edit: I found that after upgrading to quarto 1.3, using quarto publish to publish from the gh-pages branch obliterates the CNAME file that is created when you set a custom domain in your repository settings > Pages > Custom Domain. That breaks the mapping from your custom domain to your published website. See this discussion thread for details. The fix is to manually create a CNAME file in the root of your project, and include it in the rendered website using the resources option under the project key in _quarto.yml. The CNAME file should just contain your custom domain, excluding any https://.

-
-
-
CNAME
example.com
-

With the CNAME file in the root of your quarto project, you can then include it in the rendered output.

-
-
-
_quarto.yml
project:
  resources:
    - CNAME
-
-

Keep in touch with your readers

-
-

RSS Feed

-

The RSS feed is handy for syndicating your posts to feed readers, other websites, and to your email subscribers. As described in quarto’s documentation on RSS feeds, you can automatically generate an RSS feed for your blog by first setting the value of site-url under the website key in _quarto.yml, and then setting feed: true under the listing key in the frontmatter of index.qmd. This will generate an RSS feed in the root of your website called index.xml. Once you have an RSS feed, go ahead and submit it to Python-Bloggers to have your work syndicated to a wider audience and to strengthen our little community of independent data science blogs.

-
-
-

Email Subscriptions

-

The idea here is to have a form field on your website where readers can input their email address to be added to your mailing list. Quarto’s documentation on subscriptions describes how to set up a subscribe box on your blog using MailChimp, so we won’t repeat it here. Once you have some subscribers, you can send them updates whenever you write a new post. You could do this manually or, in my case, set up an automation through MailChimp which uses your RSS feed to send out email updates to the list about new posts.

-
-
-

Comments

-

Quarto has built-in support for three different comment systems: hypothesis, utterances, and giscus. The good news is that these are all free to use, easy to set up, and AFAIK do not engage in any sketchy tracking activities. The bad news is that none of them are ideal because they all require the user to create an account and log in to leave a comment. We want to encourage readers to comment, so we don’t want them to have to create accounts or deal with passwords or pick all the squares with bicycles or any such nonsense, just to leave a little comment. To that end, I’ve actually been working on self-hosted login-free comments for this blog using isso, but it’s a bit more involved than these built-in solutions, so we’ll have to discuss it at length in a future post.

-

If you prefer an easy, out-of-the-box solution, I can recommend utterances, which uses GitHub issues to store comments for each post. I used utterances for comments on the first jekyll-based incarnation of this blog; you can still see the utterances comments on posts before this one. Go check out the Quarto documentation on comments to see how to set up utterances in your project.

-
-
-

Analytics

-

As a data enthusiast, you’ll likely enjoy collecting some data about page views and visitors to your site. You might be tempted to use Google Analytics to do this; indeed quarto makes it very easy to just add a line to your _quarto.yml file to set it up. Unfortunately, in this case, going with the easy and free solution means supporting Google’s dubious corporate surveillance activities. Be a conscientious internet citizen and avoid using Google Analytics on your blog. Fortunately, there are numerous privacy-friendly alternatives to Google Analytics. For this blog I’m self-hosting umami analytics, which might warrant its own post in the future.

-
-
-
-

More humbly suggested best practices

-
-

Using conda environments for reproducibility

-

As you know, it’s a good practice to use an environment manager to keep track of packages, their versions, and other dependencies for software in a data science project. The same applies to blog posts; especially if you’re using unusual or bleeding-edge packages in a post. This will help us out a lot when we have to go back and re-run a notebook a couple years later to regenerate the output. Here we’ll use conda as our environment manager.

-

To be clear, I don’t bother doing this if I’m just using fairly stable functionality in standard packages like pandas, numpy, and matplotlib, but we’ll do it here for illustration. From a terminal sitting inside our post subdirectory at posts/new-post, create a new conda environment with the packages you’re using in the post.

-
conda create -p ./venv jupyter numpy matplotlib
-

Note the -p flag which tells conda to save the environment to ./venv in the current working directory. This will save all the installed packages here in the post directory instead of in your system-wide location for conda environments. Note also that you’ll want to avoid checking anything in the venv directory into source control, so add venv to the .gitignore file at the root of the quarto project to ignore all venv directories throughout your quarto project.

-

Now whenever you work on this post, you’ll navigate to the post subdirectory with a terminal and activate the conda environment.

-
conda activate ./venv
-

Then you can fire up your jupyter notebook from the command line, and it will use the active conda environment.

-

Since we don’t want to check the venv directory with all its installed libraries into source control, we need to create an environment.yml file from which the environment can later be reproduced. With the local conda environment active, run the following.

-
conda env export --from-history > environment.yml
-

The --from-history flag tells conda to skip adding a bunch of system specific stuff that will gunk up your environment yaml file and make it harder to use for cross-platform reproducibility. This environment.yml file is the only environment management artifact that you need to check into git.

-

Later if you need to recreate the environment from the environment.yml file, you can use the following command.

-
conda env create -f environment.yml -p ./venv
-
-
-

Image file best practices

-

Let’s talk about image file sizes. The key idea is that we want images to have just enough resolution to look good; any more than that and we’re just dragging around larger-than-necessary files, wasting bandwidth, and slowing down page load times.

-

You can read all about choosing optimal image sizes, but the TLDR is that images should be just large enough (in pixels) to fill the containers they occupy on the page. In our quarto blog, the two most common kinds of images are inline images we put in the body of posts and image thumbnails that show up as the associated image for a post, e.g. in the listing on our homepage. The inline image container seems to be about 800 pixels wide in my browser and the thumbnails are smaller, so adding some margin of error, I decided to go for 1000x750 for inline images and 500x375 for the thumbnails.

-

I use a command line tool called Image Magick to resize image files. Go ahead and install image magick with homebrew, and let’s add some images to our new post.

-

For this example I’ll use a nice shot of the London Underground from Wikipedia. Save your image as image.jpg. Then use image magick to create two new resized images for inline and thumbnail use.

-
convert image.jpg -resize 1000x1000 main.jpg
convert image.jpg -resize 500x500 thumbnail.jpg
-

These commands do not change the aspect ratio of the image; they just reduce the size so that the image fits within the size specified.

-

Now move both of your new images into the post subdirectory at posts/new-post/. To specify the thumbnail image, set the image key in the post’s front matter. Be sure to also add an alt tag description of the image using the image-alt key to keep it accessible for screen reader users. Our post’s frontmatter now looks like this.

-
---
-title: New Post
-date: 2023-07-12
-description: A nice new post
-categories: [nonsense, code]
-image: thumbnail.jpg
-image-alt: "A London Underground train emerging from a tunnel"
----
-

To include an image within the body of a post, use markdown in the post to include the image. I added a markdown cell just under the front matter containing the following.

-
![A London Underground train emerging from a tunnel](main.jpg "")
-

In your preview browser window, you can see we have the thumbnail for our new post on the homepage listing.

-
-
-

-
A screenshot of the homepage showing the new post’s thumbnail image
-
-
-

And we also have the inline image appearing in the body of the post.

-
-
-

-
A screenshot of the new post showing the image included in the body of the post
-
-
-

You can take a look at the source code for this blog to see some examples of including images in posts.

-
-
-
-

SEO

-

SEO is a huge topic, but here we’ll just focus on a few fundamental technical aspects that we want to be sure to get right. This boils down to registering with the top search engines by market share and ensuring that we’re providing them with the information they need to properly index our pages.

-

I checked the top search engines by global market share and as of 2023 it looks like Google has about 85%, Bing has about 8%, and the others have 2% or less each. So let’s focus on setting our site up to work well with Google search and Bing to get over 90% coverage.

-
-

Google Search Console and Bing Webmaster Tools

-

Google Search Console is a tool for web admins to help analyze search traffic and identify any technical issues that might prevent pages from appearing or ranking well in search. Go ahead and set up an account and register your blog in search console. You can refer to Google’s documentation on search console to guide you through setup and configuration.

-

Once you get set up on GSC, you can also create an account for Bing Webmaster Tools. Do this after setting up GSC because there is an option to import your information from your GSC account.

-

Once you’re set up with GSC and BWT, you’ll get email alerts anytime they crawl your site and detect any indexing problems. When that happens, track down the issues and fix them so your pages can appear in organic searches.

-
-
-

Sitemap

-

A sitemap is an xml document that lists all the pages on your website. It’s a map for the search engine bots that crawl the web looking for new pages to index. Quarto will automatically generate a sitemap called sitemap.xml in the root of your website, as long as you’ve filled out the site-url key in _quarto.yml. You can submit your website for indexing by providing your sitemap in Google Search Console and Bing Webmaster Tools.

-
-
-
-

Wrapping Up

-

Boy howdy, that was a lot, but at this point you should have a fully functioning blog, built with a minimalist, data-science-friendly tech stack consisting of quarto, jupyter, and GitHub. If you do create a blog using quarto, drop a link to it in the comments, and we can all check it out and celebrate your creation!

-
- - ]]> - python - tutorial - blogging - https://randomrealizations.com/posts/blogging-with-quarto-and-jupyter/index.html - Tue, 05 Sep 2023 21:00:00 GMT - - - - Random Realizations Resurrected - Matt Bowers - https://randomrealizations.com/posts/random-realizations-resurrected/index.html - -
-

-
Christ the Redeemer towers into a vast blue Brazilian sky.
-
-
-

Well it’s been over a year since I posted anything here. You see, a lot has been going on here at the Random Realizations Remote Global Headquarters that has distracted from producing the high-quality data science content that you’re used to. Mostly I went on hiatus from work and started traveling, which turns out to be its own full-time job. I had aspirations of writing more after leaving work, but of course, after leaving, I couldn’t be bothered to sit down at my laptop and type stuff about data science to yall. After all, life is bigger than that.

-

When I finally felt like opening up my laptop, I was confronted with an email from the maintainers of fastpages, the open source content management system (CMS) I originally used to create this blog, notifying me that the project was being deprecated and that I would need to migrate my content to some other platform.

-

Boo.

-

That didn’t sound like much fun, so I spent another few months ignoring the blog. But eventually, dear reader, I decided it was time to roll up my sleeves and get this blog thriving once again.

-

Ok so fastpages was going to be deprecated, and I needed to find a new CMS. My requirements were pretty simple: I wanted to write the blog posts with jupyter notebook, and I wanted to host the site on my own domain. Helpfully, the former maintainers of fastpages recommended an alternative CMS called Quarto which I had never heard of. Apparently I had been living under a rock because Quarto appears to be all the rage. Quarto’s website says it’s an open-source scientific and technical publishing system. I think it’s fair to think of it as a way to render plain text or source code from languages like python, R, and julia into a variety of different published formats like websites, books, or journal articles. It was developed by the good folks over at RStudio, and the project has a pretty active following over on github, so I think it’s less likely to suddenly disappear like fastpages.

-

So anyway, I’ve been migrating my content over into this new quarto universe.

-

You may officially consider this blog resurrected from the dead, because this is the first new post published after the migration. The site has a bit of a new look and feel, so I hope you like it. Do let me know in the comments if you find anything amiss with the new website. Otherwise we’ll just assume it’s fabulous.

-

I’m working on a post about how to create a blog with quarto using jupyter and python, so you can too!

-

See you in more posts real soon! Love, Matt.

blogging
https://randomrealizations.com/posts/random-realizations-resurrected/index.html
Tue, 01 Aug 2023 21:00:00 GMT

XGBoost from Scratch
Matt Bowers
https://randomrealizations.com/posts/xgboost-from-scratch/index.html
-

-
A weathered tree reaches toward the sea at Playa Mal País
-
-
-

Well, dear reader, it’s that time again, time for us to do a seemingly unnecessary scratch build of a popular algorithm that most people would simply import from the library without a second thought. But readers of this blog are not most people. Of course you know that when we do scratch builds, it’s not for the hell of it, it’s for the purpose of demystification. To that end, today we are going to implement XGBoost from scratch in python, using only numpy and pandas.

-

Specifically we’re going to implement the core statistical learning algorithm of XGBoost, including most of the key hyperparameters and their functionality. Our implementation will also support user-defined custom objective functions, meaning that it can perform regression, classification, and whatever exotic learning tasks you can dream up, as long as you can write down a twice-differentiable objective function. We’ll refrain from implementing some simple features like column subsampling which will be left to you, gentle reader, as exercises. In terms of tree methods, we’re going to implement the exact tree-splitting algorithm, leaving the sparsity-aware method (used to handle missing feature values) and the approximate method (used for scalability) as exercises or maybe topics for future posts.

-

As always, if something is unclear, try backtracking through the previous posts on gradient boosting and decision trees to clarify your intuition. We’ve already built up all the statistical and computational background needed to make sense of this scratch build. Here are the most important prerequisite posts:

-
    -
  1. Gradient Boosting Machine from Scratch
  2. Decision Tree From Scratch
  3. How to Understand XGBoost
-

Great, let’s do this.

-
-

The XGBoost Model Class

-

We begin with the user-facing API for our model, a class called XGBoostModel which will implement gradient boosting and prediction. To be more consistent with the XGBoost library, we’ll pass hyperparameters to our model in a parameter dictionary, so our init method is going to pull relevant parameters out of the dictionary and set them as object attributes. Note the use of python’s defaultdict so we don’t have to worry about handling key errors if we try to access a parameter that the user didn’t set in the dictionary.

-
-
import math
import numpy as np
import pandas as pd
from collections import defaultdict


class XGBoostModel():
    '''XGBoost from Scratch
    '''

    def __init__(self, params, random_seed=None):
        self.params = defaultdict(lambda: None, params)
        self.subsample = self.params['subsample'] \
            if self.params['subsample'] else 1.0
        self.learning_rate = self.params['learning_rate'] \
            if self.params['learning_rate'] else 0.3
        self.base_prediction = self.params['base_score'] \
            if self.params['base_score'] else 0.5
        self.max_depth = self.params['max_depth'] \
            if self.params['max_depth'] else 5
        self.rng = np.random.default_rng(seed=random_seed)
+

Transform the target if necessary

+

In the interest of speed and efficiency, we didn’t bother doing any EDA with the feature data. Part of my justification for this is that trees are incredibly robust to outliers, collinearity, missingness, and other assorted nonsense in the feature data. However, they are not necessarily robust to nonsense in the target variable, so it’s worth having a look at it before proceeding any further.

+
+
df.SalePrice.hist(); plt.xlabel('SalePrice');

histogram of sale price showing right-skewed data
+

Often when predicting prices it makes sense to use log price, especially when they span multiple orders of magnitude or have a strong right skew. These data look pretty friendly, lacking outliers and exhibiting only a mild positive skew; we could probably get away without doing any transformation. But checking the evaluation metric used to score the Kaggle competition, we see they’re using root mean squared log error. That’s equivalent to using RMSE on log-transformed target data, so let’s go ahead and work with log prices.
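To see that equivalence concretely, here is a quick check (my own sketch, not from the original post; the helper names rmse and rmsle are made up for illustration):

import numpy as np

def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

def rmsle(y_true, y_pred):
    return np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))

y_true = np.array([10_000., 50_000., 120_000.])
y_pred = np.array([12_000., 45_000., 100_000.])

assert np.isclose(rmsle(y_true, y_pred), rmse(np.log1p(y_true), np.log1p(y_pred)))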

+
+
df['logSalePrice'] = np.log1p(df['SalePrice'])
df.logSalePrice.hist(); plt.xlabel('logSalePrice');

histogram of log sale price showing a more symmetric distribution
+ +
+

Train and Evaluate the XGBoost regression model

+

Having prepared our dataset, we are now ready to train an XGBoost model. We’ll walk through the flow step-by-step first, then later we’ll collect the code in a single cell, so it’s easier to quickly iterate through variations of the model.

+
+

Specify target and feature columns

+

First we’ll put together a list of our features and define the target column. I like to have an actual list defined in the code so it’s easier to see everything we’re putting into the model and easier to add or remove features as we iterate. Just run something like list(df.columns) in a cell to get a copy-pasteable list of columns, then edit it down to the full list of features, i.e. remove the target, date columns, and other non-feature columns.

+
+
# list(df.columns)

features = [
    'SalesID', 'MachineID', 'ModelID', 'datasource', 'auctioneerID',
    'YearMade', 'MachineHoursCurrentMeter', 'UsageBand', 'fiModelDesc',
    'fiBaseModel', 'fiSecondaryDesc', 'fiModelSeries', 'fiModelDescriptor',
    'ProductSize', 'fiProductClassDesc', 'state', 'ProductGroup',
    'ProductGroupDesc', 'Drive_System', 'Enclosure', 'Forks', 'Pad_Type',
    'Ride_Control', 'Stick', 'Transmission', 'Turbocharged', 'Blade_Extension',
    'Blade_Width', 'Enclosure_Type', 'Engine_Horsepower', 'Hydraulics',
    'Pushblock', 'Ripper', 'Scarifier', 'Tip_Control', 'Tire_Size', 'Coupler',
    'Coupler_System', 'Grouser_Tracks', 'Hydraulics_Flow', 'Track_Type',
    'Undercarriage_Pad_Width', 'Stick_Length', 'Thumb', 'Pattern_Changer',
    'Grouser_Type', 'Backhoe_Mounting', 'Blade_Type', 'Travel_Controls',
    'Differential_Type', 'Steering_Controls', 'saledate_year',
    'saledate_month', 'saledate_day', 'saledate_quarter',
    'saledate_day_of_year', 'saledate_day_of_week',
    'saledate_days_since_epoch',
]

target = 'logSalePrice'
-

The fit method, based on our classic GBM, takes a feature dataframe, a target vector, the objective function, and the number of boosting rounds as arguments. The user-supplied objective function should be an object with loss, gradient, and hessian methods, each of which takes a target vector and a prediction vector as input; the loss method should return a scalar loss score, the gradient method should return a vector of gradients, and the hessian method should return a vector of hessians.

-

In contrast to boosting in the classic GBM, instead of computing residuals between the current predictions and the target, we compute gradients and hessians of the loss function with respect to the current predictions, and instead of predicting residuals with a decision tree, we fit a special XGBoost tree booster (which we’ll implement in a moment) using the gradients and hessians. I’ve also added row subsampling by drawing a random subset of instance indices and passing them to the tree booster during each boosting round. The rest of the fit method is the same as the classic GBM, and the predict method is identical too.
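For concreteness, here is one way such an objective could look for squared error; this is my own minimal sketch of the interface described above (the class name and exact scaling are illustrative, not taken from the post):

import numpy as np

class SquaredErrorObjective():
    '''Implements the objective interface: scalar loss, per-instance gradient and hessian.'''
    def loss(self, y, pred):
        return np.mean((y - pred) ** 2)
    def gradient(self, y, pred):
        return pred - y          # derivative of 0.5 * (y - pred)^2 with respect to pred
    def hessian(self, y, pred):
        return np.ones(len(y))   # second derivative is constant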

-
-
def fit(self, X, y, objective, num_boost_round, verbose=False):
    current_predictions = self.base_prediction * np.ones(shape=y.shape)
    self.boosters = []
    for i in range(num_boost_round):
        gradients = objective.gradient(y, current_predictions)
        hessians = objective.hessian(y, current_predictions)
        sample_idxs = None if self.subsample == 1.0 \
            else self.rng.choice(len(y),
                                 size=math.floor(self.subsample*len(y)),
                                 replace=False)
        booster = TreeBooster(X, gradients, hessians,
                              self.params, self.max_depth, sample_idxs)
        current_predictions += self.learning_rate * booster.predict(X)
        self.boosters.append(booster)
        if verbose:
            print(f'[{i}] train loss = {objective.loss(y, current_predictions)}')

def predict(self, X):
    return (self.base_prediction + self.learning_rate
            * np.sum([booster.predict(X) for booster in self.boosters], axis=0))

XGBoostModel.fit = fit
XGBoostModel.predict = predict
+

Split the data into training and validation sets

+

Next we split the dataset into a training set and a validation set. Of course since we’re going to evaluate against the validation set a number of times as we iterate, it’s best practice to keep a separate test set reserved to check our final model to ensure it generalizes well. Assuming that final test set is hidden away, we can use the rest of the data for training and validation.

+

There are two main ways we might want to select the validation set. If there isn’t a temporal ordering of the observations, we might be able to randomly sample. In practice, it’s much more common that observations have a temporal ordering, and that models are trained on observations up to a certain time and used to predict on observations occurring after that time. Since this data is temporal, we don’t want to split randomly; instead we’ll split on observation date, reserving the latest observations for the validation set.

+
+
# Temporal Validation Set
def train_test_split_temporal(df, datetime_column, n_test):
    idx_sort = np.argsort(df[datetime_column])
    idx_train, idx_test = idx_sort[:-n_test], idx_sort[-n_test:]
    return df.iloc[idx_train, :], df.iloc[idx_test, :]


# Random Validation Set
def train_test_split_random(df, n_test):
    np.random.seed(42)
    idx_sort = np.random.permutation(len(df))
    idx_train, idx_test = idx_sort[:-n_test], idx_sort[-n_test:]
    return df.iloc[idx_train, :], df.iloc[idx_test, :]


my_train_test_split = lambda d, n_valid: train_test_split_temporal(d, 'saledate', n_valid)
# my_train_test_split = lambda d, n_valid: train_test_split_random(d, n_valid)

n_valid = 12000
train_df, valid_df = my_train_test_split(df, n_valid)

train_df.shape, valid_df.shape

((389125, 61), (12000, 61))
+
+
+
+
+

Create DMatrix data objects

+

XGBoost uses its own optimized data structure called DMatrix for efficient training and prediction, so next we need to create DMatrix objects for our training and validation datasets.

+
+

If you prefer to use the scikit-learn interface to XGBoost, you don’t need to create these dense matrix objects. More on that below.

+
+
+
dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+
+
+

Set the XGBoost parameters

+

XGBoost has numerous hyperparameters. Fortunately, just a handful of them tend to be the most influential; furthermore, the default values are not bad in most situations. I like to start out with a dictionary containing the default parameter values for just the ones I think are most important. For training there is one required boosting parameter called num_boost_round which I set to 50 as a starting point; you can make this smaller initially if training takes too long.

+
+
# default values for important parameters
params = {
    'learning_rate': 0.3,
    'max_depth': 6,
    'min_child_weight': 1,
    'subsample': 1,
    'colsample_bynode': 1,
    'objective': 'reg:squarederror',
}
num_boost_round = 50
-

All we have to do now is implement the tree booster.

-
-
-

The XGBoost Tree Booster

-

The XGBoost tree booster is a modified version of the decision tree that we built in the decision tree from scratch post. Like the decision tree, we recursively build a binary tree structure by finding the best split rule for each node in the tree. The main difference is the criterion for evaluating splits and the way that we define a leaf’s predicted value. Instead of being functions of the target values of the instances in each node, the criterion and predicted values are functions of the instance gradients and hessians. Thus we need only make a couple of modifications to our previous decision tree implementation to create the XGBoost tree booster.

-
-

Initialization and Inserting Child Nodes

-

Most of the init method is just parsing the parameter dictionary to assign parameters as object attributes. The one notable difference from our decision tree is in the way we define the node’s predicted value. We define self.value according to equation 5 of the XGBoost paper, a simple function of the gradient and hessian values of the instances in the current node. Of course the init also goes on to build the tree via the maybe insert child nodes method. This method is nearly identical to the one we implemented for our decision tree. So far so good.
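As a reminder of where that formula comes from (my paraphrase of equation 5 in the XGBoost paper, not a quote from the post), the optimal leaf weight for a node with gradient sum $G$ and hessian sum $H$ is

$$ w^* = -\frac{G}{H + \lambda} $$

which is exactly what self.value computes from the instances in the current node.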

-
-
class TreeBooster():

    def __init__(self, X, g, h, params, max_depth, idxs=None):
        self.params = params
        self.max_depth = max_depth
        assert self.max_depth >= 0, 'max_depth must be nonnegative'
        self.min_child_weight = params['min_child_weight'] \
            if params['min_child_weight'] else 1.0
        self.reg_lambda = params['reg_lambda'] if params['reg_lambda'] else 1.0
        self.gamma = params['gamma'] if params['gamma'] else 0.0
        self.colsample_bynode = params['colsample_bynode'] \
            if params['colsample_bynode'] else 1.0
        if isinstance(g, pd.Series): g = g.values
        if isinstance(h, pd.Series): h = h.values
        if idxs is None: idxs = np.arange(len(g))
        self.X, self.g, self.h, self.idxs = X, g, h, idxs
        self.n, self.c = len(idxs), X.shape[1]
        self.value = -g[idxs].sum() / (h[idxs].sum() + self.reg_lambda) # Eq (5)
        self.best_score_so_far = 0.
        if self.max_depth > 0:
            self._maybe_insert_child_nodes()

    def _maybe_insert_child_nodes(self):
        for i in range(self.c): self._find_better_split(i)
        if self.is_leaf: return
        x = self.X.values[self.idxs, self.split_feature_idx]
        left_idx = np.nonzero(x <= self.threshold)[0]
        right_idx = np.nonzero(x > self.threshold)[0]
        self.left = TreeBooster(self.X, self.g, self.h, self.params,
                                self.max_depth - 1, self.idxs[left_idx])
        self.right = TreeBooster(self.X, self.g, self.h, self.params,
                                 self.max_depth - 1, self.idxs[right_idx])

    @property
    def is_leaf(self): return self.best_score_so_far == 0.

    def _find_better_split(self, feature_idx):
        pass
+

Train the XGBoost model

+

Check out the documentation on the learning API to see all the training options. During training, I like to have XGBoost print out the evaluation metric on the train and validation set after every few boosting rounds and again at the end of training; that can be done by setting evals and verbose_eval. You can also save the evaluation results in a dictionary passed into evals_result to inspect and plot the objective curve over the training iterations.

+
+
evals_result = {}
m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
              evals=[(dtrain, 'train'), (dvalid, 'valid')],
              verbose_eval=10,
              evals_result=evals_result)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37158
+[20]    train-rmse:0.26289  valid-rmse:0.28239
+[30]    train-rmse:0.25148  valid-rmse:0.27028
+[40]    train-rmse:0.24375  valid-rmse:0.26420
+[49]    train-rmse:0.23738  valid-rmse:0.25855
+
+
+
+
+

Train the XGBoost model using the sklearn interface

+

You can optionally use the sklearn estimator interface to XGBoost. This will bypass the need to use the DMatrix data objects for training and prediction, and it will allow you to leverage many of the other scikit-learn ecosystem tools like pipelines, parameter search, partial dependence plots, etc. The XGBRegressor is available in the xgboost library that we’ve already imported.

+
+
# scikit-learn interface
reg = xgb.XGBRegressor(n_estimators=num_boost_round, **params)
reg.fit(train_df[features], train_df[target],
        eval_set=[(train_df[features], train_df[target]), (valid_df[features], valid_df[target])],
        verbose=10);
+
+
[0] validation_0-rmse:6.74422   validation_1-rmse:6.79733
+[10]    validation_0-rmse:0.34798   validation_1-rmse:0.37158
+[20]    validation_0-rmse:0.26289   validation_1-rmse:0.28239
+[30]    validation_0-rmse:0.25148   validation_1-rmse:0.27028
+[40]    validation_0-rmse:0.24375   validation_1-rmse:0.26420
+[49]    validation_0-rmse:0.23738   validation_1-rmse:0.25855
+
+
+

Since not all features of XGBoost are available through the scikit-learn estimator interface, you might want to get the native booster object back out of the sklearn wrapper.

+
+
m = reg.get_booster()
+
+
+
+

Evaluate the model and check for overfitting

+

We get the model evaluation metrics on the training and validation sets printed to stdout when we use the evals argument to the training API. Typically I just look at those printed metrics, but let’s double check by hand.

+
+
def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

root_mean_squared_error(dvalid.get_label(), m.predict(dvalid))

0.25855368
+
+
+

So, how good is that RMSLE of 0.259? Well, checking the Kaggle leaderboard for this competition, we would have come in 53rd out of 474, which is in the top 12% of submissions. That’s not bad for 10 minutes of work doing the bare minimum necessary to transform the raw data into a format consumable by XGBoost and then training a model using default hyperparameter values.

+
+

Note that we’re using a different validation set from that used for the final leaderboard (which is long closed), but our score is likely still a decent approximation for how we would have done in the competition.

+
+

It can be helpful to take a look at objective curves for training and validation data to get a sense for the extent of overfitting. A huge difference between training and validation performance indicates overfitting. In the below curve, there is very little overfitting, indicating we can be aggressive with hyperparameters that increase model flexibility. More on that soon.

+
+
pd.DataFrame({
    'train': evals_result['train']['rmse'],
    'valid': evals_result['valid']['rmse']
}).plot(); plt.xlabel('boosting round'); plt.ylabel('objective');
+
+

line plot showing objective function versus training iteration for training and validation sets

+
+
+
+
+

Check feature importance

+

It’s helpful to get an idea of how much the model is using each feature. In following iterations we might want to try dropping low-signal features or examining the important ones more closely for feature engineering ideas. The gigantic caveat to keep in mind here is that there are different measures of feature importance, and each one will give different importances. XGBoost provides three importance measures; I tend to prefer looking at the weight measure because its rankings usually seem most intuitive.

+
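If you want to eyeball how the three measures disagree, a quick side-by-side comparison (my own sketch, not from the post) looks like this:

imp = pd.DataFrame({t: pd.Series(m.get_score(importance_type=t))
                    for t in ['weight', 'gain', 'cover']})
imp.sort_values('weight', ascending=False).head(10)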
+
fig, ax = plt.subplots(figsize=(5, 10))
feature_importances = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)
feature_importances.plot.barh(ax=ax)
plt.title('Feature Importance');
+
+

feature importance plot showing a few high importance features and many low importance ones

+
+
+
+
+
+

Improve performance using a model iteration loop

+

At this point we have a half-decent prototype model. Now we enter the model iteration loop in which we adjust features and model parameters to find configurations that have better and better performance.

+

Let’s start by putting the feature and target specification, the training/validation split, the model training, and the evaluation all together in one code block that we can copy paste for easy model iteration.

+
+

Note that for this process to be effective, model training needs to take less than 10 seconds. Otherwise you’ll be sitting around waiting way too long. If training takes too long, try training on a sample of the training data, or try reducing the number of boosting rounds.
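If training is too slow for comfortable iteration, one option (a sketch of my own, not from the original post; train_sample_df is a made-up name) is to iterate on a random subset of the training rows:

train_sample_df = train_df.sample(frac=0.25, random_state=42)
dtrain_sample = xgb.DMatrix(data=train_sample_df[features], label=train_sample_df[target],
                            enable_categorical=True)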

+
+
+
features = [
    'SalesID', 'MachineID', 'ModelID', 'datasource', 'auctioneerID',
    'YearMade', 'MachineHoursCurrentMeter', 'UsageBand', 'fiModelDesc',
    'fiBaseModel', 'fiSecondaryDesc', 'fiModelSeries', 'fiModelDescriptor',
    'ProductSize', 'fiProductClassDesc', 'state', 'ProductGroup',
    'ProductGroupDesc', 'Drive_System', 'Enclosure', 'Forks', 'Pad_Type',
    'Ride_Control', 'Stick', 'Transmission', 'Turbocharged', 'Blade_Extension',
    'Blade_Width', 'Enclosure_Type', 'Engine_Horsepower', 'Hydraulics',
    'Pushblock', 'Ripper', 'Scarifier', 'Tip_Control', 'Tire_Size', 'Coupler',
    'Coupler_System', 'Grouser_Tracks', 'Hydraulics_Flow', 'Track_Type',
    'Undercarriage_Pad_Width', 'Stick_Length', 'Thumb', 'Pattern_Changer',
    'Grouser_Type', 'Backhoe_Mounting', 'Blade_Type', 'Travel_Controls',
    'Differential_Type', 'Steering_Controls', 'saledate_year',
    'saledate_month', 'saledate_day', 'saledate_quarter',
    'saledate_day_of_year', 'saledate_day_of_week',
    'saledate_days_since_epoch',
]

target = 'logSalePrice'

train_df, valid_df = train_test_split_temporal(df, 'saledate', 12000)
dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)

params = {
    'learning_rate': 0.3,
    'max_depth': 6,
    'min_child_weight': 1,
    'subsample': 1,
    'colsample_bynode': 1,
    'objective': 'reg:squarederror',
}
num_boost_round = 50

m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
-
-

Split Finding

-

Split finding follows the exact same pattern that we used in the decision tree, except we keep track of gradient and hessian stats instead of target value stats, and of course we use the XGBoost gain criterion (equation 7 from the paper) for evaluating splits.
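For reference, the split criterion being implemented is (my paraphrase of equation 7 from the XGBoost paper, not a quote from the post):

$$ \text{Gain} = \frac{1}{2}\left[ \frac{G_L^2}{H_L + \lambda} + \frac{G_R^2}{H_R + \lambda} - \frac{(G_L + G_R)^2}{H_L + H_R + \lambda} \right] - \gamma $$

where $G$ and $H$ denote sums of gradients and hessians on the left and right sides of the candidate split.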

-
-
def _find_better_split(self, feature_idx):
    x = self.X.values[self.idxs, feature_idx]
    g, h = self.g[self.idxs], self.h[self.idxs]
    sort_idx = np.argsort(x)
    sort_g, sort_h, sort_x = g[sort_idx], h[sort_idx], x[sort_idx]
    sum_g, sum_h = g.sum(), h.sum()
    sum_g_right, sum_h_right = sum_g, sum_h
    sum_g_left, sum_h_left = 0., 0.

    for i in range(0, self.n - 1):
        g_i, h_i, x_i, x_i_next = sort_g[i], sort_h[i], sort_x[i], sort_x[i + 1]
        sum_g_left += g_i; sum_g_right -= g_i
        sum_h_left += h_i; sum_h_right -= h_i
        if sum_h_left < self.min_child_weight or x_i == x_i_next: continue
        if sum_h_right < self.min_child_weight: break

        gain = 0.5 * ((sum_g_left**2 / (sum_h_left + self.reg_lambda))
                        + (sum_g_right**2 / (sum_h_right + self.reg_lambda))
                        - (sum_g**2 / (sum_h + self.reg_lambda))
                        ) - self.gamma/2 # Eq(7) in the xgboost paper
        if gain > self.best_score_so_far:
            self.split_feature_idx = feature_idx
            self.best_score_so_far = gain
            self.threshold = (x_i + x_i_next) / 2

TreeBooster._find_better_split = _find_better_split
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37158
+[20]    train-rmse:0.26289  valid-rmse:0.28239
+[30]    train-rmse:0.25148  valid-rmse:0.27028
+[40]    train-rmse:0.24375  valid-rmse:0.26420
+[49]    train-rmse:0.23738  valid-rmse:0.25855
+
+
+
+

Feature selection

+
+

Drop low-importance features

+

Let’s try training a model on only the top k most important features. You can try different values of k for the rankings created from each of the three importance measures. You can play with how many to keep, looking for the optimal number manually.

+
+
feature_importances_weight = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)
feature_importances_cover = pd.Series(m.get_score(importance_type='cover')).sort_values(ascending=False)
feature_importances_gain = pd.Series(m.get_score(importance_type='gain')).sort_values(ascending=False)

# features = list(feature_importances_weight[:30].index)
# features = list(feature_importances_cover[:35].index)
features = list(feature_importances_gain[:30].index)

dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)

params = {
    'learning_rate': 0.3,
    'max_depth': 6,
    'min_child_weight': 1,
    'subsample': 1,
    'colsample_bynode': 1,
    'objective': 'reg:squarederror',
}
num_boost_round = 50

m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37150
+[20]    train-rmse:0.26182  valid-rmse:0.27986
+[30]    train-rmse:0.24974  valid-rmse:0.26896
+[40]    train-rmse:0.24282  valid-rmse:0.26043
+[49]    train-rmse:0.23768  valid-rmse:0.25664
+
+
+

Looks like keeping the top 30 from the gain importance type gives a slight performance improvement.

+
+
+

Drop one feature at a time

+

Next try dropping each feature out of the model one-at-a-time to see if there are any more features that you can drop. For each feature, drop it from the feature set, then train a new model, then record the evaluation score. At the end, sort the scores to see which features are the best candidates for removal.

+
+
features = [
     'Coupler_System',
     'Tire_Size',
     'Scarifier',
     'ProductSize',
     'Ride_Control',
     'fiBaseModel',
     'Enclosure',
     'Pad_Type',
     'YearMade',
     'fiSecondaryDesc',
     'ProductGroup',
     'Drive_System',
     'Ripper',
     'saledate_days_since_epoch',
     'fiModelDescriptor',
     'fiProductClassDesc',
     'MachineID',
     'Hydraulics',
     'SalesID',
     'Track_Type',
     'ModelID',
     'fiModelDesc',
     'Travel_Controls',
     'Transmission',
     'Blade_Extension',
     'fiModelSeries',
     'Grouser_Tracks',
     'Undercarriage_Pad_Width',
     'Stick',
     'Thumb'
]

# drop each feature one-at-a-time
scores = []
for i, feature in enumerate(features):
    drop_one_features = features[:i] + features[i+1:]

    dtrain = xgb.DMatrix(data=train_df[drop_one_features], label=train_df[target], enable_categorical=True)
    dvalid = xgb.DMatrix(data=valid_df[drop_one_features], label=valid_df[target], enable_categorical=True)

    params = {
        'learning_rate': 0.3,
        'max_depth': 6,
        'min_child_weight': 1,
        'subsample': 1,
        'colsample_bynode': 1,
        'objective': 'reg:squarederror',
    }
    num_boost_round = 50

    m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
                  evals=[(dtrain, 'train'), (dvalid, 'valid')],
                  verbose_eval=False)
    score = root_mean_squared_error(dvalid.get_label(), m.predict(dvalid))
    scores.append(score)

results_df = pd.DataFrame({
    'feature': features,
    'score': scores
})
results_df.sort_values(by='score')
      feature                      score
18    SalesID                      0.252617
5     fiBaseModel                  0.253710
27    Undercarriage_Pad_Width      0.254032
17    Hydraulics                   0.254114
20    ModelID                      0.254169
4     Ride_Control                 0.254278
16    MachineID                    0.254413
19    Track_Type                   0.254825
6     Enclosure                    0.254958
28    Stick                        0.255164
1     Tire_Size                    0.255365
10    ProductGroup                 0.255404
22    Travel_Controls              0.255895
29    Thumb                        0.256300
23    Transmission                 0.256380
26    Grouser_Tracks               0.256395
11    Drive_System                 0.256652
24    Blade_Extension              0.256698
7     Pad_Type                     0.256952
25    fiModelSeries                0.257073
2     Scarifier                    0.257590
12    Ripper                       0.257848
0     Coupler_System               0.258074
21    fiModelDesc                  0.258712
13    saledate_days_since_epoch    0.259856
14    fiModelDescriptor            0.260439
9     fiSecondaryDesc              0.260782
15    fiProductClassDesc           0.263790
3     ProductSize                  0.268068
8     YearMade                     0.313105

Next try removing the feature with the best removal score. Then with that feature still removed, also try removing the feature with the next best removal score and so on. Repeat this process until the model evaluation metric is no longer improving. I think this could be considered a faster version of backward stepwise feature selection.

+
+
features = [
     'Coupler_System',
     'Tire_Size',
     'Scarifier',
     'ProductSize',
     'Ride_Control',
#      'fiBaseModel',
     'Enclosure',
     'Pad_Type',
     'YearMade',
     'fiSecondaryDesc',
     'ProductGroup',
     'Drive_System',
     'Ripper',
     'saledate_days_since_epoch',
     'fiModelDescriptor',
     'fiProductClassDesc',
     'MachineID',
#      'Hydraulics',
#      'SalesID',
     'Track_Type',
     'ModelID',
     'fiModelDesc',
     'Travel_Controls',
     'Transmission',
     'Blade_Extension',
     'fiModelSeries',
     'Grouser_Tracks',
#      'Undercarriage_Pad_Width',
     'Stick',
     'Thumb'
]

dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)

params = {
    'learning_rate': 0.3,
    'max_depth': 6,
    'min_child_weight': 1,
    'subsample': 1,
    'colsample_bynode': 1,
    'objective': 'reg:squarederror',
}
num_boost_round = 50

m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)

[0] train-rmse:6.74422  valid-rmse:6.79145
[10]    train-rmse:0.34882  valid-rmse:0.37201
[20]    train-rmse:0.26050  valid-rmse:0.27386
[30]    train-rmse:0.24844  valid-rmse:0.26205
[40]    train-rmse:0.24042  valid-rmse:0.25426
[49]    train-rmse:0.23549  valid-rmse:0.25004

So here I was able to remove four more features before the score started getting worse. With our reduced feature set, we’re now ranking 39th on that Kagle leaderboard. Let’s see how far we can get with some hyperparameter tuning.

+
+
+
+

Tune the XGBoost hyperparameters

+

This is a topic which deserves its own full-length post, but just for fun, here I’ll do a quick and dirty hand tuning without a ton of explanation.

+

Broadly speaking, my process is to increase model expressiveness by increasing the maximum tree depth until it looks like I’m overfitting. At that point, I start pushing tree pruning parameters like min child weight and regularization parameters like lambda to counteract the overfitting. That process led me to the following parameters.

+
+
params = {
    'learning_rate': 0.3,
    'max_depth': 10,
    'min_child_weight': 14,
    'lambda': 5,
    'subsample': 1,
    'colsample_bynode': 1,
    'objective': 'reg:squarederror',
}
num_boost_round = 50

m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)

[0] train-rmse:6.74473  valid-rmse:6.80196
[10]    train-rmse:0.31833  valid-rmse:0.34151
[20]    train-rmse:0.22651  valid-rmse:0.24885
[30]    train-rmse:0.21501  valid-rmse:0.23904
[40]    train-rmse:0.20897  valid-rmse:0.23645
[49]    train-rmse:0.20418  valid-rmse:0.23412

That gets us up to 12th place. Next I start reducing the learning rate and increasing the boosting rounds in proportion to one another.

+
+
params = {
    'learning_rate': 0.3/5,
    'max_depth': 10,
    'min_child_weight': 14,
    'lambda': 5,
    'subsample': 1,
    'colsample_bynode': 1,
    'objective': 'reg:squarederror',
}
num_boost_round = 50*5

m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)

[0] train-rmse:9.04930  valid-rmse:9.12743
[10]    train-rmse:4.88505  valid-rmse:4.93769
[20]    train-rmse:2.64630  valid-rmse:2.68501
[30]    train-rmse:1.44703  valid-rmse:1.47923
[40]    train-rmse:0.81123  valid-rmse:0.84079
[50]    train-rmse:0.48441  valid-rmse:0.51272
[60]    train-rmse:0.32887  valid-rmse:0.35434
[70]    train-rmse:0.26276  valid-rmse:0.28630
[80]    train-rmse:0.23720  valid-rmse:0.26026
[90]    train-rmse:0.22658  valid-rmse:0.24932
[100]   train-rmse:0.22119  valid-rmse:0.24441
[110]   train-rmse:0.21747  valid-rmse:0.24114
[120]   train-rmse:0.21479  valid-rmse:0.23923
[130]   train-rmse:0.21250  valid-rmse:0.23768
[140]   train-rmse:0.21099  valid-rmse:0.23618
[150]   train-rmse:0.20928  valid-rmse:0.23524
[160]   train-rmse:0.20767  valid-rmse:0.23445
[170]   train-rmse:0.20658  valid-rmse:0.23375
[180]   train-rmse:0.20558  valid-rmse:0.23307
[190]   train-rmse:0.20431  valid-rmse:0.23252
[200]   train-rmse:0.20316  valid-rmse:0.23181
[210]   train-rmse:0.20226  valid-rmse:0.23145
[220]   train-rmse:0.20133  valid-rmse:0.23087
[230]   train-rmse:0.20045  valid-rmse:0.23048
[240]   train-rmse:0.19976  valid-rmse:0.23023
[249]   train-rmse:0.19902  valid-rmse:0.23009

Decreasing the learning rate and increasing the boosting rounds got us up to a 2nd place score. Notice that the score is still decreasing on the validation set. We can actually continue boosting on this model by passing it to the xgb_model argument in the train function. We want to go very very slowly here to avoid overshooting the minimum of the objective function. To do that I ramp up the lambda regularization parameter and boost a few more rounds from where we left off.

+
+
# second stage
params = {
    'learning_rate': 0.3/10,
    'max_depth': 10,
    'min_child_weight': 14,
    'lambda': 60,
    'subsample': 1,
    'colsample_bynode': 1,
    'objective': 'reg:squarederror',
}
num_boost_round = 50*3

m1 = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
               evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10,
               xgb_model=m)

[0] train-rmse:0.19900  valid-rmse:0.23007
[10]    train-rmse:0.19862  valid-rmse:0.22990
[20]    train-rmse:0.19831  valid-rmse:0.22975
[30]    train-rmse:0.19796  valid-rmse:0.22964
[40]    train-rmse:0.19768  valid-rmse:0.22955
[50]    train-rmse:0.19739  valid-rmse:0.22940
[60]    train-rmse:0.19714  valid-rmse:0.22935
[70]    train-rmse:0.19689  valid-rmse:0.22927
[80]    train-rmse:0.19664  valid-rmse:0.22915
[90]    train-rmse:0.19646  valid-rmse:0.22915
[100]   train-rmse:0.19620  valid-rmse:0.22910
[110]   train-rmse:0.19604  valid-rmse:0.22907
[120]   train-rmse:0.19583  valid-rmse:0.22901
[130]   train-rmse:0.19562  valid-rmse:0.22899
[140]   train-rmse:0.19546  valid-rmse:0.22898
[149]   train-rmse:0.19520  valid-rmse:0.22886
+
root_mean_squared_error(dvalid.get_label(), m1.predict(dvalid))
+
+
0.22885828
+
+
+

And that gets us to 1st place on the leaderboard.

+
+
+
+

Wrapping Up

+

There you have it, how to use XGBoost to solve a regression problem in python with world class performance. Remember you can use the XGBoost regression notebook from my ds-templates repo to make it easy to follow this flow on your own problems. If you found this helpful, or if you have additional ideas about solving regression problems with XGBoost, let me know down in the comments.

+
]]>
python
tutorial
gradient boosting
xgboost
https://randomrealizations.com/posts/xgboost-for-regression-in-python/index.html
Tue, 24 Oct 2023 22:00:00 GMT

Blogging with Quarto and Jupyter: The Complete Guide
Matt Bowers
https://randomrealizations.com/posts/blogging-with-quarto-and-jupyter/index.html

Ahh, blogging. I think we can all agree it’s probably one of the greatest forms of written communication to have ever existed.

+

What’s that you say? You’d like to set up your own blog? And you say you want to use a dead simple, data science friendly tech stack? And you wouldn’t be caught dead handing over your painstakingly crafted content to Medium? No worries, friend, I know exactly what you need.

+

Enter Quarto.

+

In this post we’ll set up a blog using a lightweight tech stack consisting of a terminal running quarto, git, and jupyter, and we’ll use Github Pages to host our website for free. Optionally, for a few dollars a year, we can even host our website at our own custom domain.

+

A quick note on how to use this post. Quarto’s documentation on blogging provides a nice high-level overview of the blogging workflow, and I refer to it and many other bits of Quarto documentation here. At the time of writing, the handful of other blog posts about setting up quarto blogs are aimed at the RStudio user. This post exists to provide a jupyter and python-centric path for you to follow through the entire setup of your new quarto blog, and to impart my opinionated recommendations about best practices.

+

Let’s get into it!

+
+

What is Quarto?

+

Quarto is a way to render plain text source files containing markdown and code in python, R, and other languages into published formats like websites, books, slides, journal articles, etc. There is clearly a lot that we can do with it, but today we’ll use it to make a nice looking blog out of some jupyter notebook files.

+

Quarto follows the familiar convention of using a project directory to house all material for a given project. The directory will include source files like jupyter notebooks or Rmarkdown files, as well as configuration files that control how output files are rendered. We can then use the quarto command line utility to perform actions like previewing and rendering within the project directory.

+
+
+

Instantiate your blog

+
+

Create a new Quarto project

+

After installing quarto fire up a new terminal and check that the install was successful by running

+
quarto --version

Now think of a name for your blog’s project directory; this will also be the name of its git repository. The name will have no effect on your website’s name or URL, so don’t think too hard. The quarto documentation calls it myblog, so we’ll one-up them and call ours pirate-ninja-blog. Run the following command to create it in the current directory.

+
quarto create-project pirate-ninja-blog --type website:blog

That command creates a directory called pirate-ninja-blog containing everything you need to render your new blog. You can preview your website by running

+
quarto preview pirate-ninja-blog

Your local website will open in a new browser window. As you edit various aspects of your blog, the preview will update with your changes. This preview feature is so simple and so great.

+
+
+

+
Previewing your blog with quarto preview command
+
+
+
+
+

Set up a git repo

+

Change into your project directory and we’ll start setting up your git repo.

+
cd pirate-ninja-blog

Initialize a new git repo.

+
git init -b main

The _site/ directory is where quarto puts the rendered output files, so you’ll want to ignore it in git. I also like to just ignore any hidden files too, so add the following to your .gitignore file.

+
+
+
.gitignore

/.quarto/
/_site/

For now we’ll just stage the .gitignore file for the initial commit. Eventually you’ll want to commit the other files in your project too, either now or later as you edit them.

+
git add .gitignore
git commit -m "Initial commit."

Then follow GitHub’s instructions to add the local repo to GitHub using git. Basically just create a new blank repo on GitHub’s website, copy the remote repository url, then add the remote repo url to your local git repo.

+
git remote add origin <REMOTE_URL>

Then you’ll be able to push any commits you make to your remote repository on GitHub by saying git push.

+
+
+
+

Understand the components of a Quarto blog

+
+

Contents of the quarto project directory

+

Let’s have a quick look at what quarto put inside of the project directory.

+
_quarto.yml
about.qmd
index.qmd
profile.jpg
posts
styles.css
_site
  • Quarto uses yaml files to specify configurations. The _quarto.yml file specifies project-wide configurations.
  • Quarto’s markdown file type uses the extension `.qmd`. Each qmd file will correspond to a page in our website; `index.qmd` is the homepage and `about.qmd` is the About page.
  • profile.jpg is an image that is included on the about page.
  • styles.css defines css styles for the website.
  • posts is a directory where we can put qmd and other documents which will be rendered into blog posts.
  • posts/_metadata.yml contains configurations that apply to all documents in the posts directory.
  • _site is a directory that contains the rendered website. Whereas all the other files and directories constitute the source code for our blog, _site is the rendered output, i.e. the website itself.

Let’s take a closer look at these components and start to make the blog yours.

+
+
+

Project-wide Configurations

+

The _quarto.yml file controls project-wide configurations, website options, and HTML document options. Options in this file are specified in yaml in a key/value structure with three top level keys: project, website, and format. The quarto website options documentation has the full list of options that you can set here. It will be very helpful to take a look at some example _quarto.yml files in the wild, such as the one from quarto.org or even the one from this blog.

+

Under the website key, go ahead and set the title and description for your blog.

+
website:
  title: "Pirate Ninja Blog"
  description: "A blog about pirates, ninjas, and other things"

You can also customize your navbar which is visible at the top of all pages on your site. Also go ahead and set your github and twitter urls for the icons in the navbar.

+

Under the format key, you can also try changing the HTML theme to one of the other 25 built-in themes.
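As a rough sketch of what those tweaks can look like in _quarto.yml (the username, URLs, and theme below are placeholders I made up, not values generated by the project), you might end up with something along these lines:

website:
  title: "Pirate Ninja Blog"
  navbar:
    right:
      - about.qmd
      - icon: github
        href: https://github.com/your-username
      - icon: twitter
        href: https://twitter.com/your-username

format:
  html:
    theme: darkly
    css: styles.css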

+
+
+

The About Page

+

The about.qmd file defines an About page for the blog. Go ahead and fill in your details in the about.qmd file; you can also replace the profile.jpg file with your own image. Have a look at the quarto documentation on About pages to explore more functionality. Notably, you can change the template option to change the page layout.
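For instance, a minimal about.qmd front matter using one of the built-in templates might look roughly like this; the template choice and link URL here are just illustrative placeholders, not the defaults quarto generates:

---
title: "About"
image: profile.jpg
about:
  template: jolla
  links:
    - icon: github
      text: GitHub
      href: https://github.com/your-username
---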

+
+
+

The Homepage

+

The index.qmd file defines the landing page for your website. It is a listing page which shows links to all the pages in the posts directory. For now we don’t need to change anything here.

+
+
+

The posts/ directory

+

The posts directory contains all your blog posts. There aren’t really requirements for subdirectory structure inside the posts directory, but it’s a best practice to create a new subdirectory for each new blog post. This just helps keep auxiliary files like images or conda environment files organized. Out of the box, the posts directory looks like this.

+
posts
├── _metadata.yml
├── post-with-code
│   ├── image.jpg
│   └── index.qmd
└── welcome
    ├── index.qmd
    └── thumbnail.jpg

There are two reasons we want to be deliberate about how we organize and name things in the posts directory. First, the vast majority of our blog’s content will live here, so we don’t want it to be a big confusing mess. Second, the directory structure and file naming will be reflected in the URLs to our blog posts; if you prefer tidy-looking URLs, and I know you do, then you want to use tidy directory and file names in the posts directory.

+

You can check how the URLs look by navigating to one of the pre-populated posts in the site preview in your browser. For instance, the welcome post’s URL would be

+
https://example.com/posts/welcome/
+

When quarto renders the qmd file at posts/welcome/index.qmd it creates an output document in the website at posts/welcome/index.html. In fact the full URL to the post is,

+
https://example.com/posts/welcome/index.html
+

but the browser knows if you give it a URL with a path ending in a /, then it should look for the index.html file inside that directory.

+

So I think the best practice here is to name your new post subdirectory with the title of the post in all lower case with dashes for spaces, e.g. post-with-code. Then to force all output pages to be called index.html, you can set the output-file key in the posts/_metadata.yml file like this.

+
+
+
posts/_metadata.yml

output-file: index.html

Note that alternative naming conventions are possible; notably you might want to prefix each post name with the date in yyyy-mm-dd format, so the post subdirectories sort temporally and look nice in a list. That’s the convention used in Quarto’s own blog at quarto.org. As long as you keep everything for a given post inside its subdirectory, you should be good to go with nice-looking URLs.

+
+
+
+

Authoring posts with jupyter

+
+

Creating a new post

+

It turns out that quarto will render not only .qmd files, but also .ipynb files in the posts directory. So let’s create a new blog post from a notebook.

+

I think it’s a best practice to write draft posts in their own git branches; that way if you need to deploy some kind of hotfix to main while you’re drafting a post, you won’t have to deploy a half-written post living on the main branch. To start a new post, create a new development branch, change into the posts directory, create a new subdirectory with your preferred naming convention, change into that new directory, and fire up jupyter.

+
git checkout -b new-post
cd posts
mkdir new-post
cd new-post
jupyter notebook

Now create a new notebook from the jupyter UI. In order for quarto to recognize the document, the first cell of the notebook must be a raw text cell (press r in command mode to change a cell to raw text), and it must contain the document’s yaml front matter. You can use the following as a frontmatter template.

+
---
title: New Post
date: 2023-07-12
description: A nice new post
categories: [nonsense, code]
---

Now to preview your post, open a new terminal, change into your blog’s project directory and run the quarto preview command. You’ll see a link to the new post in the listing on the homepage. I usually like to have the preview open in a browser while I’m editing the jupyter notebook, just to make sure things look the way I want in the rendered output. From here you can keep editing the notebook, and the preview will update in the browser dynamically.

+
+
+

Markdown and code cells

+

From here you can put text in markdown cells and you can write code in code cells. Let’s add a markdown cell with some markdown formatting.

+
## A nice heading

Here is some lovely text and an equation.

$$ a^2 + b^2 = c^2 $$

Here's a list.

- a link to an [external website](https://quarto.org).
- a link to [another post in this blog](/posts/welcome/index.qmd).

This markdown will be rendered into the HTML page for the post. The last line in the above cell demonstrates the best practice for using relative urls to link to other resources within your website. Instead of providing the full url in the parentheses, just give the path to the qmd or ipynb file that you want to link to. Note that paths need to start with the / at the root of the quarto project, since without it, quarto will try to resolve paths relative to the location of the current document instead of the root of the project.

+

Then create a code cell with some code. Try something like this.

+
print('Hello, Quarto!')

By default, both code and cell output will be rendered into the HTML output. So far our jupyter notebook looks like this.

+
+
+

+
View of a new post being written in jupyter notebook
+
+
+

Back in the browser window running your blog preview, you can see the rendered page of the new post.

+
+
+

+
View of the preview of the rendered post
+
+
+
+
+

Figures

+

Let’s add a figure to our post. Add a new code cell with the following code.

+
# | fig-cap: This is my lovely line plot
# | fig-alt: A line plot extending up and to the right

import numpy as np
import matplotlib.pyplot as plt

x = np.arange(10)
y = 2 * x + 1
plt.plot(x, y);

Notice a couple of important details. First I placed a semicolon at the end of the last line. That suppresses the [<matplotlib.lines.Line2D at 0x1111d00a0>] text output, which would otherwise show up in your blog post too.

+

Second, I added a couple of special comments at the top of the cell. Quarto allows you to specify numerous code execution options, designated by the # | prefix, to control the behavior and appearance of the code and output at a cell level. I set two keys here, fig-cap and fig-alt which respectively set the figure caption text and the image alt tag text. The fig-alt key is particularly important to set on all your figures because it provides the non-visual description for screenreader users reading your post. The alt tag should be a simple description of what the plot is and possibly what it shows or means. Be a friend of the blind and visually impaired community and set fig-alt on all of your figures.

+
+
+

Version control

+

As you edit your new post, go ahead and commit your changes on your development branch. Once you’ve finished your new post, you can merge it into main like this.

+
git checkout main
git merge new-post

Then you can push to GitHub by running git push. You should also be sure to run a final quarto preview to check that everything looks good before publishing to the web.

+
+
+
+

Publishing your blog to the web

+
+

Hosting with GitHub Pages

+

It’s likely that the easiest (read best) option for you is to host your blog on GitHub Pages. This is because GitHub pages is free, and since you already have your blog’s source code checked into a remote repository at GitHub, it’s very easy to set up. Quarto’s documentation on publishing to GitHub Pages outlines three ways to publish your website, but I recommend their option 2, using the quarto publish command. Once you set up your gh-pages branch as described in the documentation, you simply run quarto publish at the command line and your updates are deployed to your website.
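Assuming you’ve already created the gh-pages branch as described in that documentation, deploying is then a one-liner run from the project root:

quarto publish gh-pages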

+
+
+

Setting up your domain name

+

By default, if you choose to host with GitHub Pages, your website will be published to a url in the form https://username.github.io/reponame/. You can certainly do this; for example Jake VanderPlas’s awesome blog Pythonic Perambulations lives at http://jakevdp.github.io.

+

But, like me, you might want to get your own custom domain by buying, or really renting, one from a registrar. I use Namecheap. If you decide to go for a custom domain, refer to GitHub’s documentation on custom domains. You’ll also need to point your domain registrar to the IP address where GitHub Pages is hosting your website. For an example of how to do this at Namecheap, see Namecheap’s documentation about GitHub Pages
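Just to illustrate the shape of that DNS configuration (don’t copy these values blindly; check GitHub’s custom domain documentation for the current IP addresses), an apex domain setup generally amounts to four A records plus a CNAME for the www subdomain, something like:

A      @      185.199.108.153
A      @      185.199.109.153
A      @      185.199.110.153
A      @      185.199.111.153
CNAME  www    your-username.github.io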

+

Whether you decide to use the standard github.io domain or your own custom domain, be sure to set the site-url key in your _quarto.yml file to ensure other quarto functionality works correctly. For example

+
+
+
_quarto.yml

website:
  site-url: https://example.com/
+

Edit: I found that after upgrading to quarto 1.3, using quarto publish to publish from the gh-pages branch obliterates the CNAME file that is created when you set a custom domain in your repository settings > Pages > Custom Domain. That breaks the mapping from your custom domain to your published website. See this discussion thread for details. The fix is to manually create a CNAME file in the root of your project, and include it in the rendered website using the resources option under the project key in _quarto.yml. The CNAME file should just contain your custom domain, excluding any https://.

+
+
+
CNAME

example.com
+

With the CNAME file in the root of your quarto project, you can then include it in the rendered output.

+
+
+
_quarto.yml

project:
  resources:
    - CNAME
+
+
+
+

Keep in touch with your readers

+
+

RSS Feed

+

The RSS feed is handy for syndicating your posts to feed readers, other websites, and to your email subscribers. As described in quarto’s documentation on RSS feeds, you can automatically generate an RSS feed for your blog by first setting the value of site-url under the website key in _quarto.yml, and then setting feed: true under the listing key in the frontmatter of index.qmd. This will generate an RSS feed in the root of your website called index.xml. Once you have an RSS feed, go ahead and submit it to Python-Bloggers to have your work syndicated to a wider audience and to strengthen our little community of independent data science blogs.
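Putting that together, the front matter of index.qmd might end up looking roughly like this; the contents and sort settings shown are just typical listing options for context, not something you need to copy exactly:

---
title: "Pirate Ninja Blog"
listing:
  contents: posts
  sort: "date desc"
  feed: true
---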

+
+
+

Email Subscriptions

+

The idea here is to have a form field on your website where readers can input their email address to be added to your mailing list. Quarto’s documentation on subscriptions describes how to set up a subscribe box on your blog using MailChimp, so we won’t repeat it here. Once you have some subscribers, you can send them updates whenever you write a new post. You could do this manually or, in my case, set up an automation through MailChimp which uses your RSS feed to send out email updates to the list about new posts.

+
+
+

Comments

+

Quarto has built-in support for three different comment systems: hypothesis, utterances, and giscus. The good news is that these are all free to use, easy to set up, and AFAIK do not engage in any sketchy tracking activities. The bad news is that none of them are ideal because they all require the user to create an account and log in to leave a comment. We want to encourage readers to comment, so we don’t want them to have to create accounts or deal with passwords or pick all the squares with bicycles or any such nonsense, just to leave a little comment. To that end, I’ve actually been working on self-hosted login-free comments for this blog using isso, but it’s a bit more involved than these built-in solutions, so we’ll have to discuss it at length in a future post.

+

If you prefer an easy, out-of-the-box solution, I can recommend utterances, which uses GitHub issues to store comments for each post. I used utterances for comments on the first jekyll-based incarnation of this blog; you can still see the utterances comments on posts before this one. Go check out the Quarto documentation on comments to see how to set up utterances in your project.
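If you do go with utterances, the configuration lives under the comments key in _quarto.yml and just points at the GitHub repo whose issues will store the comments; here is a sketch with a placeholder repo name:

website:
  comments:
    utterances:
      repo: your-username/pirate-ninja-blog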

+
+
+

Analytics

+

As a data enthusiast, you’ll likely enjoy collecting some data about page views and visitors to your site. You might be tempted to use Google Analytics to do this; indeed quarto makes it very easy to just add a line to your _quarto.yml file to set it up. Unfortunately, in this case, going with the easy and free solution means supporting Google’s dubious corporate surveillance activities. Be a conscientious internet citizen and avoid using Google Analytics on your blog. Fortunately, there are numerous privacy-friendly alternatives to Google Analytics. For this blog I’m self-hosting umami analytics, which might warrant its own post in the future.

+
+
+
+

More humbly suggested best practices

+
+

Using conda environments for reproducibility

+

As you know, it’s a good practice to use an environment manager to keep track of packages, their versions, and other dependencies for software in a data science project. The same applies to blog posts; especially if you’re using unusual or bleeding-edge packages in a post. This will help us out a lot when we have to go back and re-run a notebook a couple years later to regenerate the output. Here we’ll use conda as our environment manager.

+

To be clear, I don’t bother doing this if I’m just using fairly stable functionality in standard packages like pandas, numpy, and matplotlib, but we’ll do it here for illustration. From a terminal sitting inside our post subdirectory at posts/new-post, create a new conda environment with the packages you’re using in the post.

+
conda create -p ./venv jupyter numpy matplotlib

Note the -p flag which tells conda to save the environment to ./venv in the current working directory. This will save all the installed packages here in the post directory instead of in your system-wide location for conda environments. Note also that you’ll want to avoid checking anything in the venv directory into source control, so add venv to the .gitignore file at the root of the quarto project to ignore all venv directories throughout your quarto project.
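In other words, the project-level .gitignore picks up one more entry alongside the ones we added earlier:

venv/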

+

Now whenever you work on this post, you’ll navigate to the post subdirectory with a terminal and activate the conda environment.

+
conda activate ./venv

Then you can fire up your jupyter notebook from the command line, and it will use the active conda environment.

+

Since we don’t want to check the venv directory with all its installed libraries into source control, we need to create an environment.yml file from which the environment can later be reproduced. With the local conda environment active, run the following.

+
conda env export --from-history > environment.yml

The --from-history flag tells conda to skip adding a bunch of system specific stuff that will gunk up your environment yaml file and make it harder to use for cross-platform reproducibility. This environment.yml file is the only environment management artifact that you need to check into git.

+

Later if you need to recreate the environment from the environment.yml file, you can use the following command.

+
conda env create -f environment.yml -p ./venv
+
+
+

Image file best practices

+

Let’s talk about image file sizes. The key idea is that we want images to have just enough resolution to look good; any more than that and we’re just dragging around larger-than-necessary files and wasting bandwidth and slowing down page load times.

+

You can read all about choosing optimal image sizes, but the TLDR is that images should be just large enough (in pixels) to fill the containers they occupy on the page. In our quarto blog, the two most common kinds of images are inline images we put in the body of posts and image thumbnails that show up as the associated image for a post, e.g. in the listing on our homepage. The inline image container seems to be about 800 pixels wide in my browser and the thumbnails are smaller, so adding some margin of error, I decided to go for 1000x750 for inline images and 500x375 for the thumbnails.

+

I use a command line tool called Image Magick to resize image files. Go ahead and install image magick with homebrew, and let’s add some images to our new post.

+

For this example I’ll use a nice shot of the London Underground from Wikipedia. Save your image as image.jpg. Then use image magick to create two new resized images for inline and thumbnail use.

+
convert image.jpg -resize 1000x1000 main.jpg
convert image.jpg -resize 500x500 thumbnail.jpg

These commands do not change the aspect ratio of the image; they just reduce the size so that the image fits within the size specified.

+

Now move both of your new images into the post subdirectory at posts/new-post/. To specify the thumbnail image, set the image key in the post’s front matter. Be sure to also add an alt tag description of the image using the image-alt key to keep it accessible for screen reader users. Our post’s frontmatter now looks like this.

+
---
title: New Post
date: 2023-07-12
description: A nice new post
categories: [nonsense, code]
image: thumbnail.jpg
image-alt: "A London Underground train emerging from a tunnel"
---

To include an image within the body of a post, use markdown in the post to include the image. I added a markdown cell just under the front matter containing the following.

+
![A London Underground train emerging from a tunnel](main.jpg "")
+

In your preview browser window, you can see we have the thumbnail for our new post on the homepage listing.

+
+
+

+
A screenshot of the homepage showing the new post’s thumbnail image
+
+
+

And we also have the inline image appearing in the body of the post.

+
+
+

+
A screenshot of the new post showing the image included in the body of the post
+
+
+

You can take a look at the source code for this blog to see some examples of including images in posts.

+
+
+
+

SEO

+

SEO is a huge topic, but here we’ll just focus on a few fundamental technical aspects that we want to be sure to get right. This boils down to registering with the top search engines by market share and ensuring that we’re providing them with the information they need to properly index our pages.

+

I checked the top search engines by global market share and as of 2023 it looks like Google has about 85%, Bing has about 8%, and the others have 2% or less each. So let’s focus on setting our site up to work well with Google search and Bing to get over 90% coverage.

+
+

Google Search Console and Bing Webmaster Tools

+

Google Search Console is a tool for web admins to help analyze search traffic and identify any technical issues that might prevent pages from appearing or ranking well in search. Go ahead and set up an account and register your blog in search console. You can refer to Google’s documentation on search console to guide you through setup and configuration.

+

Once you get set up on GSC, you can also create an account for Bing Webmaster Tools. Do this after setting up GSC because there is an option to import your information from your GSC account.

+

Once you’re set up with GSC and BWT, you’ll get email alerts anytime they crawl your site and detect any indexing problems. When that happens, track down the issues and fix them so your pages can appear in organic searches.

+
+
+

Sitemap

+

A sitemap is an xml document that lists all the pages on your website. It’s a map for the search engine bots that crawl the web looking for new pages to index. Quarto will automatically generate a sitemap called sitemap.xml in the root of your website, as long as you’ve filled out the site-url key in _quarto.yml. You can submit your website for indexing by providing your sitemap in Google Search Console and Bing Webmaster Tools.

+
+
+
+

Wrapping Up

+

Boy howdy, that was a lot, but at this point you should have a fully functioning blog, built with a minimalist, data-science-friendly tech stack consisting of quarto, jupyter, and GitHub. If you do create a blog using quarto, drop a link to it in the comments, and we can all check it out and celebrate your creation!

+
]]>
python
tutorial
blogging
https://randomrealizations.com/posts/blogging-with-quarto-and-jupyter/index.html
Tue, 05 Sep 2023 22:00:00 GMT

Random Realizations Resurrected
Matt Bowers
https://randomrealizations.com/posts/random-realizations-resurrected/index.html
+

+
Christ the Redeemer towers into a vast blue Brazilian sky.
+
+
+

Well it’s been over a year since I posted anything here. You see, a lot has been going on here at the Random Realizations Remote Global Headquarters that has distracted from producing the high-quality data science content that you’re used to. Mostly I went on hiatus from work and started traveling, which turns out to be its own full-time job. I had aspirations of writing more after leaving work, but of course, after leaving, I couldn’t be bothered to sit down at my laptop and type stuff about data science to y’all. After all, life is bigger than that.

+

When I finally felt like opening up my laptop, I was confronted with an email from the maintainers of fastpages, the open source content management system (CMS) I originally used to create this blog, notifying me that the project was being deprecated and that I would need to migrate my content to some other platform.

+

Boo.

+

That didn’t sound like much fun, so I spent another few months ignoring the blog. But eventually, dear reader, I decided it was time to roll up my sleeves and get this blog thriving once again.

+

Ok so fastpages was going to be deprecated, and I needed to find a new CMS. My requirements were pretty simple: I wanted to write the blog posts with jupyter notebook, and I wanted to host the site on my own domain. Helpfully, the former maintainers of fastpages recommended an alternative CMS called Quarto which I had never heard of. Apparently I had been living under a rock because Quarto appears to be all the rage. Quarto’s website says it’s an open-source scientific and technical publishing system. I think it’s fair to think of it as a way to render plain text or source code from languages like python, R, and julia into a variety of different published formats like websites, books, or journal articles. It was developed by the good folks over at RStudio, and the project has a pretty active following over on github, so I think it’s less likely to suddenly disappear like fastpages.

+

So anyway, I’ve been migrating my content over into this new quarto universe.

+

You may officially consider this blog resurrected from the dead, because this is the first new post published after the migration. The site has a bit of a new look and feel, so I hope you like it. Do let me know in the comments if you find anything amiss with the new website. Otherwise we’ll just assume it’s fabulous.

+

I’m working on a post about how to create a blog with quarto using jupyter and python, so you can too!

+

See you in more posts real soon! Love, Matt.

]]>
blogging
https://randomrealizations.com/posts/random-realizations-resurrected/index.html
Tue, 01 Aug 2023 22:00:00 GMT

XGBoost from Scratch
Matt Bowers
https://randomrealizations.com/posts/xgboost-from-scratch/index.html
+

+
A weathered tree reaches toward the sea at Playa Mal País
+
+
+

Well, dear reader, it’s that time again, time for us to do a seemingly unnecessary scratch build of a popular algorithm that most people would simply import from the library without a second thought. But readers of this blog are not most people. Of course you know that when we do scratch builds, it’s not for the hell of it, it’s for the purpose of demystification. To that end, today we are going to implement XGBoost from scratch in python, using only numpy and pandas.

+

Specifically we’re going to implement the core statistical learning algorithm of XGBoost, including most of the key hyperparameters and their functionality. Our implementation will also support user-defined custom objective functions, meaning that it can perform regression, classification, and whatever exotic learning tasks you can dream up, as long as you can write down a twice-differentiable objective function. We’ll refrain from implementing some simple features like column subsampling which will be left to you, gentle reader, as exercises. In terms of tree methods, we’re going to implement the exact tree-splitting algorithm, leaving the sparsity-aware method (used to handle missing feature values) and the approximate method (used for scalability) as exercises or maybe topics for future posts.

+

As always, if something is unclear, try backtracking through the previous posts on gradient boosting and decision trees to clarify your intuition. We’ve already built up all the statistical and computational background needed to make sense of this scratch build. Here are the most important prerequisite posts:

+
    +
  1. Gradient Boosting Machine from Scratch
  2. Decision Tree From Scratch
  3. How to Understand XGBoost
+

Great, let’s do this.

+
+

The XGBoost Model Class

+

We begin with the user-facing API for our model, a class called XGBoostModel which will implement gradient boosting and prediction. To be more consistent with the XGBoost library, we’ll pass hyperparameters to our model in a parameter dictionary, so our init method is going to pull relevant parameters out of the dictionary and set them as object attributes. Note the use of python’s defaultdict so we don’t have to worry about handling key errors if we try to access a parameter that the user didn’t set in the dictionary.

+
+
import math
import numpy as np 
import pandas as pd
from collections import defaultdict


class XGBoostModel():
    '''XGBoost from Scratch
    '''
    
    def __init__(self, params, random_seed=None):
        self.params = defaultdict(lambda: None, params)
        self.subsample = self.params['subsample'] \
            if self.params['subsample'] else 1.0
        self.learning_rate = self.params['learning_rate'] \
            if self.params['learning_rate'] else 0.3
        self.base_prediction = self.params['base_score'] \
            if self.params['base_score'] else 0.5
        self.max_depth = self.params['max_depth'] \
            if self.params['max_depth'] else 5
        self.rng = np.random.default_rng(seed=random_seed)
+

The fit method, based on our classic GBM, takes a feature dataframe, a target vector, the objective function, and the number of boosting rounds as arguments. The user-supplied objective function should be an object with loss, gradient, and hessian methods, each of which takes a target vector and a prediction vector as input; the loss method should return a scalar loss score, the gradient method should return a vector of gradients, and the hessian method should return a vector of hessians.

+

In contrast to boosting in the classic GBM, instead of computing residuals between the current predictions and the target, we compute gradients and hessians of the loss function with respect to the current predictions, and instead of predicting residuals with a decision tree, we fit a special XGBoost tree booster (which we’ll implement in a moment) using the gradients and hessians. I’ve also added row subsampling by drawing a random subset of instance indices and passing them to the tree booster during each boosting round. The rest of the fit method is the same as the classic GBM, and the predict method is identical too.

+
+
def fit(self, X, y, objective, num_boost_round, verbose=False):
    current_predictions = self.base_prediction * np.ones(shape=y.shape)
    self.boosters = []
    for i in range(num_boost_round):
        gradients = objective.gradient(y, current_predictions)
        hessians = objective.hessian(y, current_predictions)
        sample_idxs = None if self.subsample == 1.0 \
            else self.rng.choice(len(y), 
                                 size=math.floor(self.subsample*len(y)), 
                                 replace=False)
        booster = TreeBooster(X, gradients, hessians, 
                              self.params, self.max_depth, sample_idxs)
        current_predictions += self.learning_rate * booster.predict(X)
        self.boosters.append(booster)
        if verbose: 
            print(f'[{i}] train loss = {objective.loss(y, current_predictions)}')
        
def predict(self, X):
    return (self.base_prediction + self.learning_rate 
            * np.sum([booster.predict(X) for booster in self.boosters], axis=0))

XGBoostModel.fit = fit
XGBoostModel.predict = predict

Testing

-

Let’s take this baby for a spin and benchmark its performance against the actual XGBoost library. We use the scikit learn California housing dataset for benchmarking.

-
-
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, 
                                                    random_state=43)

Let’s start with a nice friendly squared error objective function for training. We should probably have a future post all about how to define custom objective functions in XGBoost, but for now, here’s how I define squared error.

-
-
class SquaredErrorObjective():
    def loss(self, y, pred): return np.mean((y - pred)**2)
    def gradient(self, y, pred): return pred - y
    def hessian(self, y, pred): return np.ones(len(y))
-

Here I use a more or less arbitrary set of hyperparameters for training. Feel free to play around with tuning and trying other parameter combinations yourself.

import xgboost as xgb

params = {
    'learning_rate': 0.1,
    'max_depth': 5,
    'subsample': 0.8,
    'reg_lambda': 1.5,
    'gamma': 0.0,
    'min_child_weight': 25,
    'base_score': 0.0,
    'tree_method': 'exact',
}
num_boost_round = 50

# train the from-scratch XGBoost model
model_scratch = XGBoostModel(params, random_seed=42)
model_scratch.fit(X_train, y_train, SquaredErrorObjective(), num_boost_round)

# train the library XGBoost model
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
model_xgb = xgb.train(params, dtrain, num_boost_round)

+

The XGBoost Tree Booster

+

The XGBoost tree booster is a modified version of the decision tree that we built in the decision tree from scratch post. Like the decision tree, we recursively build a binary tree structure by finding the best split rule for each node in the tree. The main difference is the criterion for evaluating splits and the way that we define a leaf’s predicted value. Instead of being functions of the target values of the instances in each node, the criterion and predicted values are functions of the instance gradients and hessians. Thus we need only make a couple of modifications to our previous decision tree implementation to create the XGBoost tree booster.

+
+

Initialization and Inserting Child Nodes

+

Most of the init method is just parsing the parameter dictionary to assign parameters as object attributes. The one notable difference from our decision tree is in the way we define the node’s predicted value. We define self.value according to equation 5 of the XGBoost paper, a simple function of the gradient and hessian values of the instances in the current node. Of course the init also goes on to build the tree via the maybe insert child nodes method. This method is nearly identical to the one we implemented for our decision tree. So far so good.
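Just as a handy reference (this is my own restatement using the paper's notation, not a quote from it), the leaf value computed in the init below is

$$ w^* = -\frac{\sum_{i \in I} g_i}{\sum_{i \in I} h_i + \lambda} $$

where the sum runs over the instances $I$ in the node, $g_i$ and $h_i$ are their gradients and hessians, and $\lambda$ is the regularization parameter.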

+
+
class TreeBooster():
 
    def __init__(self, X, g, h, params, max_depth, idxs=None):
        self.params = params
        self.max_depth = max_depth
        assert self.max_depth >= 0, 'max_depth must be nonnegative'
        self.min_child_weight = params['min_child_weight'] \
            if params['min_child_weight'] else 1.0
        self.reg_lambda = params['reg_lambda'] if params['reg_lambda'] else 1.0
        self.gamma = params['gamma'] if params['gamma'] else 0.0
        self.colsample_bynode = params['colsample_bynode'] \
            if params['colsample_bynode'] else 1.0
        if isinstance(g, pd.Series): g = g.values
        if isinstance(h, pd.Series): h = h.values
        if idxs is None: idxs = np.arange(len(g))
        self.X, self.g, self.h, self.idxs = X, g, h, idxs
        self.n, self.c = len(idxs), X.shape[1]
        self.value = -g[idxs].sum() / (h[idxs].sum() + self.reg_lambda) # Eq (5)
        self.best_score_so_far = 0.
        if self.max_depth > 0:
            self._maybe_insert_child_nodes()

Let’s check the models’ performance on the held out test data to benchmark our implementation.

-
-
pred_scratch = model_scratch.predict(X_test)
pred_xgb = model_xgb.predict(dtest)
print(f'scratch score: {SquaredErrorObjective().loss(y_test, pred_scratch)}')
print(f'xgboost score: {SquaredErrorObjective().loss(y_test, pred_xgb)}')
scratch score: 0.2434125759558149
xgboost score: 0.24123239765807963
-
-
-

Well, look at that! Our scratch-built XGBoost is looking pretty consistent with the library. Go us!

-
-
-

Wrapping Up

-

I’d say this is a pretty good milestone for us here at Random Realizations. We’ve been hammering away at the various concepts around gradient boosting, leaving a trail of equations and scratch-built algos in our wake. Today we put all of that together to create a legit scratch build of XGBoost, something that would have been out of reach for me before we embarked on this journey together over a year ago. To anyone with the patience to read through this stuff, cheers to you! I hope you’re learning and enjoying this as much as I am.

-
-
-

Reader Exercises

-

If you want to take this a step further and deepen your understanding and coding abilities, let me recommend some exercises for you.

-
    -
  1. Implement column subsampling. XGBoost itself provides column subsampling by tree, by level, and by node. Try implementing by tree first, then try adding by level or by node as well. These should be pretty straightforward to do.
  2. Implement sparsity aware split finding for missing feature values (Algorithm 2 in the XGBoost paper). This will be a little more involved, since you’ll need to refactor and modify several parts of the tree booster class.
-
]]>
python
gradient boosting
from scratch
https://randomrealizations.com/posts/xgboost-from-scratch/index.html
Fri, 06 May 2022 21:00:00 GMT

XGBoost Explained
Matt Bowers
https://randomrealizations.com/posts/xgboost-explained/index.html
-

-
Tree branches on a chilly day in Johnson City
-
-
-

Ahh, XGBoost, what an absolutely stellar implementation of gradient boosting. Once Tianqi Chen and Carlos Guestrin of the University of Washington published the XGBoost paper and shared the open source code in the mid 2010’s, the algorithm quickly gained adoption in the ML community, appearing in over half of winning Kaggle submissions in 2015. Nowadays it’s certainly among the most popular gradient boosting libraries, along with LightGBM and CatBoost, although the highly scientific indicator of GitHub stars per year indicates that it is in fact the most beloved gradient boosting package of all. Since it was the first of the modern popular boosting frameworks, and since benchmarking indicates that no other boosting algorithm outperforms it, we can comfortably focus our attention on understanding XGBoost.

-

The XGBoost authors identify two key aspects of a machine learning system: (1) a flexible statistical model and (2) a scalable learning system to fit that model using data. XGBoost improves on both of these aspects, providing a more flexible and feature-rich statistical model and building a truly scalable system to fit it. In this post we’re going to focus on the statistical modeling innovations, outlining the key differences from the classic gradient boosting machine and diving into the mathematical derivation of the XGBoost learning algorithm. If you’re not already familiar with gradient boosting, go back and read the earlier posts in the series before jumping in here.

-

Buckle up, dear reader. Today we understand how XGBoost works, no hand waving required.

-
-

XGBoost is a Gradient Boosting Machine

-

At a high level, XGBoost is an iteratively constructed composite model, just like the classic gradient boosting machine we discussed back in the GBM post . The final model takes the form

$$\hat{y}_i = b + \eta \sum_{k=1}^{K} f_k(\mathbf{x}_i)$$

where $b$ is the base prediction, $\eta$ is the learning rate hyperparameter that helps control overfitting by reducing the contributions of each booster, and each of the boosters $f_k$ is a decision tree. To help us connect the dots between theory and code, whenever we encounter new hyperparameters, I’ll point out their names from the XGBoost Parameter Documentation. So, $b$ can be set by base_score, and $\eta$ is set by either eta or learning_rate.
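For example (an illustrative fragment, values arbitrary), these two knobs appear in a native-API parameter dictionary as:

params = {
    'base_score': 0.5,     # the base prediction b
    'learning_rate': 0.1,  # the shrinkage factor (alias: eta)
}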

-

XGBoost introduces two key statistical learning improvements over the classic gradient boosting model. First, it reimagines the gradient descent algorithm used for training, and second it uses a custom-built decision tree with extra functionality as its booster. We’ll dive into each of these key innovations in the following sections.

-
-
-

Descent Algorithm Innovations

-
-

Regularized Objective Function

-

In the post on GBM with any loss function, we looked at loss functions of the form $L = \sum_{i=1}^{n} l(y_i, \hat{y}_i)$ which compute some distance between targets and predictions and sum them up over the training dataset. XGBoost introduces regularization into the objective function so that the objective takes the form

$$\text{obj} = \sum_{i=1}^{n} l(y_i, \hat{y}_i) + \sum_{k=1}^{K} \Omega(f_k)$$

where $l$ is some twice-differentiable loss function and $\Omega$ is a regularization term that penalizes the complexity of each tree booster, taking the form

$$\Omega(f) = \gamma T + \frac{1}{2} \lambda \sum_{j=1}^{T} w_j^2$$

where $T$ is the number of leaf nodes and $\sum_j w_j^2$ is the squared sum of the leaf prediction values. This introduces two new hyperparameters: $\gamma$, which penalizes the number of leaf nodes, and $\lambda$, which is the L-2 regularization parameter for leaf predicted values. These are set by gamma and reg_lambda in the XGBoost parametrization. Together, these provide powerful new controls to reduce overfitting due to overly complex tree boosters. Note that setting $\gamma = 0$ and $\lambda = 0$ reduces the objective back to an unregularized loss function as used in the classic GBM.

-
-
-

An Aside on Newton’s Method

-

As we’ll see soon, XGBoost uses Newton’s Method to minimize its objective function, so let’s start with a quick refresher.

-

Newton’s method is an iterative procedure for minimizing a function $f(x)$. At each step we have some input $x_0$, and our goal is to find a nudge value $\Delta x$ such that

$$f(x_0 + \Delta x) < f(x_0)$$

To find a good nudge value $\Delta x$, we generate a local quadratic approximation of the function in the neighborhood of the input $x_0$, and then we find the value of $\Delta x$ that would bring us to the minimum of the quadratic approximation.

-
-
-

-
Schematic of Newton’s method
-
-
-

The figure shows a single Newton step where we start at $x_0$, find the local quadratic approximation, and then jump a distance $\Delta x$ along the $x$-axis to land at the minimum of the quadratic. If we iterate in this way, we are likely to land close to the minimum of $f$.

-

So how do we compute the quadratic approximation? We use the second order Taylor series expansion of $f$ near the point $x_0$.

$$f(x_0 + \Delta x) \approx f(x_0) + f'(x_0)\,\Delta x + \frac{1}{2} f''(x_0)\,\Delta x^2$$

To find the nudge value that minimizes the quadratic approximation, we can take the derivative with respect to $\Delta x$, set it to zero, and solve for $\Delta x$.

$$f'(x_0) + f''(x_0)\,\Delta x = 0$$

$$\Delta x = -\frac{f'(x_0)}{f''(x_0)}$$

And as long as $f''(x_0) > 0$ (i.e., the parabola is pointing up), this choice of $\Delta x$ takes us to the minimum of the quadratic approximation.
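To make the procedure concrete, here is a minimal sketch (my own illustration, not code from this series) of Newton’s method for a one-dimensional function, assuming we can supply the first and second derivatives directly.

def newton_minimize(f_prime, f_double_prime, x0, n_steps=10):
    # repeatedly apply the Newton update x <- x - f'(x) / f''(x)
    x = x0
    for _ in range(n_steps):
        x = x - f_prime(x) / f_double_prime(x)
    return x

# toy example: f(x) = (x - 3)**2 + 1 has f'(x) = 2*(x - 3) and f''(x) = 2
newton_minimize(lambda x: 2 * (x - 3), lambda x: 2.0, x0=0.0)  # returns 3.0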

-
-
-

Tree Boosting with Newton’s Method

-

This lands us at the heart of XGBoost, which uses Newton’s method, rather than gradient descent, to guide each round of boosting. This explanation will correspond very closely to section 2.2 of the XGBoost paper, but here I’ll explicitly spell out some of the intermediate steps which are omitted from their derivation, and you’ll get some additional commentary from me along the way.

-
-

Newton Descent in Tree Space

-

Suppose we’ve done $t-1$ boosting rounds, and we want to add the $t$-th booster $f_t$ to our composite model. Our current model’s prediction for instance $i$ is $\hat{y}_i^{(t-1)}$. If we add a new tree booster $f_t$ to our model, the objective function would give

$$\text{obj}^{(t)} = \sum_{i=1}^{n} l\left(y_i,\, \hat{y}_i^{(t-1)} + f_t(\mathbf{x}_i)\right) + \Omega(f_t)$$

We need to choose $f_t$ so that it decreases the loss, i.e. we want

$$\sum_{i=1}^{n} l\left(y_i,\, \hat{y}_i^{(t-1)} + f_t(\mathbf{x}_i)\right) < \sum_{i=1}^{n} l\left(y_i,\, \hat{y}_i^{(t-1)}\right)$$

Does that sound familiar? In the previous section we used Newton’s method to find a value of $\Delta x$ that would make $f(x_0 + \Delta x) < f(x_0)$. Let’s try the same thing with our loss function. To be explicit, the parallels are: $x_0 \leftrightarrow \hat{y}_i^{(t-1)}$, $\Delta x \leftrightarrow f_t(\mathbf{x}_i)$, and $f \leftrightarrow l$.

-

Let’s start by finding the second order Taylor series approximation for the loss around the point $\hat{y}_i^{(t-1)}$.

$$l\left(y_i,\, \hat{y}_i^{(t-1)} + f_t(\mathbf{x}_i)\right) \approx l\left(y_i,\, \hat{y}_i^{(t-1)}\right) + g_i f_t(\mathbf{x}_i) + \frac{1}{2} h_i f_t^2(\mathbf{x}_i)$$

where

$$g_i = \partial_{\hat{y}^{(t-1)}} \, l\left(y_i,\, \hat{y}_i^{(t-1)}\right)$$

and

$$h_i = \partial^2_{\hat{y}^{(t-1)}} \, l\left(y_i,\, \hat{y}_i^{(t-1)}\right)$$

are the first and second order partial derivatives of the loss with respect to the current predictions. The XGBoost paper calls these the gradients and hessians, respectively. Remember that when we specify an actual loss function to use, we would also specify the functional form of the gradients and hessians, so that they are directly computable.
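For example, for squared error loss $l(y_i, \hat{y}_i) = \frac{1}{2}(y_i - \hat{y}_i)^2$ (the convention used by the squared error objective later in this series), the gradients and hessians are simply

$$g_i = \hat{y}_i^{(t-1)} - y_i, \qquad h_i = 1.$$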

-

Now we can go back and substitute our quadratic approximation in for the loss function to get an approximation of the objective function in the neighborhood of ..

-

-

Since is constant regardless of our choice of , we can drop it and instead work with the modified objective, which gives us Equation (3) from the paper.

-

-

Now the authors are about to do something great. They’re about to show how to directly compute the optimal prediction values for the leaf nodes of . We’ll circle back in a moment about how we find a good structure for , i.e. good node splits, but we’re going to find the optimal predicted values for any tree structure having terminal nodes. Let denote the set of instances that are in the -th leaf node of . Then we can rewrite the objective.

-

-

We notice that for all instances in , the tree yields the same predicted value . Substituting in for the predicted values and expanding we get

-

-

Rearranging terms we obtain Equation (4).

-

-

For each leaf node , our modified objective function is quadratic in . To find the optimal predicted values we take the derivative, set to zero, and solve for .

-

-

This yields Equation (5).

$$w_j^* = -\frac{\sum_{i \in I_j} g_i}{\sum_{i \in I_j} h_i + \lambda}$$
-
-

Split Finding

-

Now that we know how to find the optimal predicted value for any leaf node, we need to identify a criterion for finding a good tree structure, which boils down to finding the best split for a given node. Back in the decision tree from scratch post, we derived a split evaluation metric based on the reduction in the objective function associated with a particular split. To do that, first we need a way to compute the objective function given a particular tree structure. Substituting the optimal predicted values into the objective function, we get Equation (6).

$$\tilde{\text{obj}}^{(t)} = -\frac{1}{2} \sum_{j=1}^{T} \frac{\left(\sum_{i \in I_j} g_i\right)^2}{\sum_{i \in I_j} h_i + \lambda} + \gamma T$$

We can then evaluate potential splits by comparing the objective before making a split to the objective after making a split, where the split with the maximum reduction in objective (a.k.a. gain) is best.

-

More formally, let $I$ be the set of data instances in the current node, and let $I_L$ and $I_R$ be the instances that fall into the left and right child nodes of a proposed split. Let $L_I$ be the total loss for all instances in the node, while $L_{I_L}$ and $L_{I_R}$ are the losses for the left and right child nodes. The total loss contributed by instances in node $I$ prior to any split is

$$L_I = -\frac{1}{2} \frac{\left(\sum_{i \in I} g_i\right)^2}{\sum_{i \in I} h_i + \lambda} + \gamma$$

And the loss after splitting into $I_L$ and $I_R$ is

$$L_{I_L} + L_{I_R} = -\frac{1}{2} \left[ \frac{\left(\sum_{i \in I_L} g_i\right)^2}{\sum_{i \in I_L} h_i + \lambda} + \frac{\left(\sum_{i \in I_R} g_i\right)^2}{\sum_{i \in I_R} h_i + \lambda} \right] + 2\gamma$$

The gain from this split is then

$$\text{Gain} = L_I - (L_{I_L} + L_{I_R}) = \frac{1}{2} \left[ \frac{\left(\sum_{i \in I_L} g_i\right)^2}{\sum_{i \in I_L} h_i + \lambda} + \frac{\left(\sum_{i \in I_R} g_i\right)^2}{\sum_{i \in I_R} h_i + \lambda} - \frac{\left(\sum_{i \in I} g_i\right)^2}{\sum_{i \in I} h_i + \lambda} \right] - \gamma$$

which is Equation (7) from the paper. In practice it makes sense to accept a split only if the gain is positive, thus the parameter $\gamma$ sets the minimum gain required to make a further split. This is why $\gamma$ can be set with the parameter gamma or the more descriptive min_split_loss.
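As a quick illustration of Equation (7) (the helper below and its inputs are my own, not part of XGBoost’s API), the gain of a candidate split can be computed directly from the gradient and hessian sums of the proposed left and right children:

def split_gain(G_left, H_left, G_right, H_right, reg_lambda=1.0, gamma=0.0):
    # Equation (7): reduction in the regularized objective from making the split
    def score(G, H): return G**2 / (H + reg_lambda)
    return 0.5 * (score(G_left, H_left) + score(G_right, H_right)
                  - score(G_left + G_right, H_left + H_right)) - gamma

split_gain(G_left=-12.0, H_left=20.0, G_right=15.0, H_right=30.0)  # positive, so the split helps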

-
-
-
-
-

Tree Booster Innovations

-
-

Missing Values and Sparsity-Aware Split Finding

-

The XGBoost paper also introduces a modified algorithm for tree split finding which explicitly handles missing feature values. Recall that in order to find the best threshold value for a given feature, we can simply try all possible threshold values, recording the score for each. If some feature values are missing, the XGBoost split finding algorithm simply scores each threshold twice: once with missing value instances in the left node and once with them in the right node. The best split will then specify both the threshold value and to which node instances with missing values should be assigned. The paper calls this the sparsity aware split finding routine, which is defined as Algorithm 2.
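To sketch the idea (this is my own illustrative function, not the library’s implementation), scoring a single threshold under the sparsity-aware routine might look like this, where the gain is evaluated once with the missing-value rows grouped into the left child and once with them in the right child:

import numpy as np

def split_gain_with_missing(g, h, feature, threshold, reg_lambda=1.0, gamma=0.0):
    # score the same threshold twice: once sending missing-value rows to the left
    # child, once sending them to the right; keep whichever direction gains more
    def score(G, H): return G**2 / (H + reg_lambda)
    def gain(left_mask):
        right_mask = ~left_mask
        return 0.5 * (score(g[left_mask].sum(), h[left_mask].sum())
                      + score(g[right_mask].sum(), h[right_mask].sum())
                      - score(g.sum(), h.sum())) - gamma
    missing = np.isnan(feature)
    goes_left = ~missing & (feature <= threshold)
    gain_left, gain_right = gain(goes_left | missing), gain(goes_left)
    return (gain_left, 'left') if gain_left >= gain_right else (gain_right, 'right')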

-
-
-

Preventing Further Splitting

-

In addition to min_split_loss discussed above, XGBoost offers another parameter for limiting further tree splitting called min_child_weight. This name is a little confusing to me because the word “weight” has various meanings. In the context of this parameter, “weight” refers to the sum of the hessians over instances in the node. For squared error loss, $h_i = 1$ for every instance, so this is equivalent to the number of samples. Thus this parameter generalizes the notion of the minimum number of samples allowed in a terminal node.

-
-
-

Sampling

-

XGBoost takes a cue from Random Forest and introduces both column and row subsampling. These sampling methods can prevent overfitting and reduce training time by limiting the amount of data to be processed during boosting.

-

Like random forest, XGBoost implements column subsampling, which limits tree split finding to randomly selected subsets of features. XGBoost provides column sampling for each tree, for each depth level within a tree, and for each split point within a tree, controlled by colsample_bytree, colsample_bylevel, and colsample_bynode respectively.

-

One interesting distinction is that XGBoost implements row sampling without replacement using subsample, whereas random forest uses bootstrapping. The choice to bootstrap rows in RF probably stemmed from a desire to use as much data as possible while training on the smaller datasets of the 1990’s when RF was developed. With larger datasets and the ability to generate a large number of trees, XGBoost simply takes a subsample of rows for each tree.
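To make these knobs concrete, here is an illustrative native-API parameter dictionary (the values are arbitrary; the keys are standard XGBoost parameters):

params = {
    'min_child_weight': 25,    # minimum sum of instance hessians allowed in a node
    'subsample': 0.8,          # row subsampling (without replacement) per tree
    'colsample_bytree': 0.8,   # column subsampling per tree
    'colsample_bylevel': 1.0,  # column subsampling per depth level
    'colsample_bynode': 1.0,   # column subsampling per split
}
# e.g. model = xgb.train(params, dtrain, num_boost_round=50), where dtrain is an xgb.DMatrix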

-
-
-
-

Scalability

-

Even though we’re focused on statistical learning, I figured I’d comment on why XGBoost is highly scalable. Basically it boils down to efficient, parallelizable, and distributable methods for growing trees. You’ll notice there is a tree_method parameter which allows you to choose between the greedy exact algorithm (like the one we discussed in the decision tree from scratch post) and the approximate algorithm, which offers various scalability-related functionality, notably including the ability to consider only a small number of candidate split points instead of trying all possible splits. The algorithm also uses clever tricks like pre-sorting data for split finding and caching frequently needed values.
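For example (an illustrative fragment), switching between the two algorithms is just a parameter change:

params_exact = {'tree_method': 'exact'}    # greedy exact split finding: try every threshold
params_approx = {'tree_method': 'approx'}  # approximate split finding: only candidate quantiles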

-
-

Why XGBoost is so Successful

-

As I mentioned in the intro, XGBoost is simply a very good implementation of the gradient boosting tree model. Therefore it inherits all the benefits of decision trees and tree ensembles, while making even further improvements over the classic gradient boosting machine. These improvements boil down to

-
    -
  1. more ways to control overfitting
  2. elegant handling of custom objectives
  3. scalability
-

First, XGBoost introduces two new tree regularization hyperparameters, $\gamma$ and $\lambda$, which are baked directly into its objective function. Combining these with the additional column and row sampling functionality provides a variety of ways to reduce overfitting.

-

Second, the XGBoost formulation provides a much more elegant way to train models on custom objective functions. Recall that for custom objectives, the classic GBM finds tree structure by fitting a squared error decision tree to the gradients of the loss function and then sets each leaf’s predicted value by running a numerical optimization routine to find the optimal predicted value.

-

The XGBoost formulation improves on this two-stage approach by unifying the generation of tree structure and predicted values. Both the split scoring metric and the predicted values are directly computable from the instance gradient and hessian values, which are connected directly back to the overall training objective. This also removes the need for additional numerical optimizations, which contributes to speed, stability, and scalability.

-

Finally, speaking of scalability, XGBoost emerged at a time when industrial dataset size was exploding. Many use cases require scalable ML systems, and all use cases benefit from faster training and higher model development velocity.

-
-
-
-

Wrapping Up

-

Well, there you go, those are the salient ideas behind XGBoost, the gold standard in gradient boosting model implementations. Hopefully now we all understand the mathematical basis for the algorithm and appreciate the key improvements it makes over the classic GBM. If you want to go even deeper, you can join us for the next post where we’ll roll up our sleeves and implement XGBoost entirely from scratch.

-
-
-

References

-

The XGBoost paper

-
-
-

Exercise

-

Prove that the XGBoost Newton Descent generalizes the classic GBM gradient descent. Hint: show that XGBoost with a squared error objective and no regularization reduces to the classic GBM.
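One way to start (my sketch of the argument, not the post’s): for squared error loss we have $g_i = \hat{y}_i^{(t-1)} - y_i$ and $h_i = 1$, so with $\lambda = 0$ Equation (5) gives

$$w_j^* = -\frac{\sum_{i \in I_j} g_i}{|I_j|} = \frac{1}{|I_j|} \sum_{i \in I_j} \left(y_i - \hat{y}_i^{(t-1)}\right),$$

the mean residual in the leaf, which is exactly the terminal node value the classic GBM uses for squared error; with $\gamma = 0$ the gain criterion likewise reduces to the squared-error split score of a plain regression tree fit to the residuals.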

-
- - ]]> - gradient boosting - https://randomrealizations.com/posts/xgboost-explained/index.html - Sat, 12 Mar 2022 21:00:00 GMT - - - - Decision Tree from Scratch - Matt Bowers - https://randomrealizations.com/posts/decision-tree-from-scratch/index.html -

-

Yesterday we had a lovely discussion about the key strengths and weaknesses of decision trees and why tree ensembles are so great. But today, gentle reader, we abandon our philosophizing and get down to the business of implementing one of these decision trees from scratch.

-

A note before we get started. This is going to be the most involved scratch-build that we’ve done at Random Realizations so far. It is not the kind of algorithm that I could just sit down and write all at once. We need to start with a basic frame and then add functionality step by step, testing all along the way to make sure things are working properly. Since I’m writing this in a jupyter notebook, I’ll try to give you a sense for how I actually put the algorithm together interactively in pieces, eventually landing on a fully-functional final product.

-

Shall we?

-
-

Binary Tree Data Structure

-

A decision tree takes a dataset with features and a target, partitions the feature space into chunks, and assigns a prediction value to each chunk. Since each partitioning step divides one chunk in two, and since the partitioning is done recursively, it’s natural to use a binary tree data structure to represent a decision tree.

-

The basic idea of the binary tree is that we define a class to represent nodes in the tree. If we want to add children to a given node, we simply assign them as attributes of the parent node. The child nodes we add are themselves instances of the same class, so we can add children to them in the same way.

-

Let’s start out with a simple class for our decision tree. It takes a single value called max_depth as input, which will dictate how many layers of child nodes should be inserted below the root. This controls the depth of the tree. As long as max_depth is positive, the parent will instantiate two new instances of the binary tree node class, passing along max_depth decremented by one and attaching the two children to itself as attributes called left and right.

-
import math
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt


class DecisionTree():
    
    def __init__(self, max_depth):
        assert max_depth >= 0, 'max_depth must be nonnegative'
        self.max_depth = max_depth
        if max_depth > 0:
            self.left = DecisionTree(max_depth=max_depth-1)
            self.right = DecisionTree(max_depth=max_depth-1)

Let’s make a new instance of our decision tree class, a tree with depth 2.

-
t = DecisionTree(max_depth=2)

-
Binary tree structure diagram
-
-
-

We can access individual nodes and check their value of max_depth.

-
-
t.max_depth, t.left.max_depth, t.left.right.max_depth
-
-
(2, 1, 0)
-
-
-

Our full decision tree can expand on this idea where each node receives some input, modifies it, creates two child nodes, and passes the modified input along to them. Specifically, each node in our decision tree will receive a dataset, determine how best to split the dataset into two parts, create two child nodes, and pass one part of the data to the left child and the other part to the right child.

-

All we have to do now is add some additional functionality to our decision tree. First we’ll start by capturing all the inputs we need to grow a tree, which include the feature dataframe X, the target array y, max_depth to explicitly limit tree depth, min_samples_leaf to specify the minimum number of observations that are allowed in a leaf node, and an optional idxs which specifies the indices of data that the node should use. The indices argument is useful for users of our decision tree because it will allow them to implement row subsampling in ensemble methods like random forest. It will also be handy for internal use inside the decision tree when passing data along to child nodes; instead of passing copies of the two data subsets, we’ll just pass a reference to the full dataset and pass along a set of indices to identify that node’s instance subset.

-

Once we get our input, we’ll do a little bit of input validation and store things that we want to keep as object attributes. In case this is a leaf node, we’ll go ahead and compute its predicted value; since this is a regression tree, the prediction is just the mean of the target y. We’ll also go ahead and initialize a score metric which we’ll use to help us find the best split later; since lower scores are going to be better, we’ll initialize it to positive infinity. Finally, we’ll push the logic to add child nodes into a method called _maybe_insert_child_nodes that we’ll define next.

-
-
-
Note: a leading underscore in a method name indicates the method is for internal use and not part of the user-facing API of the class.

-
-
-
-
class DecisionTree():
    
    def __init__(self, X, y, min_samples_leaf=5, max_depth=6, idxs=None):
        assert max_depth >= 0, 'max_depth must be nonnegative'
        assert min_samples_leaf > 0, 'min_samples_leaf must be positive'
        self.min_samples_leaf, self.max_depth = min_samples_leaf, max_depth
        if isinstance(y, pd.Series): y = y.values
        if idxs is None: idxs = np.arange(len(y))
        self.X, self.y, self.idxs = X, y, idxs
        self.n, self.c = len(idxs), X.shape[1]
        self.value = np.mean(y[idxs]) # node's prediction value
        self.best_score_so_far = float('inf') # initial loss before split finding
        if self.max_depth > 0:
            self._maybe_insert_child_nodes()
            
    def _maybe_insert_child_nodes(self):
        pass

Split Finding

+

Split finding follows the exact same pattern that we used in the decision tree, except we keep track of gradient and hessian stats instead of target value stats, and of course we use the XGBoost gain criterion (equation 7 from the paper) for evaluating splits.

+
+
def _find_better_split(self, feature_idx):
    x = self.X.values[self.idxs, feature_idx]
    g, h = self.g[self.idxs], self.h[self.idxs]
    sort_idx = np.argsort(x)
    sort_g, sort_h, sort_x = g[sort_idx], h[sort_idx], x[sort_idx]
    sum_g, sum_h = g.sum(), h.sum()
    sum_g_right, sum_h_right = sum_g, sum_h
    sum_g_left, sum_h_left = 0., 0.

    for i in range(0, self.n - 1):
        g_i, h_i, x_i, x_i_next = sort_g[i], sort_h[i], sort_x[i], sort_x[i + 1]
        sum_g_left += g_i; sum_g_right -= g_i
        sum_h_left += h_i; sum_h_right -= h_i
        if sum_h_left < self.min_child_weight or x_i == x_i_next:
            continue
        if sum_h_right < self.min_child_weight: break

        gain = 0.5 * ((sum_g_left**2 / (sum_h_left + self.reg_lambda))
                      + (sum_g_right**2 / (sum_h_right + self.reg_lambda))
                      - (sum_g**2 / (sum_h + self.reg_lambda))
                      ) - self.gamma/2 # Eq(7) in the xgboost paper
        if gain > self.best_score_so_far: 
            self.split_feature_idx = feature_idx
            self.best_score_so_far = gain
            self.threshold = (x_i + x_i_next) / 2
            
TreeBooster._find_better_split = _find_better_split

Now in order to test our class, we’ll need some actual data. We can use the same scikit-learn diabetes data from the last post.

-
-
from sklearn.datasets import load_diabetes

X, y = load_diabetes(as_frame=True, return_X_y=True)
t = DecisionTree(X, y, min_samples_leaf=5, max_depth=5)

So far, so good.

-
-
-

Inserting Child Nodes

-

Our node inserting function _maybe_insert_child_nodes needs to first find the best split; then if a valid split exists, it needs to insert the child nodes. To find the best valid split, we need to loop through the columns and search each one for the best valid split. Again we’ll push the logic of finding the best split into a function that we’ll define later. Next if no split was found, we need to bail by returning before trying to insert the child nodes. To check if this node is a leaf (i.e. it shouldn’t have child nodes), we define a property called is_leaf which will just check if the best score so far is still infinity, in which case no split was found and the node is a leaf.

-

If a valid split was found, then we need to insert the child nodes. We’ll assume that our split finding function assigned attributes called split_feature_idx and threshold to tell us the split feature’s index and the split threshold value. We then use these to compute the indices of the data to be passed to the child nodes; the left child gets instances where the split feature value is less than or equal to the threshold, and the right child node gets instances where the split feature value is greater than the threshold. Then we create two new decision trees, passing the corresponding data indices to each and assigning them to the left and right attributes of the current node.

-
-
def _maybe_insert_child_nodes(self):
    for j in range(self.c): 
        self._find_better_split(j)
    if self.is_leaf: # do not insert children
        return 
    x = self.X.values[self.idxs, self.split_feature_idx]
    left_idx = np.nonzero(x <= self.threshold)[0]
    right_idx = np.nonzero(x > self.threshold)[0]
    self.left = DecisionTree(self.X, self.y, self.min_samples_leaf, 
                             self.max_depth - 1, self.idxs[left_idx])
    self.right = DecisionTree(self.X, self.y, self.min_samples_leaf, 
                              self.max_depth - 1, self.idxs[right_idx])

def _find_better_split(self, feature_idx):
    pass

@property
def is_leaf(self): return self.best_score_so_far == float('inf')

Prediction

+

Prediction works exactly the same as in our decision tree, and the methods are nearly identical.

+
+
def predict(self, X):
    return np.array([self._predict_row(row) for i, row in X.iterrows()])

def _predict_row(self, row):
    if self.is_leaf: 
        return self.value
    child = self.left if row[self.split_feature_idx] <= self.threshold \
        else self.right
    return child._predict_row(row)

TreeBooster.predict = predict 
TreeBooster._predict_row = _predict_row
-

To test these new methods, we can assign them to our DecisionTree class and create a new class instance to make sure things are still working.

-
-
DecisionTree._maybe_insert_child_nodes = _maybe_insert_child_nodes
DecisionTree._find_better_split = _find_better_split
DecisionTree.is_leaf = is_leaf
t = DecisionTree(X, y, min_samples_leaf=5, max_depth=6)

Yep, we’re still looking good.

-
-

Split Finding

-

Now we need to fill in the functionality of the split finding method. The overall strategy is to consider every possible way to split on the current feature, measuring the quality of each potential split with some scoring mechanism, and keeping track of the best split we’ve seen so far. We’ll come back to the issue of how to try all the possible splits in a moment, but let’s start by figuring out how to score a particular potential split.

-

Like other machine learning models, trees are trained by attempting to minimize some loss function that measures how well the model predicts the target data. We’ll be training our regression tree to minimize squared error.

-

-

For a given node, we can replace with because each node uses the sample mean of its target instances as its prediction. We can then rewrite the loss for a given node as

-

-

We can then evaluate potential splits by comparing the loss after splitting to the loss before splitting, where the split with the greatest loss reduction is best. Let’s work out a simple expression for the loss reduction from a given split.

-

Let be the set of data instances in the current node, and let and be the instances that fall into the left and right child nodes of a proposed split. Let be the total loss for all instances in the node, while and are the losses for the left and right child nodes. The total loss contributed by instances in prior to any split is

-

-

And the loss after splitting into and is

-

-

The reduction in loss from this split is

$$\Delta L = L_I - (L_{I_L} + L_{I_R}) = \sum_{i \in I} (y_i - \bar{y}_I)^2 - \sum_{i \in I_L} (y_i - \bar{y}_{I_L})^2 - \sum_{i \in I_R} (y_i - \bar{y}_{I_R})^2$$

Since the $\sum_i y_i^2$ terms cancel, we can simplify.

$$\Delta L = \frac{\left(\sum_{i \in I_L} y_i\right)^2}{n_L} + \frac{\left(\sum_{i \in I_R} y_i\right)^2}{n_R} - \frac{\left(\sum_{i \in I} y_i\right)^2}{n}$$

This is a really nice formulation of the split scoring metric from a computational complexity perspective. We can sort the data by the feature values then, starting with the smallest min_samples_leaf instances in the left node and the rest in the right node, we check the score. Then to check the next split, we simply move a single target value from the right node into the left node, updating the score by subtracting it from the right node’s partial sum and adding it to the left node’s partial sum. The third term is constant for all splits, so we only need to compute it once. If any split’s score is lower than the best score so far, then we update the best score so far, the split feature, and the threshold value. When we’re done we can be sure we found the best possible split. The time bottleneck is the sort, which puts us at an average time complexity of .

-
-
def _find_better_split(self, feature_idx):
    x = self.X.values[self.idxs, feature_idx]
    y = self.y[self.idxs]
    sort_idx = np.argsort(x)
    sort_y, sort_x = y[sort_idx], x[sort_idx]
    sum_y, n = y.sum(), len(y)
    sum_y_right, n_right = sum_y, n
    sum_y_left, n_left = 0., 0

    for i in range(0, self.n - self.min_samples_leaf):
        y_i, x_i, x_i_next = sort_y[i], sort_x[i], sort_x[i + 1]
        sum_y_left += y_i; sum_y_right -= y_i
        n_left += 1; n_right -= 1
        if n_left < self.min_samples_leaf or x_i == x_i_next:
            continue
        score = - sum_y_left**2 / n_left - sum_y_right**2 / n_right + sum_y**2 / n
        if score < self.best_score_so_far:
            self.best_score_so_far = score
            self.split_feature_idx = feature_idx
            self.threshold = (x_i + x_i_next) / 2

The Complete XGBoost From Scratch Implementation

+

Here’s the entire implementation which produces a usable XGBoostModel class with fit and predict methods.

+
+
from collections import defaultdict

# the TreeBooster class defined above provides the tree boosters used here
class XGBoostModel():
    '''XGBoost from Scratch
    '''
    
    def __init__(self, params, random_seed=None):
        self.params = defaultdict(lambda: None, params)
        self.subsample = self.params['subsample'] \
            if self.params['subsample'] else 1.0
        self.learning_rate = self.params['learning_rate'] \
            if self.params['learning_rate'] else 0.3
        self.base_prediction = self.params['base_score'] \
            if self.params['base_score'] else 0.5
        self.max_depth = self.params['max_depth'] \
            if self.params['max_depth'] else 5
        self.rng = np.random.default_rng(seed=random_seed)
                
    def fit(self, X, y, objective, num_boost_round, verbose=False):
        current_predictions = self.base_prediction * np.ones(shape=y.shape)
        self.boosters = []
        for i in range(num_boost_round):
            gradients = objective.gradient(y, current_predictions)
            hessians = objective.hessian(y, current_predictions)
            sample_idxs = None if self.subsample == 1.0 \
                else self.rng.choice(len(y), 
                                     size=math.floor(self.subsample*len(y)), 
                                     replace=False)
            booster = TreeBooster(X, gradients, hessians, 
                                  self.params, self.max_depth, sample_idxs)
            current_predictions += self.learning_rate * booster.predict(X)
            self.boosters.append(booster)
            if verbose: 
                print(f'[{i}] train loss = {objective.loss(y, current_predictions)}')
            
    def predict(self, X):
        return (self.base_prediction + self.learning_rate 
                * np.sum([booster.predict(X) for booster in self.boosters], axis=0))

Again, we assign the split finding method to our class and instantiate a new tree to make sure things are still working.

-
-
DecisionTree._find_better_split = _find_better_split
t = DecisionTree(X, y, min_samples_leaf=5, max_depth=6)
X.columns[t.split_feature_idx], t.threshold

('s5', -0.0037611760063045703)

Nice! Looks like the tree started with a split on the s5 feature.

-
-
-

Inspecting the Tree

-

While we’re developing something complex like a decision tree class, we need a good way to inspect the object to help with testing and debugging. Let’s write a quick string representation method to make it easier to check what’s going on with a particular node.

-
-
def __repr__(self):
    s = f'n: {self.n}'
    s += f'; value:{self.value:0.2f}'
    if not self.is_leaf:
        split_feature_name = self.X.columns[self.split_feature_idx]
        s += f'; split: {split_feature_name} <= {self.threshold:0.3f}'
    return s

We can assign the string representation method to the class and print a few nodes.

-
-
DecisionTree.__repr__ = __repr__
t = DecisionTree(X, y, min_samples_leaf=5, max_depth=2)
print(t)
print(t.left)
print(t.left.left)

n: 442; value:152.13; split: s5 <= -0.004
n: 218; value:109.99; split: bmi <= 0.006
n: 171; value:96.31

Prediction

-

We need a public predict method that takes a feature dataframe and returns an array of predictions. We’ll need to look up the predicted value for one instance at a time and stitch them together in an array. We can do that by iterating over the feature dataframe rows with a list comprehension that calls a _predict_row method to grab the prediction for each row. The row predict method needs to return the current node’s predicted value if it’s a leaf, or if not, it needs to identify the appropriate child node based on its split and ask it for a prediction.

-
-
def predict(self, X):
    return np.array([self._predict_row(row) for i, row in X.iterrows()])

def _predict_row(self, row):
    if self.is_leaf: 
        return self.value
    child = self.left if row[self.split_feature_idx] <= self.threshold \
        else self.right
    return child._predict_row(row)

Let’s assign the predict methods and make predictions on a few rows.

-
-
DecisionTree.predict = predict
DecisionTree._predict_row = _predict_row
t.predict(X.iloc[:3, :])

array([225.87962963,  96.30994152, 225.87962963])

The Complete Decision Tree Implementation

-

Here’s the implementation, all in one place.

-
-
class DecisionTree():
    
    def __init__(self, X, y, min_samples_leaf=5, max_depth=6, idxs=None):
        assert max_depth >= 0, 'max_depth must be nonnegative'
        assert min_samples_leaf > 0, 'min_samples_leaf must be positive'
        self.min_samples_leaf, self.max_depth = min_samples_leaf, max_depth
        if isinstance(y, pd.Series): y = y.values
        if idxs is None: idxs = np.arange(len(y))
        self.X, self.y, self.idxs = X, y, idxs
        self.n, self.c = len(idxs), X.shape[1]
        self.value = np.mean(y[idxs]) # node's prediction value
        self.best_score_so_far = float('inf') # initial loss before split finding
        if self.max_depth > 0:
            self._maybe_insert_child_nodes()
            
    def _maybe_insert_child_nodes(self):
        for j in range(self.c): 
            self._find_better_split(j)
        if self.is_leaf: # do not insert children
            return 
        x = self.X.values[self.idxs, self.split_feature_idx]
        left_idx = np.nonzero(x <= self.threshold)[0]
        right_idx = np.nonzero(x > self.threshold)[0]
        self.left = DecisionTree(self.X, self.y, self.min_samples_leaf, 
                                 self.max_depth - 1, self.idxs[left_idx])
        self.right = DecisionTree(self.X, self.y, self.min_samples_leaf, 
                                  self.max_depth - 1, self.idxs[right_idx])
    
    @property
    def is_leaf(self): return self.best_score_so_far == float('inf')
    
    def _find_better_split(self, feature_idx):
        x = self.X.values[self.idxs, feature_idx]
        y = self.y[self.idxs]
        sort_idx = np.argsort(x)
        sort_y, sort_x = y[sort_idx], x[sort_idx]
        sum_y, n = y.sum(), len(y)
        sum_y_right, n_right = sum_y, n
        sum_y_left, n_left = 0., 0
    
        for i in range(0, self.n - self.min_samples_leaf):
            y_i, x_i, x_i_next = sort_y[i], sort_x[i], sort_x[i + 1]
            sum_y_left += y_i; sum_y_right -= y_i
            n_left += 1; n_right -= 1
            if n_left < self.min_samples_leaf or x_i == x_i_next:
                continue
            score = - sum_y_left**2 / n_left - sum_y_right**2 / n_right + sum_y**2 / n
            if score < self.best_score_so_far:
                self.best_score_so_far = score
                self.split_feature_idx = feature_idx
                self.threshold = (x_i + x_i_next) / 2
                
    def __repr__(self):
        s = f'n: {self.n}'
        s += f'; value:{self.value:0.2f}'
        if not self.is_leaf:
            split_feature_name = self.X.columns[self.split_feature_idx]
            s += f'; split: {split_feature_name} <= {self.threshold:0.3f}'
        return s
    
    def predict(self, X):
        return np.array([self._predict_row(row) for i, row in X.iterrows()])
    
    def _predict_row(self, row):
        if self.is_leaf: 
            return self.value
        child = self.left if row[self.split_feature_idx] <= self.threshold \
            else self.right
        return child._predict_row(row)

From Scratch versus Scikit-Learn

-

As usual, we’ll test our homegrown handiwork by comparing it to the existing implementation in scikit-learn. First let’s train both models on the California Housing dataset which gives us 20k instances and 8 features to predict median house price by district.

-
-

+

Testing

+

Let’s take this baby for a spin and benchmark its performance against the actual XGBoost library. We use the scikit learn California housing dataset for benchmarking.

+
+
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, 
                                                    random_state=43)

Let’s start with a nice friendly squared error objective function for training. We should probably have a future post all about how to define custom objective functions in XGBoost, but for now, here’s how I define squared error.

+
+
class SquaredErrorObjective():
+    def loss(self, y, pred): return np.mean((y - pred)**2)
+    def gradient(self, y, pred): return pred - y
+    def hessian(self, y, pred): return np.ones(len(y))
+
+

Here I use a more or less arbitrary set of hyperparameters for training. Feel free to play around with tuning and trying other parameter combinations yourself.

+
+
import xgboost as xgb

params = {
    'learning_rate': 0.1,
    'max_depth': 5,
    'subsample': 0.8,
    'reg_lambda': 1.5,
    'gamma': 0.0,
    'min_child_weight': 25,
    'base_score': 0.0,
    'tree_method': 'exact',
}
num_boost_round = 50

# train the from-scratch XGBoost model
model_scratch = XGBoostModel(params, random_seed=42)
model_scratch.fit(X_train, y_train, SquaredErrorObjective(), num_boost_round)

# train the library XGBoost model
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
model_xgb = xgb.train(params, dtrain, num_boost_round)

Let’s check the models’ performance on the held out test data to benchmark our implementation.

+
+
pred_scratch = model_scratch.predict(X_test)
pred_xgb = model_xgb.predict(dtest)
print(f'scratch score: {SquaredErrorObjective().loss(y_test, pred_scratch)}')
print(f'xgboost score: {SquaredErrorObjective().loss(y_test, pred_xgb)}')

scratch score: 0.2434125759558149
xgboost score: 0.24123239765807963

Well, look at that! Our scratch-built XGBoost is looking pretty consistent with the library. Go us!

Wrapping Up

-

Holy cow, we just implemented a decision tree using nothing but numpy. I hope you enjoyed the scratch build as much as I did, and I hope you got a little bit better at coding (I certainly did). That was actually way harder than I expected, but looking back at the finished product, it doesn’t seem so bad right? I almost thought we were going to get away with not implementing our own decision tree, but it turns out that this will be super helpful for us when it comes time to implement XGBoost from scratch.

+

I’d say this is a pretty good milestone for us here at Random Realizations. We’ve been hammering away at the various concepts around gradient boosting, leaving a trail of equations and scratch-built algos in our wake. Today we put all of that together to create a legit scratch build of XGBoost, something that would have been out of reach for me before we embarked on this journey together over a year ago. To anyone with the patience to read through this stuff, cheers to you! I hope you’re learning and enjoying this as much as I am.

-
-

References

-

This implementation is inspired and partially adapted from Jeremy Howard’s live coding of a Random Forest as part of the fastai ML course.

+
+

Reader Exercises

+

If you want to take this a step further and deepen your understanding and coding abilities, let me recommend some exercises for you.

+
    +
  1. Implement column subsampling. XGBoost itself provides column subsampling by tree, by level, and by node. Try implementing by tree first, then try adding by level or by node as well. These should be pretty straightforward to do. (See the sketch below for one way to get started.)
  2. Implement sparsity aware split finding for missing feature values (Algorithm 2 in the XGBoost paper). This will be a little more involved, since you'll need to refactor and modify several parts of the tree booster class.
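For exercise 1, one possible starting point (my own sketch, not part of the original class) is a helper that picks the subset of columns a single node may split on; inside _maybe_insert_child_nodes you could then loop over these indices instead of range(self.c).

import numpy as np

def sample_feature_idxs(n_features, colsample_bynode, rng=None):
    # choose the random subset of columns that a single node may split on
    rng = rng or np.random.default_rng()
    n_sampled = max(1, round(colsample_bynode * n_features))
    return rng.choice(n_features, size=n_sampled, replace=False)

# e.g. inside _maybe_insert_child_nodes:
#     for i in sample_feature_idxs(self.c, self.colsample_bynode): self._find_better_split(i)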
]]>
python
gradient boosting
from scratch
https://randomrealizations.com/posts/xgboost-from-scratch/index.html
Fri, 06 May 2022 22:00:00 GMT
+

+
Tree branches on a chilly day in Johnson City
+
+
+

Ahh, XGBoost, what an absolutely stellar implementation of gradient boosting. Once Tianqi Chen and Carlos Guestrin of the University of Washington published the XGBoost paper and shared the open source code in the mid 2010’s, the algorithm quickly gained adoption in the ML community, appearing in over half of winning Kagle submissions in 2015. Nowadays it’s certainly among the most popular gradient boosting libraries, along with LightGBM and CatBoost, although the highly scientific indicator of GitHub stars per year indicates that it is in fact the most beloved gradient boosting package of all. Since it was the first of the modern popular boosting frameworks, and since benchmarking indicates that no other boosting algorithm outperforms it, we can comfortably focus our attention on understanding XGBoost.

+

The XGBoost authors identify two key aspects of a machine learning system: (1) a flexible statistical model and (2) a scalable learning system to fit that model using data. XGBoost improves on both of these aspects, providing a more flexible and feature-rich statistical model and building a truly scalable system to fit it. In this post we’re going to focus on the statistical modeling innovations, outlining the key differences from the classic gradient boosting machine and divinginto the mathematical derivation of the XGBoost learning algorithm. If you’re not already familiar with gradient boosting, go back and read the earlier posts in the series before jumping in here.

+

Buckle up, dear reader. Today we understand how XGBoost works, no hand waving required.

+
+

XGBoost is a Gradient Boosting Machine

+

At a high level, XGBoost is an iteratively constructed composite model, just like the classic gradient boosting machine we discussed back in the GBM post . The final model takes the form

+

+

where is the base prediction, is the learning rate hyperparameter that helps control overfitting by reducing the contributions of each booster, and each of the boosters is a decision tree. To help us connect the dots between theory and code, whenever we encounter new hyperparameters, I’ll point out their names from the XGBoost Parameter Documentation. So, can be set by base_score, and is set by either eta or learning_rate.

+

XGBoost introduces two key statistical learning improvements over the classic gradient boosting model. First, it reimagines the gradient descent algorithm used for training, and second it uses a custom-built decision tree with extra functionality as its booster. We’ll dive into each of these key innovations in the following sections.

+
+
+

Descent Algorithm Innovations

+
+

Regularized Objective Function

+

In the post on GBM with any loss function, we looked at loss functions of the form which compute some distance between targets and predictions and sum them up over the training dataset. XGBoost introduces regularization into the objective function so that the objective takes the form

+

+

where is some twice-differentiable loss function. is a regularization that penalizes the complexity of each tree booster, taking the form

+

+

where is the number of leaf nodes and is the squared sum of the leaf prediction values. This introduces two new hyperparameters: which penalizes the number of leaf nodes and which is the L-2 regularization parameter for leaf predicted values. These are set by gamma and reg_lambbda in the XGBoost parametrization. Together, these provide powerful new controls to reduce overfitting due to overly complex tree boosters. Note that reduces the objective back to an unregularized loss function as used in the classic GBM.

+
+
+

An Aside on Newton’s Method

+

As we’ll see soon, XGBoost uses Newton’s Method to minimize its objective function, so let’s start with a quick refresher.

+

Newton’s method is an iterative procedure for minimizing a function . At each step we have some input , and our goal is to find a nudge value such that

+

+

To find a good nudge value , we generate a local quadratic approximation of the function in the neighborhood of the input , and then we find the input value that would bring us to the minimum of the quadratic approximation.

+
+
+

+
Schematic of Newton’s method
+
+
+

The figure shows a single Newton step where we start at , find the local quadratic approximation, and then jump a distance along the -axis to land at the minimum of the quadratic. If we iterate in this way, we are likely to land close to the minimum of .

+

So how do we compute the quadratic approximation? We use the second order Taylor series expansion of near the point .

+

+

To find the nudge value that minimizes the quadratic approximation, we can take the derivative with respect to , set it to zero, and solve for .

+

+

+

And as long as (i.e., the parabola is pointing up), .

+
+
+

Tree Boosting with Newton’s Method

+

This lands us at the heart of XGBoost, which uses Newton’s method, rather than gradient descent, to guide each round of boosting. This explanation will correspond very closely to section 2.2 of the XGBoost paper, but here I’ll explicitly spell out some of the intermediate steps which are omitted from their derivation, and you’ll get some additional commentary from me along the way.

+
+

Newton Descent in Tree Space

+

Suppose we’ve done boosting rounds, and we want to add the -th booster to our composite model. Our current model’s prediction for instance is . If we add a new tree booster to our model, the objective function would give

+

+

We need to choose so that it decreases the loss, i.e. we want

+

+

Does that sound familiar? In the previous section we used Newton’s method to find a value of that would make . Let’s try the same thing with our loss function. To be explicit, the parallels are: , , and .

+

Let’s start by finding the second order Taylor series approximation for the loss around the point .

+

+

where

+

+

and

+

+

are the first and second order partial derivatives of the loss with respect to the current predictions. The XGBoost paper calls these the gradients and hessians, respectively. Remember that when we specify an actual loss function to use, we would also specify the functional form of the gradients and hessians, so that they are directly computable.

+

Now we can go back and substitute our quadratic approximation in for the loss function to get an approximation of the objective function in the neighborhood of ..

+

+

Since is constant regardless of our choice of , we can drop it and instead work with the modified objective, which gives us Equation (3) from the paper.

+

+

Now the authors are about to do something great. They’re about to show how to directly compute the optimal prediction values for the leaf nodes of . We’ll circle back in a moment about how we find a good structure for , i.e. good node splits, but we’re going to find the optimal predicted values for any tree structure having terminal nodes. Let denote the set of instances that are in the -th leaf node of . Then we can rewrite the objective.

+

+

We notice that for all instances in , the tree yields the same predicted value . Substituting in for the predicted values and expanding we get

+

+

Rearranging terms we obtain Equation (4).

+

+

For each leaf node , our modified objective function is quadratic in . To find the optimal predicted values we take the derivative, set to zero, and solve for .

+

+

This yields Equation (5).

+

+
+
+

Split Finding

+

Now that we know how to find the optimal predicted value for any leaf node, we need to identify a criterion for finding a good tree structure, which boils down to finding the best split for a given node. Back in the [decision tree from scratch](/decision-tree-from-scratch post, we derived a split evaluation metric based on the reduction in the objective function associated with a particular split.
+To do that, first we need a way to compute the objective function given a particular tree structure. Substituting the optimal predicted values into the objective function, we get Equation (6).

+

+

We can then evaluate potential splits by comparing the objective before making a split to the objective after making a split, where the split with the maximum reduction in objective (a.k.a. gain) is best.

+

More formally, let be the set of data instances in the current node, and let and be the instances that fall into the left and right child nodes of a proposed split. Let be the total loss for all instances in the node, while and are the losses for the left and right child nodes. The total loss contributed by instances in node prior to any split is

+

+

And the loss after splitting into and is

+

+

The gain from this split is then

+

+

which is Equation (7) from the paper. In practice it makes sense to accept a split only if the gain is positive, thus the parameter sets the minimum gain required to make a further split. This is why can be set with the parameter gamma or the more descriptivemin_loss_split.

+
+
+
+
+

Tree Booster Innovations

+
+

Missing Values and Sparsity-Aware Split Finding

+

The XGBoost paper also introduces a modified algorithm for tree split finding which explicitly handles missing feature values. Recall that in order to find the best threshold value for a given feature, we can simply try all possible threshold values, recording the score for each. If some feature values are missing, the XGBoost split finding algorithm simply scores each threshold twice: once with missing value instances in the left node and once with them in the right node. The best split will then specify both the threshold value and to which node instances with missing values should be assigned. The paper calls this the sparsity aware split finding routine, which is defined as Algorithm 2.

+
+
+

Preventing Further Splitting

+

In addition to min_split_loss discussed above, XGBoost offers another parameter for limiting further tree splitting called min_child_weight. This name is a little confusing to me because the word "weight" has various meanings. In the context of this parameter, "weight" refers to the sum of the hessians over instances in the node. For squared error loss the hessian is 1 for every instance, so the sum is just the number of samples in the node. Thus this parameter generalizes the notion of the minimum number of samples allowed in a terminal node.

+
+
+

Sampling

+

XGBoost takes a cue from Random Forest and introduces both column and row subsampling. These sampling methods can prevent overfitting and reduce training time by limiting the amount of data to be processed during boosting.

+

Like random forest, XGBoost implements column subsampling, which limits tree split finding to randomly selected subsets of features. XGBoost provides column sampling for each tree, for each depth level within a tree, and for each split point within a tree, controlled by colsample_bytree, colsample_bylevel, and colsample_bynode respectively.

+

One interesting distinction is that XGBoost implements row sampling without replacement using subsample, whereas random forest uses bootstrapping. The choice to bootstrap rows in RF probably stemmed from a desire to use as much data as possible while training on the smaller datasets of the 1990s when RF was developed. With larger datasets and the ability to generate a large number of trees, XGBoost simply takes a subsample of rows for each tree.

+
+
+
+

Scalability

+

Even though we’re focused on statistical learning, I figured I’d comment on why XGBoost is highly scalable. Basically it boils down to efficient, parallelizable, and distributable methods for growing trees. You’ll notice there is a tree_method parameter which allows you to choose between the greedy exact algorithm (like the one we discussed in the decision tree from scratch post) and the approximate algorithm, which offers various scalability-related functionality, notably including the ability to consider only a small number of candidate split points instead of trying all possible splits. The algorithm also uses clever tricks like pre-sorting data for split finding and caching frequently needed values.
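For example, switching to the histogram-based approximate algorithm is just a parameter change. Here's a tiny runnable sketch on made-up data; tree_method and max_bin are real XGBoost parameters, but the data and values here are arbitrary placeholders.

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.random((10_000, 5))
y = X @ np.arange(1, 6) + rng.normal(scale=0.1, size=len(X))
dtrain = xgb.DMatrix(X, label=y)

params = {
    'tree_method': 'hist',   # histogram-based approximate split finding
    'max_bin': 256,          # number of candidate thresholds per feature
    'objective': 'reg:squarederror',
}
booster = xgb.train(params, dtrain, num_boost_round=20)
```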

+
+

Why XGBoost is so Successful

+

As I mentioned in the intro, XGBoost is simply a very good implementation of the gradient boosting tree model. Therefore it inherits all the benefits of decision trees and tree ensembles, while making even further improvements over the classic gradient boosting machine. These improvements boil down to

+
    +
  1. more ways to control overfitting
  2. +
  3. elegant handling of custom objectives
  4. +
  5. scalability
  6. +
+

First, XGBoost introduces two new tree regularization hyperparameters, gamma and lambda, which are baked directly into its objective function. Combining these with the additional column and row sampling functionality provides a variety of ways to reduce overfitting.

+

Second, the XGBoost formulation provides a much more elegant way to train models on custom objective functions. Recall that for custom objectives, the classic GBM finds tree structure by fitting a squared error decision tree to the gradients of the loss function and then sets each leaf’s predicted value by running a numerical optimization routine to find the optimal predicted value.

+

The XGBoost formulation improves on this two-stage approach by unifying the generation of tree structure and predicted values. Both the split scoring metric and the predicted values are directly computable from the instance gradient and hessian values, which are connected directly back to the overall training objective. This also removes the need for additional numerical optimizations, which contributes to speed, stability, and scalability.

+

Finally, speaking of scalability, XGBoost emerged at a time when industrial dataset size was exploding. Many use cases require scalable ML systems, and all use cases benefit from faster training and higher model development velocity.

+
+
+
+

Wrapping Up

+

Well, there you go, those are the salient ideas behind XGBoost, the gold standard in gradient boosting model implementations. Hopefully now we all understand the mathematical basis for the algorithm and appreciate the key improvements it makes over the classic GBM. If you want to go even deeper, you can join us for the next post where we’ll roll up our sleeves and implement XGBoost entirely from scratch.

+
+
+

References

+

The XGBoost paper

+
+
+

Exercise

+

Prove that XGBoost’s Newton descent generalizes the classic GBM’s gradient descent. Hint: show that XGBoost with a squared error objective and no regularization reduces to the classic GBM.

+
+ + ]]> + gradient boosting + https://randomrealizations.com/posts/xgboost-explained/index.html + Sat, 12 Mar 2022 22:00:00 GMT + diff --git a/gradient-boosting-series.html b/gradient-boosting-series.html index 869b94d..09c5118 100644 --- a/gradient-boosting-series.html +++ b/gradient-boosting-series.html @@ -168,7 +168,7 @@
Subscribe
- + @@ -204,7 +204,7 @@

Gradient Boosting

I recommend reading through the series in order, since concepts tend to build on earlier ideas.

-
+
@@ -242,7 +242,7 @@

-
+
@@ -274,7 +274,7 @@

-
+
@@ -306,7 +306,7 @@

-
+
@@ -344,7 +344,7 @@

-
+
@@ -376,7 +376,7 @@

-
+
@@ -414,7 +414,7 @@

-
+
diff --git a/index.html b/index.html index 8152445..48e4f4d 100644 --- a/index.html +++ b/index.html @@ -168,7 +168,7 @@
Subscribe
- + @@ -179,7 +179,7 @@
Subscribe
-
Categories
All (13)
PySpark (1)
blogging (3)
from scratch (4)
gradient boosting (8)
pandas (1)
python (7)
tutorial (3)
+
Categories
All (14)
PySpark (1)
blogging (3)
from scratch (4)
gradient boosting (9)
pandas (1)
python (8)
tutorial (4)
xgboost (1)
@@ -202,7 +202,48 @@

Home

-
+
+
+

branches reach into the Kigali sky

+
+ + +
+
@@ -240,7 +281,7 @@

-
+
@@ -272,7 +313,7 @@

-
+
@@ -310,7 +351,7 @@

-
+
@@ -342,7 +383,7 @@

-
+
@@ -380,7 +421,7 @@

-
+
@@ -412,7 +453,7 @@

-
+
@@ -450,7 +491,7 @@

-
+
@@ -488,7 +529,7 @@

-
+
@@ -520,7 +561,7 @@

-
+
@@ -552,7 +593,7 @@

-
+
@@ -590,7 +631,7 @@

-
+
@@ -628,7 +669,7 @@

-
+
diff --git a/listings.json b/listings.json index 491f066..579de06 100644 --- a/listings.json +++ b/listings.json @@ -14,6 +14,7 @@ { "listing": "/index.html", "items": [ + "/posts/xgboost-for-regression-in-python/index.html", "/posts/blogging-with-quarto-and-jupyter/index.html", "/posts/random-realizations-resurrected/index.html", "/posts/xgboost-from-scratch/index.html", @@ -32,6 +33,7 @@ { "listing": "/archive.html", "items": [ + "/posts/xgboost-for-regression-in-python/index.html", "/posts/blogging-with-quarto-and-jupyter/index.html", "/posts/random-realizations-resurrected/index.html", "/posts/xgboost-from-scratch/index.html", diff --git a/posts/8020-pandas-tutorial/index.html b/posts/8020-pandas-tutorial/index.html index 5acb914..345c330 100644 --- a/posts/8020-pandas-tutorial/index.html +++ b/posts/8020-pandas-tutorial/index.html @@ -175,7 +175,7 @@
Subscribe
- + diff --git a/posts/blogging-with-quarto-and-jupyter/index.html b/posts/blogging-with-quarto-and-jupyter/index.html index e906a58..6539bd4 100644 --- a/posts/blogging-with-quarto-and-jupyter/index.html +++ b/posts/blogging-with-quarto-and-jupyter/index.html @@ -172,7 +172,7 @@
Subscribe
- + diff --git a/posts/consider-the-decision-tree/index.html b/posts/consider-the-decision-tree/index.html index 37a6ba6..7e618d2 100644 --- a/posts/consider-the-decision-tree/index.html +++ b/posts/consider-the-decision-tree/index.html @@ -172,7 +172,7 @@
Subscribe
- + diff --git a/posts/decision-tree-from-scratch/index.html b/posts/decision-tree-from-scratch/index.html index 0caa852..33f54f6 100644 --- a/posts/decision-tree-from-scratch/index.html +++ b/posts/decision-tree-from-scratch/index.html @@ -177,7 +177,7 @@
Subscribe
- + diff --git a/posts/drafts/conda-cheat-sheet/index.html b/posts/drafts/conda-cheat-sheet/index.html index 06de5d4..0b8f493 100644 --- a/posts/drafts/conda-cheat-sheet/index.html +++ b/posts/drafts/conda-cheat-sheet/index.html @@ -170,7 +170,7 @@
Subscribe
- + diff --git a/posts/get-down-with-gradient-descent/index.html b/posts/get-down-with-gradient-descent/index.html index 7aee0d4..e358bc4 100644 --- a/posts/get-down-with-gradient-descent/index.html +++ b/posts/get-down-with-gradient-descent/index.html @@ -174,7 +174,7 @@
Subscribe
- + diff --git a/posts/gradient-boosting-machine-from-scratch/index.html b/posts/gradient-boosting-machine-from-scratch/index.html index 1d3d164..5cd7431 100644 --- a/posts/gradient-boosting-machine-from-scratch/index.html +++ b/posts/gradient-boosting-machine-from-scratch/index.html @@ -174,7 +174,7 @@
Subscribe
- + diff --git a/posts/gradient-boosting-machine-with-any-loss-function/index.html b/posts/gradient-boosting-machine-with-any-loss-function/index.html index 1cec980..b53bdd4 100644 --- a/posts/gradient-boosting-machine-with-any-loss-function/index.html +++ b/posts/gradient-boosting-machine-with-any-loss-function/index.html @@ -174,7 +174,7 @@
Subscribe
- + diff --git a/posts/hello-pyspark/index.html b/posts/hello-pyspark/index.html index d8a6ed5..e0f0b48 100644 --- a/posts/hello-pyspark/index.html +++ b/posts/hello-pyspark/index.html @@ -172,7 +172,7 @@
Subscribe
- + diff --git a/posts/hello-world/index.html b/posts/hello-world/index.html index adea3b8..a4decda 100644 --- a/posts/hello-world/index.html +++ b/posts/hello-world/index.html @@ -138,7 +138,7 @@
Subscribe
- + diff --git a/posts/how-gradient-boosting-does-gradient-descent/index.html b/posts/how-gradient-boosting-does-gradient-descent/index.html index 2b464d2..d73805a 100644 --- a/posts/how-gradient-boosting-does-gradient-descent/index.html +++ b/posts/how-gradient-boosting-does-gradient-descent/index.html @@ -140,7 +140,7 @@
Subscribe
- + diff --git a/posts/random-realizations-resurrected/index.html b/posts/random-realizations-resurrected/index.html index 522dbe6..95e22de 100644 --- a/posts/random-realizations-resurrected/index.html +++ b/posts/random-realizations-resurrected/index.html @@ -138,7 +138,7 @@
Subscribe
- + diff --git a/posts/xgboost-explained/index.html b/posts/xgboost-explained/index.html index c7160ae..3053236 100644 --- a/posts/xgboost-explained/index.html +++ b/posts/xgboost-explained/index.html @@ -140,7 +140,7 @@
Subscribe
- + diff --git a/posts/xgboost-for-regression-in-python/index.html b/posts/xgboost-for-regression-in-python/index.html new file mode 100644 index 0000000..32df79c --- /dev/null +++ b/posts/xgboost-for-regression-in-python/index.html @@ -0,0 +1,1446 @@ + + + + + + + + + + + + +Random Realizations – XGBoost for Regression in Python + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ +
+
+

XGBoost for Regression in Python

+
+
python
+
tutorial
+
gradient boosting
+
xgboost
+
+
+ +
+
A step-by-step tutorial on regression with XGBoost in python using sklearn and the xgboost library
+
+ + +
+ +
+
Author
+
+

Matt Bowers

+
+
+ +
+
Published
+
+

October 25, 2023

+
+
+ + +
+ + +
+ +

In this post I’m going to show you my process for solving regression problems with XGBoost in python, using either the native xgboost API or the scikit-learn interface. This is a powerful methodology that can produce world class results in a short time with minimal thought or effort. While we’ll be working on an old Kagle competition for predicting the sale prices of bulldozers and other heavy machinery, you can use this flow to solve whatever tabular data regression problem you’re working on.

+

This post serves as the explanation and documentation for the XGBoost regression jupyter notebook from my ds-templates repo on GitHub, so go ahead and download the notebook and follow along with your own data.

+

If you’re not already comfortable with the ideas behind gradient boosting and XGBoost, you’ll find it helpful to read some of my previous posts to get up to speed. I’d start with this introduction to gradient boosting, and then read this explanation of how XGBoost works.

+

Let’s get into it! 🚀

+
+

Install and import the xgboost library

+

If you don’t already have it, go ahead and use conda to install the xgboost library, e.g.

+
$ conda install -c conda-forge xgboost
+

Then import it along with the usual suspects.

+
+
import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import xgboost as xgb
+
+
+
+

Read dataset into python

+

In this example we’ll work on the Kaggle Bluebook for Bulldozers competition, which asks us to build a regression model to predict the sale price of heavy equipment. Amazingly, you can solve your own regression problem by swapping this data out with your organization’s data before proceeding with the tutorial.

+

Go ahead and download the Train.zip file from Kaggle and extract it into Train.csv. Then read the data into a pandas dataframe.

+
+
df = pd.read_csv('Train.csv', parse_dates=['saledate']);
+
+

Notice I cheated a little bit, checking the columns ahead of time and telling pandas to treat the saledate column as a date. In general it will make life easier to read in any date-like columns as dates.

+
+
df.info()
+
+
<class 'pandas.core.frame.DataFrame'>
+RangeIndex: 401125 entries, 0 to 401124
+Data columns (total 53 columns):
+ #   Column                    Non-Null Count   Dtype         
+---  ------                    --------------   -----         
+ 0   SalesID                   401125 non-null  int64         
+ 1   SalePrice                 401125 non-null  int64         
+ 2   MachineID                 401125 non-null  int64         
+ 3   ModelID                   401125 non-null  int64         
+ 4   datasource                401125 non-null  int64         
+ 5   auctioneerID              380989 non-null  float64       
+ 6   YearMade                  401125 non-null  int64         
+ 7   MachineHoursCurrentMeter  142765 non-null  float64       
+ 8   UsageBand                 69639 non-null   object        
+ 9   saledate                  401125 non-null  datetime64[ns]
+ 10  fiModelDesc               401125 non-null  object        
+ 11  fiBaseModel               401125 non-null  object        
+ 12  fiSecondaryDesc           263934 non-null  object        
+ 13  fiModelSeries             56908 non-null   object        
+ 14  fiModelDescriptor         71919 non-null   object        
+ 15  ProductSize               190350 non-null  object        
+ 16  fiProductClassDesc        401125 non-null  object        
+ 17  state                     401125 non-null  object        
+ 18  ProductGroup              401125 non-null  object        
+ 19  ProductGroupDesc          401125 non-null  object        
+ 20  Drive_System              104361 non-null  object        
+ 21  Enclosure                 400800 non-null  object        
+ 22  Forks                     192077 non-null  object        
+ 23  Pad_Type                  79134 non-null   object        
+ 24  Ride_Control              148606 non-null  object        
+ 25  Stick                     79134 non-null   object        
+ 26  Transmission              183230 non-null  object        
+ 27  Turbocharged              79134 non-null   object        
+ 28  Blade_Extension           25219 non-null   object        
+ 29  Blade_Width               25219 non-null   object        
+ 30  Enclosure_Type            25219 non-null   object        
+ 31  Engine_Horsepower         25219 non-null   object        
+ 32  Hydraulics                320570 non-null  object        
+ 33  Pushblock                 25219 non-null   object        
+ 34  Ripper                    104137 non-null  object        
+ 35  Scarifier                 25230 non-null   object        
+ 36  Tip_Control               25219 non-null   object        
+ 37  Tire_Size                 94718 non-null   object        
+ 38  Coupler                   213952 non-null  object        
+ 39  Coupler_System            43458 non-null   object        
+ 40  Grouser_Tracks            43362 non-null   object        
+ 41  Hydraulics_Flow           43362 non-null   object        
+ 42  Track_Type                99153 non-null   object        
+ 43  Undercarriage_Pad_Width   99872 non-null   object        
+ 44  Stick_Length              99218 non-null   object        
+ 45  Thumb                     99288 non-null   object        
+ 46  Pattern_Changer           99218 non-null   object        
+ 47  Grouser_Type              99153 non-null   object        
+ 48  Backhoe_Mounting          78672 non-null   object        
+ 49  Blade_Type                79833 non-null   object        
+ 50  Travel_Controls           79834 non-null   object        
+ 51  Differential_Type         69411 non-null   object        
+ 52  Steering_Controls         69369 non-null   object        
+dtypes: datetime64[ns](1), float64(2), int64(6), object(44)
+memory usage: 162.2+ MB
+
+
+
+
+

Prepare raw data for XGBoost

+

When faced with a new tabular dataset for modeling, we have two format considerations: data types and missingness. From the call to df.info() above, we can see we have both mixed types and missing values.

+

When it comes to missing values, some models like the gradient booster or random forest in scikit-learn require purely non-missing inputs. One of the great strengths of XGBoost is that it relaxes this requirement, allowing us to pass in missing feature values, so we don’t have to worry about them.
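As a quick illustration of that point (a toy example, not part of the bulldozer workflow), XGBoost will happily fit rows containing NaN without any imputation:

```python
import numpy as np
import xgboost as xgb

X = np.array([[1.0, np.nan],
              [2.0, 3.0],
              [np.nan, 1.0],
              [4.0, 2.0]])
y = np.array([1.0, 2.0, 3.0, 4.0])

# Missing entries are routed down a learned default direction at each split.
model = xgb.XGBRegressor(n_estimators=5).fit(X, y)
```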

+

Regarding data types, all ML models for tabular data require inputs to be numeric, either integers or floats, so we’re going to have to deal with those object columns.

+
+

Encode string features

+

The simplest way to encode string variables is to map each unique string value to an integer; this is called integer encoding.

+

We have a couple of options for how to implement this transformation: pandas categoricals or the scikit-learn label encoder. We can use the categorical type in pandas to generate mappings from string values to integers for each string feature. The category type is a bit like the factor type in R. Pandas stores the underlying data as integers, and it also keeps a mapping from the integers to the string values. XGBoost will be able to access the integers for model fitting. This is nice because we can still access the actual categories which can be helpful when we start taking a closer look at the data. If you prefer, you can also use the scikit-learn label encoder to replace the string columns with their integer-mapped counterparts.

+
+
def encode_string_features(df, use_cats=True):
+    out_df = df.copy()
+    for feature, feature_type in df.dtypes.items():
+        if feature_type == 'object':
+            if use_cats:
+                out_df[feature] = out_df[feature].astype('category')
+            else:
+                from sklearn.preprocessing import LabelEncoder
+                out_df[feature] = LabelEncoder() \
+                    .fit_transform(out_df[feature].astype('str'))
+    return out_df
+
+df = encode_string_features(df, use_cats=False)
+
+
+
+

Encode date and timestamp features

+

While dates feel sort of numeric, they are not numbers, so we need to transform them into numeric columns. Unfortunately, encoding timestamps isn’t as straightforward as encoding strings, so we actually might need to engage in a little bit of feature engineering. A single date has many different attributes, e.g. days since epoch, year, quarter, month, day, day of year, day of week, is holiday, etc. As a starting point, we can just add a few of these attributes as features. Once a feature is represented as a date or timestamp data type, you can access various attributes via the dt attribute.

+
+
def encode_datetime_features(df, datetime_features, datetime_attributes):
+    out_df = df.copy()
+    for datetime_feature in datetime_features:
+        for datetime_attribute in datetime_attributes:
+            if datetime_attribute == 'days_since_epoch':
+                out_df[f'{datetime_feature}_{datetime_attribute}'] = \
+                    (out_df[datetime_feature] 
+                     - pd.Timestamp(year=1970, month=1, day=1)).dt.days
+            else:
+                out_df[f'{datetime_feature}_{datetime_attribute}'] = \
+                    getattr(out_df[datetime_feature].dt, datetime_attribute)
+    return out_df
+
+datetime_features = [
+    'saledate',
+]
+datetime_attributes = [
+    'year',
+    'month',
+    'day',
+    'quarter',
+    'day_of_year',
+    'day_of_week',
+    'days_since_epoch',
+]
+
+df = encode_datetime_features(df, datetime_features, datetime_attributes)
+
+
+
+

Transform the target if necessary

+

In the interest of speed and efficiency, we didn’t bother doing any EDA with the feature data. Part of my justification for this is that trees are incredibly robust to outliers, collinearity, missingness, and other assorted nonsense in the feature data. However, they are not necessarily robust to nonsense in the target variable, so it’s worth having a look at it before proceeding any further.

+
+
df.SalePrice.hist(); plt.xlabel('SalePrice');
+
+

histogram of sale price showing right-skewed data

+
+
+

Often when predicting prices it makes sense to use log price, especially when they span multiple orders of magnitude or have a strong right skew. These data look pretty friendly, lacking outliers and exhibiting only a mild positive skew; we could probably get away without doing any transformation. But checking the evaluation metric used to score the Kaggle competition, we see they’re using root mean squared log error (RMSLE). That’s equivalent to using RMSE on log-transformed target data, so let’s go ahead and work with log prices.
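To see that equivalence concretely, here's a tiny check with made-up numbers (not competition data); it assumes scikit-learn is installed for the mean_squared_log_error helper.

```python
import numpy as np
from sklearn.metrics import mean_squared_log_error

y_true = np.array([20_000., 35_000., 50_000.])   # raw sale prices
y_pred = np.array([22_000., 30_000., 55_000.])   # raw-scale predictions

rmsle = np.sqrt(mean_squared_log_error(y_true, y_pred))
rmse_on_logs = np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))
assert np.isclose(rmsle, rmse_on_logs)
```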

+
+
df['logSalePrice'] = np.log1p(df['SalePrice'])
+df.logSalePrice.hist(); plt.xlabel('logSalePrice');
+
+

histogram of log sale price showing a more symmetric distribution

+
+
+
+
+
+

Train and Evaluate the XGBoost regression model

+

Having prepared our dataset, we are now ready to train an XGBoost model. We’ll walk through the flow step-by-step first, then later we’ll collect the code in a single cell, so it’s easier to quickly iterate through variations of the model.

+
+

Specify target and feature columns

+

First we’ll put together a list of our features and define the target column. I like to have an actual list defined in the code so it’s easier to see everything we’re putting into the model and easier to add or remove features as we iterate. Just run something like list(df.columns) in a cell to get a copy-pasteable list of columns, then edit it down to the full list of features, i.e. remove the target, date columns, and other non-feature columns.

+
+
# list(df.columns)
+
+
+
features = [
+    'SalesID',
+    'MachineID',
+    'ModelID',
+    'datasource',
+    'auctioneerID',
+    'YearMade',
+    'MachineHoursCurrentMeter',
+    'UsageBand',
+    'fiModelDesc',
+    'fiBaseModel',
+    'fiSecondaryDesc',
+    'fiModelSeries',
+    'fiModelDescriptor',
+    'ProductSize',
+    'fiProductClassDesc',
+    'state',
+    'ProductGroup',
+    'ProductGroupDesc',
+    'Drive_System',
+    'Enclosure',
+    'Forks',
+    'Pad_Type',
+    'Ride_Control',
+    'Stick',
+    'Transmission',
+    'Turbocharged',
+    'Blade_Extension',
+    'Blade_Width',
+    'Enclosure_Type',
+    'Engine_Horsepower',
+    'Hydraulics',
+    'Pushblock',
+    'Ripper',
+    'Scarifier',
+    'Tip_Control',
+    'Tire_Size',
+    'Coupler',
+    'Coupler_System',
+    'Grouser_Tracks',
+    'Hydraulics_Flow',
+    'Track_Type',
+    'Undercarriage_Pad_Width',
+    'Stick_Length',
+    'Thumb',
+    'Pattern_Changer',
+    'Grouser_Type',
+    'Backhoe_Mounting',
+    'Blade_Type',
+    'Travel_Controls',
+    'Differential_Type',
+    'Steering_Controls',
+    'saledate_year',
+    'saledate_month',
+    'saledate_day',
+    'saledate_quarter',
+    'saledate_day_of_year',
+    'saledate_day_of_week',
+    'saledate_days_since_epoch'
+]
+
+target = 'logSalePrice'
+
+
+
+

Split the data into training and validation sets

+

Next we split the dataset into a training set and a validation set. Of course since we’re going to evaluate against the validation set a number of times as we iterate, it’s best practice to keep a separate test set reserved to check our final model to ensure it generalizes well. Assuming that final test set is hidden away, we can use the rest of the data for training and validation.

+

There are two main ways we might want to select the validation set. If there isn’t a temporal ordering of the observations, we might be able to randomly sample. In practice, it’s much more common that observations have a temporal ordering, and that models are trained on observations up to a certain time and used to predict on observations occurring after that time. Since this data is temporal, we don’t want to split randomly; instead we’ll split on observation date, reserving the latest observations for the validation set.

+
+
# Temporal Validation Set
+def train_test_split_temporal(df, datetime_column, n_test):
+    idx_sort = np.argsort(df[datetime_column])
+    idx_train, idx_test = idx_sort[:-n_test], idx_sort[-n_test:]
+    return df.iloc[idx_train, :], df.iloc[idx_test, :]
+
+
+# Random Validation Set
+def train_test_split_random(df, n_test):
+    np.random.seed(42)
+    idx_sort = np.random.permutation(len(df))
+    idx_train, idx_test = idx_sort[:-n_test], idx_sort[-n_test:]
+    return df.iloc[idx_train, :], df.iloc[idx_test, :]
+
+my_train_test_split = lambda d, n_valid: train_test_split_temporal(d, 'saledate', n_valid)
+# my_train_test_split = lambda d, n_valid: train_test_split_random(d, n_valid)
+
+
+
n_valid = 12000
+train_df, valid_df = my_train_test_split(df, n_valid)
+
+train_df.shape, valid_df.shape
+
+
((389125, 61), (12000, 61))
+
+
+
+
+

Create DMatrix data objects

+

XGBoost uses its own optimized data container called DMatrix for efficient training and prediction, so next we need to create DMatrix objects for our training and validation datasets.

+
+

If you prefer to use the scikit-learn interface to XGBoost, you don’t need to create these DMatrix objects. More on that below.

+
+
+
dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+
+
+

Set the XGBoost parameters

+

XGBoost has numerous hyperparameters. Fortunately, just a handful of them tend to be the most influential; furthermore, the default values are not bad in most situations. I like to start out with a dictionary containing the default parameter values for just the ones I think are most important. For training there is also the num_boost_round argument, which I set to 50 as a starting point; you can make this smaller initially if training takes too long.

+
+
# default values for important parameters
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+
+
+

Train the XGBoost model

+

Check out the documentation on the learning API to see all the training options. During training, I like to have XGBoost print out the evaluation metric on the train and validation set after every few boosting rounds and again at the end of training; that can be done by setting evals and verbose_eval. You can also save the evaluation results in a dictionary passed into evals_result to inspect and plot the objective curve over the training iterations.

+
+
evals_result = {}
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')],
+              verbose_eval=10,
+              evals_result=evals_result)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37158
+[20]    train-rmse:0.26289  valid-rmse:0.28239
+[30]    train-rmse:0.25148  valid-rmse:0.27028
+[40]    train-rmse:0.24375  valid-rmse:0.26420
+[49]    train-rmse:0.23738  valid-rmse:0.25855
+
+
+
+
+

Train the XGBoost model using the sklearn interface

+

You can optionally use the sklearn estimator interface to XGBoost. This will bypass the need to use the DMatrix data objects for training and prediction, and it will allow you to leverage many of the other scikit-learn ecosystem tools like pipelines, parameter search, partial dependence plots, etc. The XGBRegressor is available in the xgboost library that we’ve already imported.

+
+
# scikit-learn interface
+reg = xgb.XGBRegressor(n_estimators=num_boost_round, **params)
+reg.fit(train_df[features], train_df[target], 
+        eval_set=[(train_df[features], train_df[target]), (valid_df[features], valid_df[target])], 
+        verbose=10);
+
+
[0] validation_0-rmse:6.74422   validation_1-rmse:6.79733
+[10]    validation_0-rmse:0.34798   validation_1-rmse:0.37158
+[20]    validation_0-rmse:0.26289   validation_1-rmse:0.28239
+[30]    validation_0-rmse:0.25148   validation_1-rmse:0.27028
+[40]    validation_0-rmse:0.24375   validation_1-rmse:0.26420
+[49]    validation_0-rmse:0.23738   validation_1-rmse:0.25855
+
+
+

Since not all features of XGBoost are available through the scikit-learn estimator interface, you might want to get the native booster object back out of the sklearn wrapper.

+
+
m = reg.get_booster()
+
+
+
+

Evaluate the model and check for overfitting

+

We get the model evaluation metrics on the training and validation sets printed to stdout when we use the evals argument to the training API. Typically I just look at those printed metrics, but let’s double check by hand.

+
+
def root_mean_squared_error(y_true, y_pred):
+    return np.sqrt(np.mean((y_true - y_pred)**2))
+
+root_mean_squared_error(dvalid.get_label(), m.predict(dvalid))
+
+
0.25855368
+
+
+

So, how good is that RMSLE of 0.259? Well, checking the Kaggle leaderboard for this competition, we would have come in 53rd out of 474, which is in the top 12% of submissions. That’s not bad for 10 minutes of work doing the bare minimum necessary to transform the raw data into a format consumable by XGBoost and then training a model using default hyperparameter values.

+
+

Note that we’re using a different validation set from that used for the final leaderboard (which is long closed), but our score is likely still a decent approximation for how we would have done in the competition.

+
+

It can be helpful to take a look at objective curves for training and validation data to get a sense for the extent of overfitting. A huge difference between training and validation performance indicates overfitting. In the below curve, there is very little overfitting, indicating we can be aggressive with hyperparameters that increase model flexibility. More on that soon.

+
+
pd.DataFrame({
+    'train': evals_result['train']['rmse'],
+    'valid': evals_result['valid']['rmse']
+}).plot(); plt.xlabel('boosting round'); plt.ylabel('objective');
+
+

line plot showing objective function versus training iteration for training and validation sets

+
+
+
+
+

Check feature importance

+

It’s helpful to get an idea of how much the model is using each feature. In following iterations we might want to try dropping low-signal features or examining the important ones more closely for feature engineering ideas. The gigantic caveat to keep in mind here is that there are different measures of feature importance, and each one will give different importances. XGBoost provides three importance measures; I tend to prefer looking at the weight measure because its rankings usually seem most intuitive.

+
+
fig, ax = plt.subplots(figsize=(5,10))
+feature_importances = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)
+feature_importances.plot.barh(ax=ax)
+plt.title('Feature Importance');
+
+

feature importance plot showing a few high importance features and many low importance ones

+
+
+
+
+
+

Improve performance using a model iteration loop

+

At this point we have a half-decent prototype model. Now we enter the model iteration loop in which we adjust features and model parameters to find configurations that have better and better performance.

+

Let’s start by putting the feature and target specification, the training/validation split, the model training, and the evaluation all together in one code block that we can copy paste for easy model iteration.

+
+

Note that for this process to be effective, model training needs to take less than 10 seconds. Otherwise you’ll be sitting around waiting way too long. If training takes too long, try training on a sample of the training data, or try reducing the number of boosting rounds.

+
+
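If training is too slow on the full training set, one option (a hypothetical tweak that reuses the train_df, features, and target defined above, not part of the original notebook) is to iterate on a random sample of the training rows and switch back to the full data once the feature list and parameters have settled down:

```python
# iterate faster by fitting on a sample of the training rows
train_sample_df = train_df.sample(n=50_000, random_state=42)
dtrain_sample = xgb.DMatrix(data=train_sample_df[features],
                            label=train_sample_df[target],
                            enable_categorical=True)
```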
+
features = [
+    'SalesID',
+    'MachineID',
+    'ModelID',
+    'datasource',
+    'auctioneerID',
+    'YearMade',
+    'MachineHoursCurrentMeter',
+    'UsageBand',
+    'fiModelDesc',
+    'fiBaseModel',
+    'fiSecondaryDesc',
+    'fiModelSeries',
+    'fiModelDescriptor',
+    'ProductSize',
+    'fiProductClassDesc',
+    'state',
+    'ProductGroup',
+    'ProductGroupDesc',
+    'Drive_System',
+    'Enclosure',
+    'Forks',
+    'Pad_Type',
+    'Ride_Control',
+    'Stick',
+    'Transmission',
+    'Turbocharged',
+    'Blade_Extension',
+    'Blade_Width',
+    'Enclosure_Type',
+    'Engine_Horsepower',
+    'Hydraulics',
+    'Pushblock',
+    'Ripper',
+    'Scarifier',
+    'Tip_Control',
+    'Tire_Size',
+    'Coupler',
+    'Coupler_System',
+    'Grouser_Tracks',
+    'Hydraulics_Flow',
+    'Track_Type',
+    'Undercarriage_Pad_Width',
+    'Stick_Length',
+    'Thumb',
+    'Pattern_Changer',
+    'Grouser_Type',
+    'Backhoe_Mounting',
+    'Blade_Type',
+    'Travel_Controls',
+    'Differential_Type',
+    'Steering_Controls',
+    'saledate_year',
+    'saledate_month',
+    'saledate_day',
+    'saledate_quarter',
+    'saledate_day_of_year',
+    'saledate_day_of_week',
+    'saledate_days_since_epoch',
+]
+
+target = 'logSalePrice'
+
+train_df, valid_df = train_test_split_temporal(df, 'saledate', 12000)
+dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')],verbose_eval=10)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37158
+[20]    train-rmse:0.26289  valid-rmse:0.28239
+[30]    train-rmse:0.25148  valid-rmse:0.27028
+[40]    train-rmse:0.24375  valid-rmse:0.26420
+[49]    train-rmse:0.23738  valid-rmse:0.25855
+
+
+
+

Feature selection

+
+

Drop low-importance features

+

Let’s try training a model on only the top k most important features, using the rankings created from each of the three importance measures. You can play with different values of k for each ranking, looking for the best-performing number of features to keep manually.

+
+
feature_importances_weight = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)
+feature_importances_cover = pd.Series(m.get_score(importance_type='cover')).sort_values(ascending=False)
+feature_importances_gain = pd.Series(m.get_score(importance_type='gain')).sort_values(ascending=False)
+
+
+
# features = list(feature_importances_weight[:30].index)
+# features = list(feature_importances_cover[:35].index)
+features = list(feature_importances_gain[:30].index)
+
+dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79733
+[10]    train-rmse:0.34798  valid-rmse:0.37150
+[20]    train-rmse:0.26182  valid-rmse:0.27986
+[30]    train-rmse:0.24974  valid-rmse:0.26896
+[40]    train-rmse:0.24282  valid-rmse:0.26043
+[49]    train-rmse:0.23768  valid-rmse:0.25664
+
+
+

Looks like keeping the top 30 from the gain importance type gives a slight performance improvement.

+
+
+

Drop one feature at a time

+

Next try dropping each feature out of the model one-at-a-time to see if there are any more features that you can drop. For each feature, drop it from the feature set, then train a new model, then record the evaluation score. At the end, sort the scores to see which features are the best candidates for removal.

+
+
features = [
+    'Coupler_System',
+     'Tire_Size',
+     'Scarifier',
+     'ProductSize',
+     'Ride_Control',
+     'fiBaseModel',
+     'Enclosure',
+     'Pad_Type',
+     'YearMade',
+     'fiSecondaryDesc',
+     'ProductGroup',
+     'Drive_System',
+     'Ripper',
+     'saledate_days_since_epoch',
+     'fiModelDescriptor',
+     'fiProductClassDesc',
+     'MachineID',
+     'Hydraulics',
+     'SalesID',
+     'Track_Type',
+     'ModelID',
+     'fiModelDesc',
+     'Travel_Controls',
+     'Transmission',
+     'Blade_Extension',
+     'fiModelSeries',
+     'Grouser_Tracks',
+     'Undercarriage_Pad_Width',
+     'Stick',
+     'Thumb'
+]
+
+# drop each feature one-at-a-time
+scores = []
+for i, feature in enumerate(features):
+    drop_one_features = features[:i] + features[i+1:]
+
+    dtrain = xgb.DMatrix(data=train_df[drop_one_features], label=train_df[target], enable_categorical=True)
+    dvalid = xgb.DMatrix(data=valid_df[drop_one_features], label=valid_df[target], enable_categorical=True)
+
+    params = {
+        'learning_rate': 0.3,
+        'max_depth': 6,
+        'min_child_weight': 1,
+        'subsample': 1,
+        'colsample_bynode': 1,
+        'objective': 'reg:squarederror',
+    }
+    num_boost_round = 50
+
+    m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+                evals=[(dtrain, 'train'), (dvalid, 'valid')],
+                verbose_eval=False)
+    score = root_mean_squared_error(dvalid.get_label(), m.predict(dvalid))
+    scores.append(score)
+
+results_df = pd.DataFrame({
+    'feature': features,
+    'score': scores
+})
+results_df.sort_values(by='score')
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
     feature                      score
18   SalesID                      0.252617
5    fiBaseModel                  0.253710
27   Undercarriage_Pad_Width      0.254032
17   Hydraulics                   0.254114
20   ModelID                      0.254169
4    Ride_Control                 0.254278
16   MachineID                    0.254413
19   Track_Type                   0.254825
6    Enclosure                    0.254958
28   Stick                        0.255164
1    Tire_Size                    0.255365
10   ProductGroup                 0.255404
22   Travel_Controls              0.255895
29   Thumb                        0.256300
23   Transmission                 0.256380
26   Grouser_Tracks               0.256395
11   Drive_System                 0.256652
24   Blade_Extension              0.256698
7    Pad_Type                     0.256952
25   fiModelSeries                0.257073
2    Scarifier                    0.257590
12   Ripper                       0.257848
0    Coupler_System               0.258074
21   fiModelDesc                  0.258712
13   saledate_days_since_epoch    0.259856
14   fiModelDescriptor            0.260439
9    fiSecondaryDesc              0.260782
15   fiProductClassDesc           0.263790
3    ProductSize                  0.268068
8    YearMade                     0.313105
+ +
+
+
+

Next try removing the feature with the best removal score. Then with that feature still removed, also try removing the feature with the next best removal score and so on. Repeat this process until the model evaluation metric is no longer improving. I think this could be considered a faster version of backward stepwise feature selection.

+
+
features = [
+    'Coupler_System',
+     'Tire_Size',
+     'Scarifier',
+     'ProductSize',
+     'Ride_Control',
+#      'fiBaseModel',
+     'Enclosure',
+     'Pad_Type',
+     'YearMade',
+     'fiSecondaryDesc',
+     'ProductGroup',
+     'Drive_System',
+     'Ripper',
+     'saledate_days_since_epoch',
+     'fiModelDescriptor',
+     'fiProductClassDesc',
+     'MachineID',
+#      'Hydraulics',
+#      'SalesID',
+     'Track_Type',
+     'ModelID',
+     'fiModelDesc',
+     'Travel_Controls',
+     'Transmission',
+     'Blade_Extension',
+     'fiModelSeries',
+     'Grouser_Tracks',
+#      'Undercarriage_Pad_Width',
+     'Stick',
+     'Thumb'
+]
+
+dtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)
+dvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)
+
+params = {
+    'learning_rate': 0.3,
+    'max_depth': 6,
+    'min_child_weight': 1,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',
+}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:6.74422  valid-rmse:6.79145
+[10]    train-rmse:0.34882  valid-rmse:0.37201
+[20]    train-rmse:0.26050  valid-rmse:0.27386
+[30]    train-rmse:0.24844  valid-rmse:0.26205
+[40]    train-rmse:0.24042  valid-rmse:0.25426
+[49]    train-rmse:0.23549  valid-rmse:0.25004
+
+
+

So here I was able to remove four more features before the score started getting worse. With our reduced feature set, we’re now ranking 39th on that Kaggle leaderboard. Let’s see how far we can get with some hyperparameter tuning.

+
+
+
+

Tune the XGBoost hyperparameters

+

This is a topic which deserves its own full-length post, but just for fun, here I’ll do a quick and dirty hand tuning without a ton of explanation.

+

Broadly speaking, my process is to increase model expressiveness by increasing the maximum tree depth until it looks like I’m overfitting. At that point, I start pushing tree pruning parameters like min_child_weight and regularization parameters like lambda to counteract the overfitting. That process led me to the following parameters.

+
+
params = {
+    'learning_rate': 0.3,
+    'max_depth': 10,
+    'min_child_weight': 14,
+    'lambda': 5,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',}
+num_boost_round = 50
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:6.74473  valid-rmse:6.80196
+[10]    train-rmse:0.31833  valid-rmse:0.34151
+[20]    train-rmse:0.22651  valid-rmse:0.24885
+[30]    train-rmse:0.21501  valid-rmse:0.23904
+[40]    train-rmse:0.20897  valid-rmse:0.23645
+[49]    train-rmse:0.20418  valid-rmse:0.23412
+
+
+

That gets us up to 12th place. Next I start reducing the learning rate and increasing the boosting rounds in proportion to one another.

+
+
params = {
+    'learning_rate': 0.3/5,
+    'max_depth': 10,
+    'min_child_weight': 14,
+    'lambda': 5,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',}
+num_boost_round = 50*5
+
+m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)
+
+
[0] train-rmse:9.04930  valid-rmse:9.12743
+[10]    train-rmse:4.88505  valid-rmse:4.93769
+[20]    train-rmse:2.64630  valid-rmse:2.68501
+[30]    train-rmse:1.44703  valid-rmse:1.47923
+[40]    train-rmse:0.81123  valid-rmse:0.84079
+[50]    train-rmse:0.48441  valid-rmse:0.51272
+[60]    train-rmse:0.32887  valid-rmse:0.35434
+[70]    train-rmse:0.26276  valid-rmse:0.28630
+[80]    train-rmse:0.23720  valid-rmse:0.26026
+[90]    train-rmse:0.22658  valid-rmse:0.24932
+[100]   train-rmse:0.22119  valid-rmse:0.24441
+[110]   train-rmse:0.21747  valid-rmse:0.24114
+[120]   train-rmse:0.21479  valid-rmse:0.23923
+[130]   train-rmse:0.21250  valid-rmse:0.23768
+[140]   train-rmse:0.21099  valid-rmse:0.23618
+[150]   train-rmse:0.20928  valid-rmse:0.23524
+[160]   train-rmse:0.20767  valid-rmse:0.23445
+[170]   train-rmse:0.20658  valid-rmse:0.23375
+[180]   train-rmse:0.20558  valid-rmse:0.23307
+[190]   train-rmse:0.20431  valid-rmse:0.23252
+[200]   train-rmse:0.20316  valid-rmse:0.23181
+[210]   train-rmse:0.20226  valid-rmse:0.23145
+[220]   train-rmse:0.20133  valid-rmse:0.23087
+[230]   train-rmse:0.20045  valid-rmse:0.23048
+[240]   train-rmse:0.19976  valid-rmse:0.23023
+[249]   train-rmse:0.19902  valid-rmse:0.23009
+
+
+

Decreasing the learning rate and increasing the boosting rounds got us up to a 2nd place score. Notice that the score is still decreasing on the validation set. We can actually continue boosting on this model by passing it to the xgb_model argument in the train function. We want to go very very slowly here to avoid overshooting the minimum of the objective function. To do that I ramp up the lambda regularization parameter and boost a few more rounds from where we left off.

+
+
# second stage
+params = {
+    'learning_rate': 0.3/10,
+    'max_depth': 10,
+    'min_child_weight': 14,
+    'lambda': 60,
+    'subsample': 1,
+    'colsample_bynode': 1,
+    'objective': 'reg:squarederror',}
+num_boost_round = 50*3
+
+m1 = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,
+              evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10,
+              xgb_model=m)
+
+
[0] train-rmse:0.19900  valid-rmse:0.23007
+[10]    train-rmse:0.19862  valid-rmse:0.22990
+[20]    train-rmse:0.19831  valid-rmse:0.22975
+[30]    train-rmse:0.19796  valid-rmse:0.22964
+[40]    train-rmse:0.19768  valid-rmse:0.22955
+[50]    train-rmse:0.19739  valid-rmse:0.22940
+[60]    train-rmse:0.19714  valid-rmse:0.22935
+[70]    train-rmse:0.19689  valid-rmse:0.22927
+[80]    train-rmse:0.19664  valid-rmse:0.22915
+[90]    train-rmse:0.19646  valid-rmse:0.22915
+[100]   train-rmse:0.19620  valid-rmse:0.22910
+[110]   train-rmse:0.19604  valid-rmse:0.22907
+[120]   train-rmse:0.19583  valid-rmse:0.22901
+[130]   train-rmse:0.19562  valid-rmse:0.22899
+[140]   train-rmse:0.19546  valid-rmse:0.22898
+[149]   train-rmse:0.19520  valid-rmse:0.22886
+
+
+
+
root_mean_squared_error(dvalid.get_label(), m1.predict(dvalid))
+
+
0.22885828
+
+
+

And that gets us to 1st place on the leaderboard.

+
+
+
+

Wrapping Up

+

There you have it, how to use XGBoost to solve a regression problem in python with world class performance. Remember you can use the XGBoost regression notebook from my ds-templates repo to make it easy to follow this flow on your own problems. If you found this helpful, or if you have additional ideas about solving regression problems with XGBoost, let me know down in the comments.

+
+ +
+
+ + +

Comments

+ +
+ + +
+ +
+ + + + \ No newline at end of file diff --git a/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-19-output-1.png b/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-19-output-1.png new file mode 100644 index 0000000000000000000000000000000000000000..b67113d8a9f88a11b2ed36817c2a30783d3a3482 GIT binary patch literal 17808 zcmajH1yoh*_cppnkw#KN1VM01n6&f;MFch?A{~m-(nu>QDN-t3g3>KWBZ2`)C?F{z zQUcQb&9(XcV|@3Hd&fBE=vn*S?|NhAGoQJiYN#nwk+YH`2tsx3s-h->5MmI7pookV zo&@o#QNSPKPFNkMn|9_-t|kumkQ*jW53TK-tSwAWy4-Vcw6L?iD0Erqg22fKPEHRU zC4_}-{(FFsox^=$j0h(dCOP`>>K#V}p)o=KCCHJ8I0;gvq_VD3MD}X*YUmu5EjERAqm<;jkDTe`3eYaghy6 z#wDw$s5t4TRYF8dM@Ls4hz$r14)$aUC5Qjp5@2HF=H~u)M*xEkD%H_~&)V)6=fKdY z&h9qqZ^vEUb7ZR}$?NE_FkquNg5dL?a%7J~LdbbepO(+E+Pg4ChSIW^1A15_Soub3D2d*BQGRfZF5uZ+!0{X z5n!73Djk^5cT-gC4ewuY>HYrwQ^c>zGF`!5%W~z4sf*Zq?XNELpFP|B_2or9FEw-X z)5$*jj}`;Cxzla43ksYjWN&RHtv&CP=PefK|361yAq z2R3Wsf*iPODk}1i9*Gxjb8)Au(aWEdk>OlhTU&S)7WRr>KDD}?oDG`y1o18>jinsJDtdEd+uSZm&x60Pv}+JwiKJSp1b#%;X_Lc*QHAg8Oo6#VM_)TuD_IJLej^QTt^!7 zD@Q`$3|Hs+Xsr4Q)_TuXKIPmi8;iNj@D#E9tk!%|l9G<@+RdAG3fn0^t`BTHW-d@J zD)91o%`9vf)*M7*wlvo0_A7uHL3TD4k04TOy*J&~zTfW3R_A=7dOA_+(J|3sCwfEo z8D3%%5|gceQ!k_*U7+TW@Hed0l^OoEHJ#FWwy@?&{qB^o+umB=d9Q7!$*C!>3l~aO z>?)|F(z~Brxz9nr_rOvGF^mFb>`tx~~hTsi#OYUy;Ay_*-?zYh(D&V5RWy&XmZvZ!y-R zb+unc*&=hwPVz|5A*t9a8hX?$st!b;Rt%*1tVayr8UC!hV$z#~6WYjq`*vw}rE_AX zQ>OLJ&1~axUEIx^e!IK7B9#}N{%A}uJbOk9+Mf3A-JE)qG0s9R<~*gjiR?Q~BCRA7 z92fWOYX+>vMyqh$Y*Vglvgf~Rc}Ddo3hTBp+v^JyevQ6^cJ&<2&dxTr%DLaGhi)^p zoE<&4Fq$!nHI-lEXZ%HW+tN6fKTx7NW&SE0g-!R2Q^z94^2?8Wz4PeK+~@KU-_M^_ zCV&5Sv6t9bW{+i34E&=(cnV4VXPq{A3BJP|pO_fN>9b2$RaM1*=8VPK1Le^=uVTyZ zn90dWUBw_HMhpYkR9>gFrCyGww%(Y-Q# z_g52YN*h3d&OQ2fSKnuEjpe+le|%2`r_8nmHdf?V&XL*A>Ua%w!tL#Bw9?4zj+NN{ z{`#^#8C}Ya1d*z|gv(ZOU-l?u$Sp0@;!8>{R64jYz=fo%agrcK_T8eOc@{cax+WA6 z1PK4F`#h+o%+|pZ`dNO=t5?T9$-x?`g_&ZtA2dxMh+LfJr)&T9ghh23@ui&;|DPws z%BK;8GMzLhH!d*{%&~_l&I0Ry=2&^UN%a`Ua0~R6hbUC#YFM9U<#7yflui~x9jb#NC-YhF zVd#<6?V&UNpTlA1$y)&d=!re!@WW;a-gFcx0D=3_Y53PsZ6}Q_MZgzF9 zFASdd+c+ zUOzr6s*87VLLN*G zY3&&%gjU~2+u;}(U3s>7u@Ww;Q4UnX!B=`Nw=lC@alceyI~>wy=s|@nO-8Gm?rpE( z?Ovsw5ep^hkoV3)PHwJMUj_Dvbl|^BOtB&?-0VGnf_|Pw^sc1-P*DS2G|b{(*x$s8 zpo(w%o{H0ZlPluHm5}|t9ka={D8x5kuhJA0<=xs3Jpu$Sq^-T(W}~8~r)Rm)XOH6Y z<;&+-C=p+q!HP$_SIYcBY#JFDDRLjMa3^bA2SaJQy*jfw9xA-l>9bG2G2|HO==J|l zb=}gl{y#6+{JfN-g&FcW*b{p=hScr;yI!+Bn`^Ml^vVQhq2Tm_AnV~*wX>KURact(?o>Or96>ZohkhOIo_FXE4f8%X@2zS!!>! 
zN2k)3!_w07TfL7@4a6fIi+s-n1wNl|v+RHTs#NxB*(6*+Z>*C9$M)J>Yow2aT<&+5 z?W-!M-e9MOsvKD)T?7iMezWH3RZ@P+)YOv$=gFRje*Pj{AC{TPBcNCDwMs}_{ElHN(+B@qoIqHx76y0Qkk~-K8t;jQ z1AFO(eiRsl;xd$eMM(aYwYC-rq~VOH9QA(xeOga|=|#JFd{%A%+a>-0GuFM3(!``V z2(lA0nPR~bn9X!&{pl;@{84BW8N}($5_x5vWdwwgDJA9Q%cB`xfAk?*u9nJ3=g;}3 zh#;1ojLuv8U_<{%y}<`?AA;##$oNR3FGSrwjnonnCuHz*d}X>lA?V*fhiOd>jj`Co z%hmjdtbsUcRii|&31W^OJ$hB$^~&@qK|!jmtt~-mqr$#ZQc^?rmIFaLHui#5yhmyN zGuEs%q9sisNdE_iv?oig$3|RC&brxmK|!KY?C)MLq9KiZ%A4XR#2@>ceZwA zC@*&@bN+(xstH`f-H(>9?ODA9QNzgn5Az0lZ}m@?PObZ8>HSSJ$`ev7!gZV6OoF=p zGd-wPBXvp?f;gOLQ+hWwC|wMBYu2|PLNun4hMc8&&ziX!adzWOyZ=I*^xlv5V1oiAVYnb`NgUwB08vTBCi88ZmRxc`wd z#Znjj;lQ?N+v^@0i^te3qP#$sR;t4-FbZHe=*Iamv#hA&RGg%O|4*g1N>r-X`k(Pn zR3p-lX?9wyJ`E18K__@}3hp9poxDL?7{7%vGu#Yf=ERD>h9P2hTzalhuv;Lr^s(b>u@6K|>_z8JhT;^N_(sVX-hDqCll zfsBfK))E$_l^aQj=!l0)!`%20+b5NAbg?bZqrt$DNW(slaTOzo^JQW6)BpKOCb=U6 zBM1KOW*uOPOi_u~T{MTcZ4iOY@_TZM9zjehE;QK2vq7B1-%y<6q~R8mk&|O`#L`U* zq%7ww`sl73f5%ge)}*%=9h%X%iNV`KsK1M6cNGerjX^^!{vvc-j-j7s3mK9la1TBF zSrzo1{N$ML;OSVs&vT(GH^td2+<+@wGsmBIZ!8!HG`%~V}k$Ygs@s{10cLs~i=-O<-#k`IQv{)@K#|v-H z*=sV&Q*BYFuBxgks;H2^R!a&2GgP^)AKF3)S#e2QDupXZNe*60W>rA7@5LJgZjAbH z&h-_xLXaMwp3WV1{32;qmLRk5*RQH9;Y!obV`3(!K2XciQT6=M>m-oB`UyaXUJicw z%?_iVkZ5GSem(vsJGlw0al#ebVjbgRoy3iHJQAQ`Xcz$yK;hKx^0#aH>&quwlVm1| z>rE?ES-c=z}Oq=+&hj3+h#j zeRxX0x!lTWzq3%;Y%OB;G3@vSGlQ6}RO}hn>@FGJLot57-PQRin$Lb?*fk?Fllc7k z^Y6eN7uyVqto|L=GtUdve8ceKJDo3L*|EIijRxQ$z3C;UE#xoXFUD9J@E8-!em)e@ z+skhpwRW!=G;i$nW5n=Md)owOAALoP6iG!LDR;V1bNYCJ1~}>J|HSu%%%LF(WpKE2 za~&#bV7eII>1YixF#Ou&Zh2^R=1`Ei1u(Yc5ia`j96XkfUpeB#bdu9djH1|AVf;*| zTaz5CE@YrW3ZRcDFN@zpO+_L^fVb*Ua{1xmaVlMgGZFNb?bueHY>-oORhUwff#i3p zBH}ys^(a0OBBT#h;p+`pa~k6R63+%f1-~pv_!A8GaU``+>WnBE2(zy#ZpJ_L#a%@9 zUDh+lH}Fzt0;X8CbG(^NLJWVff*LJvP>UfIj?JQX|Hxv`N*5t1r4CxvHld>>k#=9P z$WbVE_F|3VOL)`+kGQl78!ckZgknRQCJr}t0_J;rVHH7iRK(&@x0Vg7(DZ87$Gmk& z&OoiZg#acgGengXA+{GnQLad^9y|)xoQSNO0o}AFY6Q>0Bq3L?2HxQR_EaMEU)j7E zSRR4^Y*w_p(s5icKjJGE&xaQPV-ws=W-=Y=Zuhx(m0RZvchRr&z}LyU-`Xp2&>%Vd z5}5xQx$KKk;6_sGcy8O$!gV5WodiFYco1Jr!;|7i;e24WWyJZe9pal7ff+pl9zQDhTPQA zlKQ|jLV=`SaKxXIIxLO$MhqPF*sKvg+azZfz??V+`wl7od&#&G?_d~M;Cnn0bduu7CB1oO*5maOcb(a}BDd)3Z!&ytfz z{x0O{6cfI8T~snRH-DcjV>cod6CW=b|AcUKJDB&h^iig?0CJaxug5Jxmn5!FbcCX? 
diff --git a/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-20-output-1.png b/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-20-output-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..2805460e9cbfe6524a4e5dd6759a06fe7a9cd760
Binary files /dev/null and b/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-20-output-1.png differ
diff --git a/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-7-output-1.png b/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-7-output-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..88e3a25156c2b11eb1ac423d6e036c9a84f9a814
Binary files /dev/null and b/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-7-output-1.png differ
zU8$@gYO&{1TpWkf=h)*HTPjab!k_Jjhja&uH#wIr*|>F&x~n@B8y~sYF78%RR@PgB z8*rC}L~J_vP~~aI>2x{~au-H!C4J*qHYq_|jE$)Ir166z{RO_ctcK|PPq>VK3DByBu=jOKX@T5X@&+!D1 zX#D*A)mxl=sj%=5nigpP@WT&7dKauJ1Di@0dedra&y>zo@`l6%xiUVSF*5$z}8z-piiH{&etN@r=uS0IBWeGf<%+9^hUSM4^UKsFLBlyu7=y5K(>9 zFg%w=U-FanntWQunm2(OXGY$!N-U}r3d_Tn1}ek#dHWls)edHq`g4^iJwu!JFCKy! zy&qJ)iBPL3MZ9sy1#VS}8#Od;VbjV|^hydunBKm5vlvwM=1p>DW+vGQjC{AU~TBV_vNI;UlQZgo`B5E@A7yXwSvBf zW^w3{4(M8h0eFJC_=`XtoSLC+TgA9@UkJqxCp;ay3oauU4V0l2n8>l)d!t6EDX?`E zr9ND56eG;6!!svH$j5YWbld`ocxV{fq;A)mgDkrHq{p)#;zgz{on8Fg z{i3p&Qxrhk%MR-+w84G`*XQ{0;}X{IH}f66^fIgR-{tr33%wG+Ta${=fCWDESf4Qs zLec1hQ~XURR}IBgZmL8Nu)+{dgw_v8i@_{k@kL<;(8UoI$-D6Gr@Z`pqU5R!E(mu~ zYM&edm*YZC2MsQqnK9)uT>Pi=8m?~}?la2p(-2>T78`p1eheVJLQ_!%#z-84LCreY z8gLrv%y3bFrDl1{Et%(Ix?rvXA#831=4U08!Jwol-1{}_)*UHZo~f8dzga?5! z9X&lOaPoMM*wl)HOl0{~#;r2lXK64z2u9SZ;?{;r2t);0EPvcq2`XYBq(5_pVD801 zAa({GyWMwTQZi99(xLlu6<;MNwsl0v?@@jY4weRb9tc^}U5bjmYNt;hp;(l7H=&=S zSC97;Q}&679D(^cuT`DK{JVjUwR`#?}1)XtotbyXdks_=4xk1BvH*!u7qYe-hO9;00qbl)&p#&jNl z5t`;@_H+5$$wTrLFA?Wx2ri)7VH8#2zGa!4pU(sF6>k{kIwG(_Za6fu*fT}2H(()=FIP(h%|j(*W@?2!1s}%xuz^RSdJ$e- zOY~o^@|S?nW>b56yYF5WACw*`)nI-nG*v(&rLNzHH49;H9=NOqYl=w%7n*p&QjG;8 z2t~#!!QJ_VyYl5!MZ(H7oivA^$fK+esIW9Gxb!Mrc;7y8n6akL&Q3@CNG(j*dhF23 zqHzirr0k^8ifCzR;f76r*J5MQ1Ln(&Z0D~<8$_PvN=)<0Bo`KO;r^WFK7;CKji)TGM@C({-7@P z6~HaN;t;Tdip5xJ@XrL`i(Ct&Nfq|)#}^Do`J(9QC`j1%g;+De{%0XjPJpDW`U_2g zPPpleFBxc7Z=e`N%Pjm^dH2BRncFaO9HO!yATDixWH71%2jn2I6GdRyBb%A8t*;s{ zxW%KGv^B{~xl;!ZL18hv?n6}osjWTVVI z2~ht0VQZn(JcK%^U_k4^i82sofvM5jt%A)yza)H2Yq+b!RFvf8f~wzNg|}Cwa6z4A z1o$sBlmQD(lS^?i=BJ;2f>V0*Gz-@I5@Wki z#ayb(aF>w=1i<6(8D`+FHw=^0jdDHQjN^%(WL0qvNCgsNs0SIx?(UK z0e01UJM8NxR3}y`Cr3b>0!{}dLpHs92A;qw$qu&hps%lJEb*D6_4-bY4g@Q;CF{g3 zw*`OA%PoH;9&^i(>PAs;Lp>9dG$N5FXOFNj7Ir11ox_a~5dy1*(F)r+IX}NGea2fJ zm=KJ@_|n6bdnh)x!e&9RLTV)tEG$F~!A$jlQ+meFxF8HYHa-6XN`EfJzA;!LK$zs| z>J76i9^4S4R#bHPa{m#)(?5sGk57CKmDl?J>U;Wc^XeIm9B}#w)?Dq>LINaU(V!kQ z$?smokIdJkSt=^7w3)bU2a+!VOD7Q8;sB&Q@d|k&jIMH)Nk#u9?iEJXiUULfiT+?Y z*|wEH_qzpA8W3V>QXGfu#hUf&B_LJduso{ee;C@7?TuTv zcpxeeHdbL3tYEWVK(X7;j|~Lt2q-&&AmG`&obRWNk}Dy85r}8bZx0EC0G1Vm;)lrE ztWiHLXhUz_yy50jpxw^~WH5l$e$Q4;g zOUdRfn6LDZecXG zv|Nygej}=^Gh!I9BbD zagB#Y0S{1v5gLZ0bpv<0THD&jP*6M1=bHioI$ewr#I8UAH-wZ~>0G@Wf{1W1@drHg zCs_zFXYqTz3)ziMLqJd+9aL4Ad9w+H9%y$hmY=COLHHGl0O;Gk9-5d;0U(+qZlDS3 zLO(P28Qg--n4)fsAPGuc!PMLr0w3z@NiT>oz~P`UkOIhqgq1-H_DM(;@yL5WcP^zH zfMaoPo0vH*?FTNda5$BMCT~o-d?^T_iQah>@<9CnPRk(C5svrjR+(4<44_=+QcQ-q zhXA5IG~mucH(S!sH!&`kPOdo?&3wg0h!I%+z=2y~El9!*`}0plPSCu=`fXP}xMW3|P4Mj>7f)^5+0Cm=Yi+ zNF(_(Nep7!KHWu6#}RRlm(%^Gg0y#1u(mEJ4`~dFAu0zh_4?2$^SFjL=c?p zV_qkZq47mny3dv$#WZrD96bcf&O+fq@+ROc8nbW395TRdkh*;ab_1Whw>CsR&_NsY zVw7xwV+@XzOnr#D0e0PuMaRGap&*BuMas<`fuoz~tT!x2N!L$mun<9ZQiH4`91uq8 zFdee&j*T(OXkKB4;qXt}VjbBg2U*=ns)kERSjEAK#N@~*a%=6XV}1Q?+*b%zh>k%T z0z^D>znPPt**t+owLmEj1{A^6y;0uB#Y$-*$cu&y4*2qmJ(Y}1NI#$z28QIZq$>Lb z8d=K`QkZKuY-k0R0eooLq2l%HqNi?ZYHH!sEKhz6W3}>{W92BNQVN%Y4i<@;mn5U~ zW@Ajgtvq`)nw?VPv@({(R)L;~h6M>M zlU4}BZ-8(Wy~cvAi--pbf7v$hLz6j!GA^`j{xjRqCf0R~q?$9|D<+P6F;tsu34Vv-PEK;x&ctsP@oaSI)Hg4ijLvm)m?%q1sp zKmM<~0uEd_4!#d%ywf5)gBn>n{dyhR_aJ?Fz_Lz-d=rZ9f_a6mX}` zTr5!b&kPvAaWcFNHvH+}dIi9iAhUtd@Oc()-1x2(THlc4wM?|~dCd=?z}nq^|9@n_ aR=Qz8YnQ3^CIf}XU{7kE$UJ`j+W!GdMXdz@ literal 0 HcmV?d00001 diff --git a/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-8-output-1.png b/posts/xgboost-for-regression-in-python/index_files/figure-html/cell-8-output-1.png new file mode 100644 index 0000000000000000000000000000000000000000..edcade02a80b93ca317a33d3d854d25ff45e014e GIT binary patch literal 
16837 zcmdsfc|4Zu+V;~tr=paK29boyEW<*S%23KY6q)BK^RP51LyAhqCmN6`nF*;7LS{0P zd7fv!pG9~7|wNHM(XUAjWiocB+?d%b7wA- zNGruiq!n-1uf=!53${<{fS-o-B&dgO&5h|>9Vr&B{pX*n6ZK!a6)hcn`uH*VQxs5!M_s*~P7T~?>xb)@6 zXI~P{xsMfdNpib*?VYDgmL~m*dFdu?&#!kq@QV+9 V02WjFHk!weN=p{bN&`bEN z-tCTmNurPFC3Y--cjaa=yhchCYNq5NONHdq^tes_j=I;_dTZU*PKG^jjL@W`;TX(q{p42g<;y*R2Puz> z_^7Icw5nfS@Z;Jb7R+ZAcEOL;Z=%46j;7XwZ!+}PO#BM*+u?m;7qZ85sg*AH&|fER z?&oc|gr4)E-*AFT!iRpv-9z~IxqoPPZ&Vz^!Goa%rI}Mrn<^bY-nf4KU{6mEJtHH# z`Hyoq`)jvzi_`D8{ykA|yu0ky+6?%KEuCC$g5T_;4A9gm?S zN|CNdF|ysb>qu6($nLLA~$bx zWmpd}u(SKNwQ1ZOY$#Y^QB zv@><>yMN~1nM3jQZWMHDdj*-;X}m&=m<4@jN0jY_sQKo;eNd~6r`Xajwtr40Uywt$ z!K1pj09zKD_l~IBRyF0J?lk5olfU;mi>)Sy?DW%svVV^uS_A|so#aP1)i z_FQ%9r6B_C`ZPn~1~svG{h*x1v9ZqL_E0xk{dezEQ=dr%^9A>qDqmcXec*TckMA2X zlsnR%udFqnuvLs${LVm=2N_DMB(vS<*SktD82jXLhQARC49KH(7#7z4`fPiK>e3x^ zUS3|K(MFvkM~+}EJKrsC-TTMCi~E_?u~Yi4cqc5=e(K%L0VOr3nU30biHRJ~2aid` zsby*uF*7r3%3GC}mq$cKhQ4`oY*7D_me!F^cei*{?c*_hULYtm-Eu3{jLYadRZ7;I zH}1L|-+yc@XS^eFP$X2ww*$x6=;p_yjx5$2za7V)C-Z@$u1qslGhfkBs_dOsQFmPe zqh|gCFP6TFa0)#=Ju|-R%`_bMUcP*p8ZbI)o@~-g8&yzJ@;W^|pg52_ zUmtIL#>&b{rFe3no{5dkyNZgMnsVcv-5k2#>h|G`rGMp|J6~4d%cz)axW|`4GR$%A zZZYMK9Y>EIy*Aj8Hr`j`?X)m^(t1#Z&#L!~ni?$$$2VgrD-h?TJk#Df-KsAyS}|ca zkLk7kM%{(Esjk<7#u}qP3i!vqvXdGtN(DUkazDT(Bbl3=#jG&3{9KUu4}kKg8@ zVuJQb+tC}%whR(JCFc`F%f2TW5aqzS(MsOeDnX~T^YeYmi@|(Pu&#GE?-4OGI~jc1 z`au={%n#p0lje~7l#J$WpErnm(d6QN&T43AM0e&=tKXO%?}@jLlzevLS22%Wxz&se z4DYmyXZ9{c)=Zd)C$;`i|3FDQ0*?%WA)%)-++u9Euj&L--RRlD~a_rNPpzl9MjQ>(+`ZGM4kY}pRl2`-F>lLXz|vN(^Q7#P*X4t^2n1ohdVLK zDS6fFgo*Y1SA$1@Jy1)cc$3)Gt5-ME2?o7(-zv|7^XoYy!iM|?(@$O4`UVG0P85u`Z-y!?@VknL; zKzsD#CbkoHq!@$9zS^cbYNqKwXeUw z1P;k*>%kjKiwo#5Rt_hltCXRi|(aI?g z*c@6n5oZmhVAWflQrct3j(sU8xNk%hi<26wU%h&D(f<_j*mih{x9gWzR1_2zh59@5*HEiT&FZdaR}~Vly7>KYjD9&4|VzE>1yJL}XqkcD3-g8;Y7<22xj@xj4z=Ew?C6 zY36y>4t#Q>@O|`1SV~GNN~m#{mpBs>({6b+d&1VIcyMl-zJh2yw zLxOt+9eB{|JW!rQUt?NzRer^{aA`VRP8;h)Rb}N{IYFdsqDH;+&DAIytmRCBw37%~(9xzh zGCXx*vR;$(-wt#0(SbxAQZHT-IWfBd*1{$O7L&U0HHI^&ChTwY); z4}E+--rhiIv$QbPuK(uEn=hrMjQh4yZgndtE$#XiDr&|#H#wj=KRtpr?c0m96tAVp zQj(IA>=f!g^pR2taa0Qf&5JD9@$ux8W~bB1#*6ba#}(eXpnZwgD%yo3vYTCd^VtXc z?pIM!QN7i#IwlvbkTW~c$7?r!v^~%3MRGEioUfnXSw+P?AUISzcM@#J$;rvk(a4Ae zok|CIPPBS<2tkcd1GGzmY?B=yVlC3NoozI6aqY2Nt2Vj1gooI<^ICM8_Qj;E9G)L5 z^$iFpyugNCq8*3QL{K^cP?n{pZoUQj7=$u5eOi9 zcXO-rLflfWU)K1Mt^0UBNITEUqOzh__zOG*{u6j={3ARC6~4C}v*`|X3pmck%KBP1 zT#9^ink5YTN^%kxS3tm1?p;1MT3Uxu2kBPVYjiX!hM6`KRr1PODno320IZ2j9;bPm zD9`3-8OyO^?tCXY9@7JLb#)FS9_l}4y2v`+-$F_B^q|lPioj-H}-hY^``~vCHLs*b&cP73#{9`kLMxkuwV9EM)s=S z*tYTBDrPxZ?CXp-zE7WuOG<75Oqt1h9AjqQ!)2%t-TC3yRg#kvIqT>?dnM>qyiT3K z>t|k+F~~m@WTM)*edC>gnBjgvP@YW#0sfgjLYPC@}=Om$Kq}!SK@jm!jc9n z-e*TcKB=4vOPV#4BFj>N1}7O%10}d#7Zw%O zWjWeMGob(K?bQd^?2D>P)T6AbsxsrOPcaDxvdGCovB5U|_FTBj9Gz*;sj6sZF#;q| z5XJ`^1A(|svlV1>bVCf10cS)+Rzk=?=imq8RTif%)L9aIAzUi>OHt9P1N*jGOf_1w zTXbGbw;qy8F>ZXuX|QY6x~-m>dt;oJ78@)pBrk^W`|E_*u63~=OmjoQf8_7K;TI@I ztJZElAS6W2r?vOIwT|MHLG^J4>5%@@rG(<7CwJ5i#Vc(fxqf>16UEGYmryC{R$Z#u zVR==nout1DQeGoJe5K2=1PoF^U%L*CicyGf^Xwxi=<(w(-@YAA zvU#D{j&F^IQ(xQk{cc* ztESFQcS5=wNNFwsjC2EF3~m2Pb9)jUeLTc@(e?qIPzk_k^)J6^Zcxk!%}oXJ0U^5q zNduD%YF55As9^ zN0mK#Qa3*S`{fALZBtzxg@PQZ3if5?QwB}y)ez-qP|57d`(+X}ayL3H%{3=z6+LC> z1FIvQla%yQ4gs<^W!h_U609fV;}vz;Y)5b;0sBeAQ%-*r<0@+Zh&tg7uU|cR{5Y|| ziPe&&&TWP}u~w!l_tP!ukRc|OZQyJkG1PHcv-(k)cRZB`uFTl%`w_s>cl!4binK}AC$?db$Hr@8-k0jG3+ zsQt1w05CeUR_$4RSi7yW;%_RYo&QDjw|+MVC_e2MeL2}?JN%E4r!1OU$(x4Lo!GnY-o2|WZOOTn z2AaUc%>2TnIcph)djCf-0C=Psa4<-i>D zmia`tRT)WXYimoo{ptE()|@f=MhWzEJ=HOL3H-teEVqJSmKGI~+=P}FV%xcgG?>;! 
zOK2wA+6*M9tebbAaG9TXptbKmxs{gx3)I4A{I=#%LOUV*u?px+3?{JqLjz`%3cA)? z8F}UE)sqm%-9f%_0nwcWjO3S&bCVZ?cpgzQN}bTJJQ`v@$h(7*Qj?P{Yx+<{xU_fX z1tYZI#G^|VJbCgFr$O}ztKKVjDd?ok%}?R-`4^|#Ssa(4*R&+G8ydvDjNspCd9VMb zmNz%}Q_C|hu{RA#rkcH@BHz3@giZt|5YIjad;+T=x2nHB z#TzQVuaA$2H?6=kK}SC5H^ux8AfJ3_r(r1^@fMstYLYpzi}+aT$eyP*PV0E8Fzhen zEVyac@oQyaVup3^?m(T-^W5|4(c!Zin*TDk6uUsMrO3%*CR!mUhlYW(w6?dg6bT?a zYd7!yAS*498^mjoI#I2>!e()%OF~kz9a$yu=Hpqo1GloiOP)F)OQ<8ygT6)DJm+)|FZgFVBiJx z6@Aq)4L0pw;XxMkI}Yx+|Jj&M&|$ZWE(dHTfqFRyhKcr+NuWVYnVXEpHx13CS27s~ z21|>cEIX8mVsxx`=QQZJVNenRUR`K60G?=@vll1gRLpXB63*RAI{)kck)i84W9-49wZ8}$7>7Cdkb0?PW?%Lb?9)zS| zOyJHYbpCKo6Se8o94D&e_GY}ANiBQ+XO!CSPCNi+=5}9UUdDcl*?U&I01Q zFN$|Xj54=&aX>B@qUMlzUvst)ccUexFQYUhypRDk|dE<;a7vV#xq zpB*x!R1i;2#HANDfW~tdFM3wJpKkNGNEQ^BWKG&Y$?#UUEUb3+^XD_w(TblcBW3qM zDJ>`{fNMQ6-R@2PlN}wZ3jsz)h5Q%jis`A1KfH;Wm7quAF$69b`LPMLXW2~+7WY;~ zl`0&T@EKQ}Y1F~O{w}Ftx>bEF^g;yzI#na01n5M}eGn6J6Z5e=IC7rIGAz6O{}xdB zwn&61UMAoCsmBRk{hjl+ngF~pfmauP?;!pAE(Q4l!epN9iwnno8tgm?&I}Ar~0`HCI+@I%DBKe35o0IIo>ZV&Y5vSarn{I1-Wm7+b4Wt z0Rbg706oh|=q!-PyUSjzLz@!*`t@sGg5s>^V5fqQ@&JUw4^aU8#7DT1M5rVG!G4db zuVf+enoYZ~Ck`bW)`=)7(V&L3A~6yl;Oe?kE=CE5ftj$H&U({oVFOoISLf+t_a8-% zh;Ttz*rCG0LTqL4n7Ei2t^V5heW!1F!pTYi5s-^jDL`_e^cCH*4w1#13%SrN#g9c< z9QCvg@X$P+$G!ePBLoHoVz;u=0qni5%FD0%hT}_`?5`6AEkPFC7azcHLj#31Fx@Hs`OegL@}eUb8gJFji0McZYazod>TQYq>;S(HvaE(y%@ zhSxcsn3%9p3_#N^pFO+NBx{C&2s0VlVkgU3?+llZQ;Y8ai;9opLy)1evQh@dl66wV zyLUd} z_FVe6y1#^)DyLdSoceW{C%i*rsywta=Z>#miA7nw@2BqMuf8-}Gu~6dXzU0p=G=t~ z9`^1LAb3`N)wwH6p9|kQcmBM46w(qpP{!i}`uh#vL`BuTx3!4Y#$5r)@~dD?1>?*q zNP@(o#RB4ScD=>f4zPaJ-DE#RG=FV?=%ov2;56eJQ5cYq^8E+crIGaSa-Z6AJyz`I zHNVj?3I$*}l5+wip8zBv4O(#xND>HzQ~*a67JQ`zC2-UJJZ6UoK2{lViGQlWf}VjP zPhZVpL=46St-arOcpb=JT{3jk&^TmJ9eo|NjWF6_j$ln}=11}at4Od|bhVFebd`3t zzX3@Tp$V#8yAYN5+gz*nx7ZyPW(jtfu4T2kU8#i427%9 z$B!Z~*O5}uMy3tb`*1A7|CmJyU}wJUbPL1Un}MypL%SgxOa8mGiSu6-q~n?bYx2=; zZrEa+HKUD>PTAQ1l*>ee{35F~EG3o8;`7wIqkGejQol4<Mo(FWdfF($ygyg~`SP+_(go0_Pb)Hdr@5xBhl|xfh zkktHRXwLDeC_fQ#GkQl(;9RVHNU#S+%MH-$WH~maaSL}u$3&Hslp18uKz%?ghIN zwzj;mbiAv;&}CIsi#v-0H8}xLVd?37vDzq_kNy1e^`TpBxFdo{Uc?a!glSdR1~|Uz zMh7@Q;_d9&*p#3{p{1oonCuW=W&iiX2%=>~XmzS2cDAPbVgSc|1Q{OGCFocnMu&Xd zc_1-#h)@JBVc6y;+m046>LG*y;q3>ac%D!uiC_5q`LjZ?2V2$`V5sKM<-ilyMH+Mt zANhMRyBSqNJIT~`3xg)JV|BQ5;6Dchi=ICN0#@-~2natFH~O9m)rAiM&;b%gyUylg z6FNZKk{d6e{h>QE0ps)aV{?B`H&;p4wGs^`jrzG4BWNq~MlCcp_g`zS3!v#5682(@np zU!Y>Nf!&Z+3!9c`I+u&>br+l;7S%T}fQYv2Q=AtQ%VkROhxbJxFpkVD6bHR0yj)Lf z80^!A5q$bxCG!)Zv*Rzb)1>1yzirCXpwX_H7#g7_BI01TT*mAu1g8(MYk$4hC}=xM zzm9^Ii28&#zJ9e&(1B2H;7mySv8oa-_Nl)S20pN!$Na~7fCEc9*@KH}0`Ybn1qS&$ zFrx!i1l~|@ytDp}h*=Q=hNVf>RML?|+J2#6%VeuB5t06xmbQX9B0U*)ByL+(hOWA9 zW2oplqQW5a)G+GfvjeblGejGLGpii+z>`BR5SAX2$%GGpRBvaU4q2-?%h3UTj!?5K zBBj*+c;b$8_064#sJAf+&1`@|0qhzRy7`o)$>wfsJ+R{OfqFmk{13(;e%sN+ zdP1H-8^bX(Gv&NA#T8^*bQ)`gMnc7^>MAiCU|4$sbk;o22q2q}9=jW@4LEW~PdM55 zMO|`i=VaSu9>ykzG08k{I%yDZ7@hA&3z^OSmb5%wX(kUU3(W=#PWTH~qvVpxwq3hkan1_{%(Mk*Gl_!l=SPdc4$G0e0TTjGp1attX4rMcV7yg>pRVO=Cd9D$ zmUTkcU}(Uyc^DE>vfsKP2SXWt()&}6lxz^av}wAo)G4n*TBtZo%!3F9jCCz*MVb&^ z$ms)6n}~4r0`#c3E!qusSyP!qmjH;H*xrArH_4U+w|{ZgyHf~JSSiJ9^dDI>#U`T@ zan8X;^~ocj=oTO1JR_Y~$Q5E+WMItJtNOL;GZLk#liv*#6Lrg&YD)i>DsbO1tv!-p zwTV}jyP+36=`d|d z>o{Ik%HNr5Bb6vl*V1;HSPtYvc(RukhGxpVojwmaFWDpZuGM7QwI7olrgqj_OaF9mnyL`f2cAi`W4JFpMkE!3{fa&c<=Q|7-L#BD*` z)uPZyyEJ6KibXM}SrMA99iZxoRh@OaN?ud@edk@VI!Z3aoj9L29zK1#i+{HJMZA{t zLZA9AQ=WbN3*!~x`*qqr-_JoDk1FLjH#hfmiFH;*n}sn8OO*!*&YD)!)SWut(`lw! 
zeIXuDl5&!b>IJQWJQjj1Id zMM~q%y)`NpTTny@_ZzNS@SUqa*orW36s<$+?Njxpm`hm-!h5BNvMT zr6;Bur#*uAn$$EHv0CcXdlVvvX^f_T)#9U$4tK;u;&E=`uL4uAEEQ(_e22r=G( zqLPyPSXt7--t3s)N8U?pEG%1E9y>RCExFa$WON!^gr2TfDcVW*$bWfW}>mY zVMkQXcZqV)XyKF)Mi2}TcC{m}PfsuJmg$l9L<8#PLf{sA){&(vI2_{G*GlI4U6Qcq zh`1AmRBF_ngOy>fJ+^{!k95~(7G`)+$TJfujhbZ>K#U)|mc#AMawxVh#$p8+f>5tS| z+oIJn`Dzn%q=!d~*wzWyjjt!_MU-vJuj^ci!2JMZ^-~H-`d5b$BVLV;oPy|1Nb8|( z+6rzB&4o#@CN6VQX&5@pn%vynoTM7Hy$zu#a&w=B(*mn^dIH#kzLvEpW6;MZ^hi4wB%fWq5r!f2?HYcs@&q6tp8jxBrMe0Z|n zWE~Em@;A(Vv>-DQe-c^XTkB|T4&0ox&Yn|!`0UwkqKxH$rMaG^bL7wkg1<#Di%Tf0 zQ`bukvdCJfusP(#@$lt$BT!;363!Hu-Kp%%pHg&Gq7%d)Sbh=VDO^gth8S@v&k>&D z%6nx>)NamUYro?8z! z2}5wcmE}0M)}r(4{kcs+9D0{eV1}%AfV|MJlY@Pl`y4PK4Ds?-oa2B&4&J=vQ@?UG zo70HLbM%wKnBa3oK(+--y#*#6m2z_`1qS3y-j9fzHd@sXk^A+rS+l)FUuneOS6$Yi zrn0f?%tHP~jzxdG+qTtKCSpryPWB<%uqIUIC{A+L_V-9Um}!ESACglaB79BPuV23& zKuRICGr)k={dp}-CQ8D3m>m0wS+QxP)#wnzUJVi0$mUA%l%697E$WzRmZtNC9(b8C z1yRD3^cZni@5(+Ews?4h)Wld|^E?BWxIr-G2H|0HoIcw8bkCUpV#M*kpVzW{iYTyz zuyD>yhT6N+SIL)cI4GD2Pq#qyb704h^b+R^2PjH^!%fj z7IIgw=Ay?T9xaja;BIzc$C)%%DJGJ;U%os?uylT*c}W1idW!{lPQ`E`d$BM36k-)w z6EP-b$nD4Ch%IMAFi#i7bnIB0f%+T`d9I*~3gwBIK^JrB0bO6|l#8PCVVAleHg|`i(5AoII*!TfsN2jyU%2+fz zP9YPW2Zh20Bb9A=P<`8LvKKj_Gw0|{S@hRp{^wZ^GH=r<&U4%do#i(#Ei_vo$1hv$ z=jX?+@p*mL=##kNYGo5*_6C;fGaj?HJ>G~g@^V?b2rZ6y&jUkmVD=~6F|(ZLA7Rr> zcR4TW+#L9xOLUGq!uAZEkvO(Y{V1ajb3?OHvGj~ z2iCh1ZCwtc+wIBOSRtYV%E-k2c#fo}FlI?zaTRMZEwq8Ku-QvOVF8m|i2ykUmWcvN zT8-kh$@Rc`r|}nbgok2Qk1&A{$rYnQ>u^jl?zfSMP}I&!26C;zH!09K?p`gtZ5{__ zEB1^lz*m;mdE4q7V9}}rN12#-JNSi)`00b4)tG!NrEq6r5I!)4bk0x_wpqc~uQ#xx zNC?&i?ln$5Xq=IkvJUYGn4m@q(H7>|a6NK^IwVahrj#H{chzk>QNvTn1mxd{pKEMr2YY^%?@H zv%OJ3fE{R8spH;{ZbYWP6~U|)zwCLfz!K5r>dB{rsDmaH4q}89jVcD`F+1lau!R%d zS%}lD-mc>cAAs%`FxO3VSztGwUXKC;i^J1}sBJqS5u=N5&d+5poz7kuFe#n7Lcd{{Q_Sc47x07w4e e?R92O6aPyEl7yJlnOKo)uKx$2_yPR@ literal 0 HcmV?d00001 diff --git a/posts/xgboost-for-regression-in-python/kigali-branches.jpg b/posts/xgboost-for-regression-in-python/kigali-branches.jpg new file mode 100644 index 0000000000000000000000000000000000000000..93e9735d0a97b114b9d11954e1ac7c0d1d5f9523 GIT binary patch literal 79493 zcmeFZby!u;7B{{REg+3_NQiXTp}V_5k?!WuC7=jMr;^eQ(v2X}(%m2^aA*ZV^4)m9 zdhhc-_xt#0zoLv9-LkE-WrLylt%jKt%;$0(sDY z`w(of8vsB7YbgZhH%15Xdk7wY2woxJ2Y`?Q$iFcFv_Q!I;*XHW|L}ksAe_HtQ9-^z z2-j~+0OARV@L!$ZAd<~i|G{V`ATJ={;^JTl*n$povZUtZ zyd!}=ywmZoD<>Zt9~(%%a}1P25X#9%&B+ht7KHK&{HGLkNZKE}5j;aO|Kh-KntzWW zoZ%l#4FGWfa5oktP%T#+;-B##gvBBK#c6T>@)yX5P#K5&n|_BI;_&|BjsysB=g+$_ z_a-3x?RO*r@h^QQ0qHMZNkIL_2GHXP_x`~+AV&Wy`z?XsHy!jP!a;)not?;pApV&U z|GQr#UTzK`A>rTn9l{@;Z)_Y(@{%>>k-)PJnTu=@U z0QkSJ0N@}209bhdFv9=mNGtB*69ugMMWHi-iF0KN#$W=MM(G$@>>00(^h)gEWBu4+eRl|AB@6VgL~S zi;)0{zZd`%|KdCU0UMBBNERd;@)W`gp@O6U900-ZNkstJ;6Mj}k8ls64?+PEgfK(i z1NR`0Ad29Sl)*_u0H>7?G7DLRynti_$PjW!DUc0VLxdp3fH&k7*p2|G11})YAc_!7 zhz>vk*#t~M7uW;O09S}Fg#R}OKSTk71{nYdfiPePs0JL3S@2vDLGSOe@Jf&c@=12P0;0Gm-2$fDDoYU_mH>RnP+}5Ov5;Kn{opj=>;)1V{rDaRT z9I_J90W5fy&LJ> z6Cd;L`hPuvsm)By?Wrw1OdY7bY%MIQ%}pIG-At)@*tmEd9jUF@Ioa+EyAKZgE*BBN zwf1gVz0(G+iTCcFEWhdCVht`sf9ZESj5|O7%hxEs=^(zx1={eN|M%eT|D{90dHPM) zzSD#Imo5pemH(y_|D`K}>*JjbcRDriM8NW=zdL(DjGqo(Ex=0oPaF7byQAaM|E6O| zM1upllesGp00njl`dIJp705pYfQJgVUl1NENJ%|Y*Hn{LP?q@}JqX06;4bD)oud=X zO;b*iTHnBs8uc5fgvjOs{dT?@W0E?nSb$4vfk<1yUvRdcq!NISl6!R zyFkqTZ@&ME&@I4-2V1Vd%3;ODCPXfFJ-XIk;rl0`6dP zJAg~54?qnrtla+(exBd_Qh+na?E|=ha@JtV!$27ckaDNN62uCC6JQQ5#w;M_0vCNQ zfa}iBpe$H^PXFUSmVeViR{=oe!|m;D>%VD=9{`}59{|vs|4n-gMhGqj0Q5Jzn7Wz% zRtLs1fM8_>0N;ziU^f7xXAG1va<}5<`rQt}lmt^}<@WXmxEi1)0>E|L?d@gm?d^3Q zmpx;R?GOsTyc 
z9h}^u-Xb)=l|w;%M`ov?{>=ij7opKtQKy!2cC)1BW8+}spaG@bEUciK(z5@^f_EY` z{}}1z<;CX3&F1W8&CV$(D9Fyi#m>dW3UaWz`#8Z&y;+^yY5!=D2II}m)&*wk>_mO1 z(bUY@113TP_WUoC99{mZ{jX#H!)OVfR{lHZUCVDb{b6FD zmeODqp}E_4vT_Kra`I{aIX|$0`{+CE|1dhcX*)YRh|v7&#PP2N6=w@uE1&jxNsbwlGTtM^o#&zG$fbnghKP{+A)(#QwjmUm`$o7cNQige$Vf5d#ETVXc%bs(C-R}Ffq_E?{4mt{7V!884(c~9Tf%jH_QL0uiH)l7Y*VC zIYETb00_7cL|n-2r{4!G6hsK*_fGlu*WD@o9y)-ChH%#|51#EHh+ww}pt&f>5LD#f z2YFloJj&xevTR-VQ! z@8{5?7~L&g+1DX=BTf#5ti@NdFel_Dj_1ARS1IXedfMcfb(5cGPw2E z!%tmR7ItRW6uBT~Y4;uH6qFvaOCyk^Zp?OV-6`Q^V7V6dtBZ3xW3ntJ72D>zPRv1vPA zYANBL_B@2tpl93yh508u=FV2My{k&07Zd^BW?f9GfeSy!xD=8}_A~9hEaF}|kZ*j5 zX7cRK;z#m{k&Uh?>f5c#_4#}}J5C*AL(B!m&j`(Q&=w)FNnYF{?Md+tVv!EGw3+9a zsd8C7)O*pSsXj!gBmZba)o2@u-78j%qDJqk{)LRJ(a|o3g%e=pKV4pJkk+GO3uM3d9;kM>(ppbh5Bt$r_4B;)flxh%Q)G() zsWiH_`Q&_X?T)ivuiT3)fHxTg@8AoXrn=tm-A}?t{MIyuq66^KrW2k?G%?zk8^BpLaNNPBc7}nVv_mg#0%Cjh;Vbr6r zJcJ~T#LlJ^&t^hrum9;iNSLg4MpRVv(Eg=Z(n2;Uj39xfP%jLV@chULGEckNk;%ZH4cV+KxZox-KVt+30 zCm1*7!FQ0Fr>Py=p@bjm|J>btXX6vPO6V{Z zre#ffou-cdf%de5j+E<5dd}L(Q4%J3YHsPdc*Gn_V!_p zcdn%y7mL24#_gQmvsO!Ol&w#_Q172vco057FMFq@C8dFZFVbP9d@oC2f!{!~%23o- zLUt^PmObI7JO6DT*Zd)lvePaOOKCxdwrP>^vSL>&g5L&GK)sOY^T5hv;nBTxFIr*a zI_<)}c_Ay^#mlssZIUY83$JM1EbZja6$Yg8eI-iF-PpPWc~Pw#TFUqiHfvRD=gugyDoRSX9|2#aJ*c;kqWnYTLOdQ1ckNM? z@(kI^U{L4{%_F=x72o|i~u#ra3eY!%HO6F6jTd9}H@<$f`5}_* z@p41!6mOk}o|jIP{K?Idn*`=?7##V!Aua^{xd%(CkV4Udc6~qUDHmOz`32&2{ls@h z7y~M0??qryVkt|M_vN;5g_z#h=glY;j4GQ{oQRPAw zRiphQ=^5&6`eA;j1B5v~*mKdi#>{kS4z1Q70+^kQ+zPo2^)KAdCIV0Gyp#BM7aOuz z^(#Lv21v;;tfJD&=2l}-T=EG)a{EjA&kuj(z_b&lnly{3ihAGHndr!9iiqB$^xKdj z^dPu)_!LV@kZ`Nk|Fm#v;Rgzl8{p&FS=D< z&}W4S*>MFeEzN>A*$v4JrPvOB0hr6JnX1}x$L~FNS_htyCOn6+E1pDoP8fR9M=&Td zm7*qaALK!fGx0|qk~gWbQ4y6$bFxi{zYot@OWByo29~tquGu?^G)cDisXh z_#+x$XLNE3=d3r=5$yz1@xn$<3fXzid(6NI;QV#8l33ZPS;_H$sHRAJs*^J9G(5> z-t$@-%4r~hk6V>I7oD#CxS@=*U!)4pObbI?>iQ+KHmgV4K6^|+>DnU7UTSJc(d)=4 z_7FTCrArXd(Ltn3@B;w4Oo^aMFEMh_Euu(ma-nt0j%oKrf05$ykk=kPDsL+j28(O` zG{Zz-0rKT}1@}CxF1mE5j;7PRnmgg47&IQ<2VC4+I*H%R?G#KlR(`&pbxdnvWaNK2 zfvCotuO$pjE4{Euf)F`n=LgU>aB;xqH>U_u(VwJ~wz_MNDw<1>uAP*~2#B;L_-*V7 z@5t3U@DHyx?WeQb&)e-*&KmabRKN&0Qqu}^=9r`-+vnegL&OE?Z0Q>*Ee&=$3Q|bY zXj8sJ_QA({0L!`2CyJr2KnBMS?!6{k;~({n$P_cn6#iC*wnc@>meH26CBAXVkmn}y zM-p9A1ZLk7eOXq9M11{R_%4SIA1!gMt*q5lRDLy^pDGE;xsp$vt8}p{G0r*3hWB5m z?x4QknoksDRT*CJ_6R8AQdr< z7}AT-BgyQ5`Lc?i?ORT-7FLZYnLbY~FiF8KBVz*k3KDDk*j>Z6&1drMb9Kmv-gJq4 z$7(T5$c%Tk3=pvjpxE!oHVPST_LReYT$xVhJ3ZT1zg_t-Ys>CR*mZz9a~z@N7SM*( z{Gxh;z-2yhT<6S(7A@mocYF^TFqd77T`lIY)!-qp+VfCGov#n2^22A6f&_{l;GO$i zu^}H9fm zJ{Eozqc1a{#a~pHa%1eu0&W3tp_vka4oXI*v0PRwjfd?-$v52sMCRe}X2U8W-v%Ys zwMI2L9Wt%~6&(y&cvm=eSQiqG1XW>{i~VK`I|-YxJ~_!l2V{{CYS6_4zk#T=>-rsn z&jp{}xUDW;+7B_b1q@(IZ8hHlvs!h-?5dY?sYM}XL8l1=M8hI$bQp*gq7E%dsvxpa(Jrtjc796Yl)4VH7n+{?lAeD=*GNP zf()-ic)cUN;=Ei&ERBB0WVP`-k1|bP#VH&v!x-F>C^6{6R!Qhrv!9^d!NN+}+5wz%Q{> z=dWR3iio|EjW?F`N~?S~JX)zd>GwIr_qNGEoK6p$rpN9Vcqqh^QJU%oN8k#<) zM=ZUSh3Zb$H}cGj+^9A9%5F(WapPWGflN$|JnAX_qJM6q)C1#BHHmWpVRTiAVqdH` zD2K23CbpPgX{S%D(1-CaEowgAu7r)TI8iDvjyx<&&R(Po3w<9%7cJXI(QNjOEO~fV zqvrhdW7*15Nm2TuVauXlq+)yqYNQLMW|?`lF7=fyEwaNgM-(+R5 zRZAx=18jKkVOG3ntrexHQhwu^O1_vdyE<%LuYku*a)@(5( zqwTdy?l+g5^$yn0v?cT1A0Xv*7!#@;Am?Ye&+|;n65}$m1L~`aVxXt2hn?H&snUIJ ztDH>iqF!mVaXP6o+$Z{^?7M_z<3Xj(d68Np; zR8Cd-nX6n>o9>1cJl*fuFZ^tH+&lBl5NwIaz@EV&BYu_{4<+a$hnR|30BKo`mCvUe z79nnZJHAv}{|7A-v*>e*2g65HHI~Ai3%&6rml|M; zpImZ24L@c6Mj5$k|0PwRwYM;9+EXdB1H)2TcPgxcn8)o)q(Dm~SD5-dy5DoG^bYY_T!J_e@&|1Gq1{0a|mn^ z*5pnXdG&Bce0ZvZjG^HL{mPf}7&Z^;UbhdkPOG~Y+w3Ev7oD7$xQ?-@bXVuc-SMLQ zTF1MgagOOP;b@ffDvJ#{O`mdR3nCxvB%m^%oPOG#6Bw8(c3ZzPHTo9oq`QCM*ep@M 
zTHs>1>b8D*T+!F{QAthjkUaydE?!l*1o{GrrM+*YA^$u3D}>{8`~G zAb7UOj_u9)8M)=@dh=uDDI>~#2`vSW82K^b_k`ZpjUE(o7f$Obd>-@?w6#oz;Tkuj zS)9a=eb+4de!(OBo_QbKFYwD>GIBaIa<0mGqa^cGLib2TjrxF65GPEQ3NxQVF}2{s zv4XqIU|VZYsQ)7KG4m^eOxUFHBi(>u$Kbhm8Xbj4%r7isXwor%54R z;f1uZ@pO4G2-=3Q6%?AZWWI*-Gjd(tOs93>k)_hhwEW;{H9Zt1*|n_UWpW&iHx#h% zQJIfuUnErbv`>{6Owx9BjwJAt##&dr##%V_>d;0nw2s?gsD{!v&A#mdev10-=5pq; zW@~C_u`4#Z=ZHOzA64sHl_5yHbi0;o2;}jT5O%}yt-%9g=HZ~0d?q=GsV8k;-kSi{UbD1h9&?;)%y}IB-5>iXsdTl4G^Kg1#Eg^ z1w?bBIDuE5=!Bgxfxdx>!bvwYWlpUUDmP&YdPfgD=?|Jd#MeAt&dE79#UkmQQ~8l{ zIhJbXxlPjVXB5NreOTAFxgAI>yb{vz21o>5m#D5`28~~fOPbNrWG$VK5{vwtCWa-f z747VdN3y4YvrC_;j10vdM$FT|<+t2#&;-wk$9!LF^*x}ECG|Sfoz8uyvm1qC0k<;$ z71WS^ea2d|sz}q=Y^HwNz`3eI^8JBE4iycWer`r^MP%1Qyg_l`+>wE~>XhJnGG5@8 z__rk;r=Bc_n-}mlBZdL$i<-(b1B_yd(cs+l&ChTsG33Xk4Z_mbAUFzO-851^pmz{L z50&?hHg*xSPXX5_54G~wg|-amcF1V8b#^eG3!B*Uwm)8C4_p|=Af3k%4O=N7 z6fQfCq%>^WXt#JBw%GelYi!iBsd4@1@^}R2NV|0|+9UJ*p_pX2%_hT+!*Jkn0yN0! zu@#)oV&h7zu(aODxwt?zmF?v7%7pUa%vg0NZhxSJSaR%Y{*`ry2#1L2*%<;GU&pgw z<;&CjBUFimgy}AN1G75y>Xv#H7rsS#B z^?Lo{HxLySZSpuHGP7u7zLzEUZM$ZzmLSK(SHa^4 z<$`7o&BK<^Q~Ze{)Jv)lJCz39IX@r&B*JN`?-ChM>N&W7%(T^b#u{(1-x|`RZ&Ff( z$w2e>4qXGH!WiEY#ylU3=XgPd zpFb5}QZ84QzUy-=Y?jedv-dFEK#c9KftBZ%To{P z2>b9dg(R=6&HOkv4K4Ugeor4qoc8J=>?qUB_5<@xSMPY|vmUh}Ru9pkbs`Sk@KUqn~8mOg~%W2x@Hv4t%41<%L?*;V|?`C%j-@ zw~+iqTXP`hW@?P5T7Y7(KhU*I;m0j7nC5QO?~H%W>$Z5`zgc_VF0#E$QE2$;OBS~X zd0Yk_-I?}d#2+5XB5&YDZidzim%V<32jftIH}W>C98Hw#XAechlU;N+1gVrTH#Rgv zOY|>fdMk+rnZj)=r|Q0m1!f8E7$Ffxk3rWma9rd%p`le@eJZIv&isA0cEXxUqw`E z&_fD+Rg-_Nq^aizv#7?BJrCldaHlHAM)Z#Ud`Rtg7$(d~h`(ZLXjCs!n8It_N>BdL zhY=7s7{tINPl>`VE6bl?>}la=vV790J6W;7A*IhOJvJx_71&83+JSeo`BQFk^i%ts zk1L{kt{WrI$ch_ub`=-RjcB^m)G^ADN z4U@c=!lGx9)#kRO=sD`-X=xw2ugWa=kt&hBqVqN`GA>9&^68&xG=8;fJ-WYPoL<58 zX)QbWM@7ukDFfyBlbe*`OdZ#k<* z&`T|wwuN21(s{RcB{qcxX?&OowCo-x;?(fDt)37K#|KTzt@aLwLddG0S5ipHdSbpJ z${5)CsFGQzkYst#4&e0oAUqmp~2dgKXNtKmmzH;nqsO=A4L-d(yaYTAY z{2=28g{5^ek{Z6|_9N^?&-kt)mn4sGftNMi7HLkA6w5a0$DS<-2FZG>65;tK1uryT zCQ^}Z4G#xa#yqr%DZ0Q4e%qmM{eCKi>Py1L@Xv9tMSq#iv#|7#MfVu}mmQ_0b&@oJ z9~m{~XFSm54I-Hg8a8?SdB z8t8S11vnV~A~uD`o^N+iEHB)SdOBwDW7Yt43XFSWm1f{=Aqe7%y{h{6?h?9nAXnWd(jdm-s_%6 z%;r(IJ9CVQO&x__utn#HW_ZcFFsXU;gw_3{4)kpn_hf8`oTIIQV$VSzQ=`*0)J67N z6R-5iG;EwySDXNqu8FqEL{{u{%y-Cmrt-PgkX@lnIA>7;@nH9I}6t)QW! zJ~6~o@YPe0ZO`qq+5laGYE&s-a;3heAK`Jqfp%Rf`oQWB3-9YXCG}cOl|9|gqxWafaXzWd3+EW+ASYf%ZjjugLxsI~k6eN5X&(Q1{<>`8aq}M;W zKN3%&@gs1i&M-Nf^vqGT;uH8qJJKLI0Xt!nj7@+sZ==gZUyF(;G`YgbKDK0zIyH_v z4zJpj*fdgpHY)l+&t!d}qn?8PeX<`55%!gwi$5@7bgG`=U5F68nKCtb)=*5{38!1> z1Cs-@PWuj{b<5&cPDQh%B=37v>u;zmJ2)Sed(K>>&_~hX`C~_WnI$Iixm!l6iNm-! z`^Pg03h*Cxz#cFO;@;HJp3Q9cHO{~C-&q+e%Q0%O@G;?5bo)R`@`MpiOOL|2$(P`H zS@9H)FnUCEf$LrCfX0ck|1W5|5$Op=7V!iZ5%|_5T|B>F*MxvPmVuO%?u?5w@xtCi zgo6cJ?W{K(n{PoKjuW$SGX9A`vA?)1H0Ia&?2tcac;Cs5e>CajqD7{Zbo;AJx;Hxr z4c8SV%XWFRv<*Rr1=p`#wAJ6Ss_QX^0OyysMArC|-%v)l<1-)czYpYl^RX7j;ZXb? 
zD~B~HRyKOrqe@tQeFgJV{+%2ij)-hxv63;K!l9P~g5$|_ z5Pa$ls>)Dpf^s6{28H~eNzYM7!es2^vF?rTRuPuYADNKwvT3TgJ;P*En6~p!{FwlKw_5=k8qtF;6XyY=IrVb(V{IYKpkCWIGt#;+zj<4 zwewvF&nb&eP{ww0h#Od~*8N45Ga@#r`|(7pu;kT5=<}qr*CHJWI_o{uGL3qE$8AsD zF5NxMYx`fw@2peI2X&WbXqB(G$E!XiUjK+!5jwZAAyy2nugW_kD){1Dkd5TU+gq4y zN4vgE-{IUfpgL%nSelf;I3T}x@l2C4RTqv`I#0TK<^27_9`i{iugH{^eR9g>f>8Donb4*cQjoEM~{9qg?I%DMeRDL zw-oYyn~Zqx;wP_I+h$a}7t(Km1lC18)hQm=`|GtdiRjxF$)?ScMPiz#q_#(>RtZ{( z(+kS%C!E?NhYAxljPM=6KcJp;CDS;(ZRoq>@YC9u#v8W%x#9-TFInKvR1j)woiHq> z=qYlk&J}yDqVv)VLM4qbp$hRz!@a)li4R9yK6#E*AuWs1C@H>0{P0PnUnDfSqR(kV z4uv#pwXx}?&s#X(1@*J9Uc_(dVMRK`1S90~kb~H%=gCY+uvKRdcJs(zis~Z%ibWNW zoz#{O&XY)R53vY8=k{A5zP-75qj1PTAoyN_GUww&&CgAHoFV4Mo3V%_Q>e?xk~6#n z4rNG=hdfU6Lvh+TqC(bCwxylmz;q1jJio|JW_x|K_aTzd$ikq{+w{I7_f4bJ$ett8 z#W(0Aor61B?!0fo#x#hY7+Xb+zih`h6GQzNV%r8?K7{&9Uderr9&g)m^y1G~+eNpv z-iecSO?Jw7zPdiD*IR-*FkTt=Sw=H4HxCCp+Pv2+keV2?j4 zpHWm-)gAY~k25_J+9KY+1u8ovspA9Yp@Rv&isw{?p*J-8UquG5L$dm`%9xgNkWYq( z5tJrQlS<>OzIF;i(O9kAR)fL(LSL@FzOqq|Qx>bFGmS+w4k&p>K_`K8$?smMX%qbp zf1y%@LKaV)WUt$B=3XB1%G*m%5k(gs4;3Azka0~j`u&4edwXMK$?!t6fepll^o)+^ zcQMO6I7_A_C9jtw9_gmAL#8NqFDyJKDt}ql9*}nXHZ0kZW>vhZW~Sbb=%?%4=(j|? ztVAQFZZcdK(ea#HERdTipYMb7u$!!yHLf6cbVMq)FnxJHtts1VO}9=GC>7#|yV{I6 z^74HRI+yDr!pNa zzc4E>+J((@Uxe4JY*4$sLm5!^9FctV)uw9xjIMDd7)OPZrjt=Op$7%9_Q?MB5d(dLyPi&D!rfaICLE{(9 z&ozFv`oW^5K09N6%R~>)O%aMjMLMHz4?#gu8*yD6i4$5pw*-ILpu(=pB=pr~47OLk zlKl!Z1mhOqG%U`g4$K@W%$xj?okjHHM`zuaGwkiJ^?Djp=py3{Oix$r=}j>QFyzPI zB~xro-u`2U9nRk^Z&$eT#uQvVM59>mC8Y4L?psnvTVx!%<=n+$>78jV9mOmW9QLfR}= zaSw(9T8xQ~CIZGB-SuRUQhlBKdN9Wy#)VFkDji^De7&ygwIuzPq1W6)pg5hARAS$z z{hC`+$C%ftG8SD{%6fy=knJ+0G$mfX@#8qLXiZsGWmU#`*Vqk-u7rU-0scBk$J_5g z{O};DTfh-z)kxFNVZzCf_*2p#g7&9!G(?d%QCOWfZ%`#UOB`k({E-5z_N*cT595%c zMYlTltEv?1UBlr9bSRv&NReo|*I1D|c62!vVOf#1acr2B-Eq;!F z*JR16>hRk+dBLMnUopYQ_|?}IxCm$%+&V=S|5k7O7><|aXRmTVPM5Uyp5c^=RD=-O zl99#q`Q^3+3+l{s$_(>-FRIGMMX_YER=x#}R&c(y zB)gC?m!2$F5P0RBud9j??Ijsx3Q;wyy_Hjdqjgn34?OSM zKbz*TVNVki^cZJ!;$+b)0Z4yWqoD{eF>tw^7AG-4zRFqCPZ zh|DT9r(xFo{CQq{fQ}wLe1!4%Y-$RD^!vH*!dX@Dl6pBd1}@?F$=C$+JA03a0~S(%Y_yg$=NNf7 z^r58oRX|j0tWZrT=6 zwgsy&^zU`?-4hbvyWPyP;jZ1J!T86~qRF%>4Og;0&{Setim+_Te3sI%j<2ffT-PTG zT8h_}?}JkvJ-d^^6a?q4BX^m7IrnIpE$?{-s>OD?x;QLk%huaQk}WT z35aQX>nWNaB3(Wcwi!%nWK8h6*O>>GXL);mtV=*%$n zd((QY7zGyQriFSM6f@?#(s8BY@;4LHkcQXNoqTW5p^r+z43lxP-Awg#o9!MkE%y?& z7qD3@GF1$^?_Cu0l?P#kJotQGq~MrY*m zwdE70w+=!zOvV`*jmm@%-S>2u*20KoMaHt$-1fU2bhL=VN~HQ9ihl2PJuC|8^g;5( z6t?);53T7rwA-&(Y%KFk*{{}o>Zr%$j@-NG8f+O-Nc)w7tQH&fgc>&lGK=y9dt0x% zQDww_l$~7vMh-(hLCf*Fn>?|A8}SQMJv=Z@_#Ukrmz)Oi6!!&Rf%h%Yc!6Y<^%kSe zs&Yvv<`~xQ#C)nU*r1SG?i)GZ{%b`vu3cscK^{#mL7Y-GoUc_)7)M7(;HE@`I_RbQ zoPC>!mIz0wpzzm4dsiyg6`L|I!ajGs`O;EW4^0IL4f*%<%J_1sV<-`CB4)%7^oeKl z2_34?^hxL%%RXhI>&q)ffq!&OwP+x#5v^34bMWwFDrrA8p;FOm)OLdomOTspK19%u zsl!_ULyS0Vee=@a<&jlMN~`T*GutBrOq$FcCv{qe8iLEWc$2EnieaY$J5FDv378V1 z=g2Evu7j_+kFPK7%x-}fxeY${=Rd5A-uF@Pz!u#KmC`kXbV**%QWu>0w)Se1NSgmZ zak#QKcgoM6z0Q*x;7n=H6xKN=Z5hW2{3Vgla@_j!`YDf9M@&wU%=zII>Z;?Ggat?Ev;KzcL1zyYP{8N`hNC@#9ZISe2?$}&Q1oA zwoIoEWhamOd;naqmq+R;=G1!RRzvGUB=M_u zdffh?8;YQyn1u?~o6P%zM4VN2(r1W`fi-=CR89*G5B3*lDh?Z*{Fq^y_bM9G?rl5a za&7W(%Z`7z;H2$y;nf?l@n-N_ljCfy3MDI$uUMFkU@ga_osiL$fJP4mth zLOiMcRJuy9fQYp9&BQg!dxFc{u5;PklAMeOnl(t&B}gW*b1@6yYEMN~ zo5|O#(brcp2-g<%`47IdrN0Q7Hr|mT>=34x4trBAf03U+T^j877W-vyf!Vv-DNc`r zBFv(-*l3y2vZqI`3~54rn2i($zGS*%`HkvYe$UZv0esxzHS319h>f=jS|LR6nbgR_$Xr8%H|62O z1UW|4)a$R{T)f9NTbv3a9LT1sjhfv8BV4Hc2i!kLJcfI?bG6*G{S=nv;dD_p(mIOj z{nd=Lw*VWAlqkk(@5B19*N3@O`tC(aq$_lNDU5}+}j^N=7Qp8 z!V4cB8KFvkDb0r9nV`aXT4j+npOo5uVI-%Vs6HS=6g!()bTp`))mqu(jdpGxpX$?+ 
zb16*9k5i5!q0kT1Bt}5{g(yx*QVO8dJiIvDuPTHp`rj+t^pDwm-1}0ix><~7yU1pM9K@7`o+Hub0cT-S*-vUdV;B&=JTu|q9cgx(w<%h%30sUNh)+@WYjRthG;Sd{p`pCzh z)Jfnp8QcUq>J-|j=-in+kE$2qD%s>ShxW!!`xy zJWnq_!gn54=EP))j5e0xzLZI;l)t!6p@T44h)I_uxfFFFp&AQ(cJtsktSp|?jS#^t zxl;6V+oGIZe{@+=;#n_e(4MeB+xXn!k$U-z z>HR^$M>}>G<{E%6j9A3s*{kIa!bhr!u{ z&2f?^8HHs%zGOz=q8B}x$&K8M%LlezIll4Wd(Ba{|4P=lOh9`O8~>XzTnXVC9_{ZP z5;t+hHUS@n!au+8LVO~aX?4?+(-bugW_m(LN+AyoG^M-R0hy%RBA_+&x^8RQVB+ih z1CDYOpIZQJuew-pL==sxFwy9TiPLhlZ?nr*kmEpw*K(_GN!^O$snmTrBV6x-m$BU} z#j>)2Xd@~90Yo$3Ve$nfoc*7K%$d3i!UEBzCN7|}-$i;m@VY)aZhYLY z()!e1nyTcd^JKp|f7ZxRc^cC=GrjI}{FDNI22U}E`{ZQ!>K1n*&sY2O%=w{fcth<` zisIMeg;ew;$S$ipwXdG@uTIpQaOzXp74GO~(PmmsV?3K*t}yjFMs;Y0D76EqV`=wu zyg8L$BQ^IQP6WDULenwgI_v~WJ{o3m9HqWm-49^>;2uQVKHt#TtOILM9lwlM;pG<> zdzF08A{hg_AQTo~82*yqigOU>(^q}09(_+Hxh^W|Gg5Lx-3vtRZghpu2?(FHI_HVJ*evQZ8_JYYDI`4Z@K>j6z$-2A)$@MAQ}Wk36Gj+| zt?^a-tVIrO$v#V6oF+1s43iHL*Rta;cbWrYN@Dtoo^fBcCdD{3ydvbAmI3l^tWtJ2 z`EBVHNgmsUhvek{{H~qv#$EQjhr~VPo|aW%qv7k++!BR2Jlyea6d*opp+qGB+%H!? z7#bcRWs2~vs4ecI_F`Ioe*X-bk*$`kZLHMne=@lK{2S_9igt&Nc~AfCOS3c*y5|14 zS)FbI1r;hI1={crf=rca20L1rMg*l}&9rs{II*jdI=IyR*D!b6=15Pf*CY(R*wJ+7 z=AZ5t^{BkE`l8#@uC>HUG-N?I10VF6kV#-v=OB^PE4C;D)RdSR53rJ8O7{|%lkd^} z1kOoow_}y{94}9a3=2GH-j7ZM$|k$(7$wu%F))-!T(pJe^Rn2TT-&PRLi!9y4)gXK z{fZh?6IDxZ=J%(X`*uqd2qYI?I}ZgE5uPS86^U=d_k%CC!;bB93d?&-qImjhQ6~iV{kH+aVy#eJG>R9tvCw(XMS^K` z(lpVy?k@1{+v!6{o z=G-OH?xqdy-wI>dc;m}o_&rqkk^4GxgcE}V?W#SDSI0p%4!zV6R+PAuNO#_KoZn+> zOV9El0Hdk}Pac)xL`TCIUxd!*u(Y_EOtfBJmpO6KAa|Z=s9D6^pzzC!23^u+&BOri z-gf<40Phx<)VjA#2p2J}S~wAY=^A-T$UC!Q=|uX(8er|se62*^yxRAHjL5q-){RTu zG7tFVd9=Q=H`n;kW;unal;nW|ZBd%OzMQ(bQ;JRY_kKvPVq(=6CLq100)mxkG4 zsb>;86f*Z6StVlRA4PZPhj+Ks)P$f>hX|4H4)n!KtcH2&lv-9e5uufoM2sk5osW@{ zzL~%7b1J`S`SMJGO1m`(+DhWh-pAS}rP7m5r-7EP4mo~f8nyvHAev|XrUqNvmygy#c$WJK0 z(75O}M6RQ)+LLpW(JB8DH|75Tus~10GOhsy8|S{bvOAH}1@NGXg-t{eFv__@^+r2` z^W*;jeM+bBr|}bG?T%x3ud+8T>9*3|FWWBO_FH7p%9CAVrmC5zXq-qRV+fKm@iH^? 
z$<~aaWa^2!OYI#8w*^t@jGPIi$4NQxfA#IDBK{d|uy+SL+s}IYa6EPn8O#Go6jEX8 ztEr<$jhRa$bfkc?6*xU4m0~n8M&$nh5BwVVmJhYo(M{JXN*IMCsBCFJyf5@uG-zZh z7>oc{!24%Oguc|<(TYk#V;Rv!#_shXmz0(#I?=}Y0nX93uAX5xToGjp5Dq(Ns^L<{ ztVW>pFVqjtx5IvtBsB6!z!CJ|`Otg3@jXAdG1ONhzPh4`Fp95nQ9{Bwp%}-J;ZV5UNTmtz{ndXu+K0s4W|*52z>Pa-fm3~+wh zgCAV0f?Rz=FUu+ruP9th!sET8ZjfTXc@-A1u9H{+^$Zdkkkt>!U*zJnC4e5jTIZ#=G0lH-`4j zF|O3Cq!lCQy*-M4NJ zZCk$ijgs|Kw&-QMD@Ikn64dBq%AVks&PTx~QOoMfu{V7;IPmV>!?zlS&v@H3RMrcO zN;)nLPN(qMH_o`%)|Z!i&hFs0-{aoir83K~ z>m`cq5?~QQR?#V#cF*+YNb~kt3HT>n-m!D^&3zoWemN|Fjw0G_RxSSkd9_`m2~!2i zQ7~4>$!z3f;O9TzKy9}PEp^b`VnE)+iy=lp!Pmnn2eAcCJN@+H^J!r=&MZCW*;E&L z2x?oWKU9hgZhuUa2mAiIZQ_Y!MGf1y8PDIh{+i~-)Ul>?KEXS-%$7UkvC>9G9Z2`+ z8@?JA?mx@H{YHS&&C;x_Gn{uiTGH&PxQAh+xyH2i!x&0>bx|Ql_TyA$!xDO()VarC z%)kIU1YOqMUt_P9)pDmXR6_kSE_{C9aD0QJo#+ho@qGeC8U1zjUDFsnwc;sVVEd`o z=JutDSr2jKX#PxXrl?svGO0Pn8$)U2F?~_n<0s*i1ERM^NA24_HM*Xu;y+Oxk8%2G zTKX_x(cP-w5z?G|hin0@X4_XR!Dgw99AE{PI0xffX{Lg34?`UHAT~58OG^rqPi`vY zU^tJc{{XIq(SpaK+*Tzb+K$n*fQE?o?c>Lv?lh^{d-Af^x~Z;|R0dkwb!e`%V4wGh zd^b6j!GPUYTQzpOP2-HKOBeu8MTMQqvhFhyVhBNk4K1i)vkm@JR=y zCqA?C!^gy}f}WD&vNz0>7Hz{dWynyixBch{tI3cR4G*~}b(5eJcKU_^)ag^yQ`ANz zhGdag21#RZ%IbTD1a=^f;Ey9!Kj6n1?K^6srrW&jk~CKtq9SM!N3!3PyCKI=BcvRE z%i~rH^k(9l@6JrEnpw8P3R{tZ;kI(=_2VEN!e*<4V-yoMi|fdJb5~dt_baxuDZt*GBj@c zrWJ}+9n`$>5J4Tm1+p-F>Q_IA7im`BI7`F5%^8%_+xH#Do>f!y2AYyKRwEv91Aui&6umBA?u4J63ts~$SuG>t**S@HMRFRD-9X?dZs zx`=@Fhv0?VfPSF-UD-9VRMb;mcNE)}$zNHPlA;RsHHh6^x{AnVjJMtsu4~&hGf~`@ z)dMLKLMDw^-YDL=8)qW=I9_RO~2Ih*0W_MXG-$#>Y5v~^X2P(?w4yCFJ$ z3N1f}uhTtrSvR2cwm$ z?aOH1wWiZ@nvUUStcLGtxKcMvv^6D}mE$?;$YkmV$8ryxYp8v8wqEGC?rsb7L4QPs zr*Lrko}T9feI_)WWJw>Tfgqpvv|tBd2g&e7=@Y=N81a{#I4wKo!qxW6mc+PLya`t{ z3o}Gucv*yaJ-XY;{{Rsg_}69n?eK!#eB`#q_Df83^qXUF#|11D>(Y=#EgZ2%il7sK z@xq{Z`}}Ca5_o4M^{@W`1m*LI%$|pQU~Qev)n^Xw6BAFw9jMgM2;(_Q>JrsaF(cuk zH7lRLPmOmsH~y2Hn{$n5o_*uPIz7L+SToY^EvZQz6?#fR0jB6sl}Vful7pXyQ{Pby z%104jg#2UJoM4-%MYlIa149 zkoQx9GBOA_ARpUZK8GhPZa+lwT4|6Y&Cj71IsCHH%f5JJOAi&cNd3l$8vz4 zqQ$5SJGv-POUBMVq1b$J6)jC%G}BbfjqVvrOB=2_Ty`Ljf-!33L*Zg44)}pxByMij*pKp>sXE8GElmty29c1yWQ?OTkj>TK zH!UfJ^mXUQm1~6eYtIU# z-aB%J-qka1jfWhywD2N^$(k~YG)w^MIYOWj^&N8kee#agZ;DCf5(gO>VbsK)!yiBU zYmRV~9?@v4=vLd3mbN2LRRYNysZ-Pe>_!jlar2~_?btSc;Cn^U;44uWHp%=4PB*!4zz;-y%Bh%g5q8SS8( zPhm`zAK|>_&v$McPV3~`byw9^Q0<#%^UbFu&HXwWSE>n7NgAwRD*N-9i#xM1lq#rH zCsMI&0mzm71KgcgFVh#BE2pN{8!kL#-kdmH>14Uu8r5A>73tusvr|0L$t2N(AGa_3 zBn%vOP{nY^RwvrGi(cWe(rybi#cHT4>0+y)s6=HG!740~$O8aCWMyCo2fuxEfb}R12>~KjGB%MhD+TR}1?=Fpdd<`;UzTYN@Aof+fIi+0kXLy%yh`A&R(pLpI!{wiEK->m;C$;Y_h>?g>+TP54vLBcV2Zo*QN+{8 zy-ZJRp1|meUC1hGI@chU_|awOaLG7m7qaKhik7{Z@e>^V#;+I?azf3w6FoSHHhcHc z_S3Hp3_W>i7r%Wr@!i9oV-?ibaWAr?v((vW~PBEsA=Y*w$wDCho2NS_wV= zNF*X(KYV%CQCMp(bD3eB0^P#5__PA+Y^qsAVHmMLI#iLZnOS&_L!Pop9!`laby-$o z!026No~aY9ff)0my7(lGdK)^VQU$gW+^5D!IL?ZyDi2OGq4y+gof1^B1cG&Rt&@|Z z8gtZe`ppL{N$gIuRpSz0C*&)a(zZ9UbQjN)Xmd0SVE);!0J=f z#lx>jQ}-RU4Bn`Impo7O^x`92qWAASIlNX${>tvfjGt^5C1~po0FJ3wr~d#fqv}FW zQmu?_X-yntl>zWOljBXNJv<=YoOtBRiJTtTG*q=$t;MuzX{~g1uq265IK!-9I3PO( zWJbnSoDf0puNrWV8c6gZCZ5W-MR=;PGm45?x~geH)1-gX3Sto=fSvQ>2fi>o>U^T> zL)sHo+RIh;6AfJ_S!l^Z8@hr!kG7p}PhKu@pMgBv?ZLpVK2Y%5)Y!>W^m8 z6*QGGO3_?<9MZEz7;rx@!0KF*7^_6@Yc%pIXP|&_*dMtXFu%}VC^LJuP43gkn5d;> z4T17;{w2qe8p~>EI#a!({-HB95XD( zp>kqJ9l8j_kzH%!$tXq-j{4|OrgO^EqwPh<-Kkdh0Kx7x>iKbBzt^InJS<*SBrg*}2xY?n!++S+GDGf+h=dZ& z0gZa4Y>bYq^v1bj0-P2zA##}e%uj-QYC6A&Uxt?2-Vbp5jjEE0t2W}A%`>c7j6+ii znF%Afe{u{d@q$Od*7Xs*EEt_+B7bv01&X4ra;~rrlHYvlQon}}hQhmv+)m;-Ia^-X zwAtb-E;7?iB+Zo{r83~-w|sn!MaJWtKHRJ7QV+G*w`}+QH6k8{eEM?L%EkM6>>fK@ 
zsy6LL@t)ZB&CxUt_UMubrc+N)$O(Y;$O_0oQ`gc;<2vK?)Uqe}IwyQIO#)FWAW(xN zbx%pn*v+bJt`D^K{RjDPey^cQ@Y^unLc2GM=9RzQV!IgmR zjd2uNNTeAch3tQAS%=lLX>!wXEVqFa$tOywYD$pG zg-1)%%TB``MMBP(&LehlZQpKQHuVKDyHQC_(?(mOWhz0>f)9g_kJmt>mNK222R?JB z%Zl4NhIl{QN*Wqwk~vXs@s%uOQdMJ+j~_>Zdv-ba(o1#l{)2SOPj#T4DmTab;SmAu z{E`CsZ;$R_%)_wv6w14%Sh z6$MDaD17V7BYiBV=eCBKgX1g{fNCq{36AQh2E9X54L9`dRPxHlz!`R9{&>+%9m=MV zu)*!yr$0t0VDHXbx!B;%t9sa9Z*RSueu4_< zAgrfJ<2}h$mKXY=m04LzuvSv6Jb69k*MJ-@<-27j%i)I8w@}7qnp&!=TYPZKRMJ#S zKlK?#SRs_Ef=E?SjB2`moZhCKTJ+806=i!&7XlXi`&4IxxVIRmXHJ zhXxJ|+;;a1xW5(3AqBqUawX#1*AuoZH1wZ(U5p4GPEOlT%Qyy2hS2ujRMZ?XB_!D=a+_ZN(F<(*a z##I(rr2+6jJrfh$5PC@LNYp5E0l_=}00DV`+#DI*6?D_q*tZfU%vT3(e171r$@7BP(^u(X%f+vN ze4^MY%?n3)-f^w|_aursfti2Z;TZn_0zuJ%_mR)ex(>AU9AtUMyS+9pI4WL$?3|_Y z(F6Q5)r#HruIsN+~6YE)hSoBz<9eKmdm%;|M`G2UE|rXqL}v zrLiogM53gMnmGw1xQ!J7$@89^{{Vdz?fVUzZoOHtS?Q>SeQcGL;*JG~RMb8L%_m45 zZrvj+a!v+xrJ<&d6*3X~XnKE4y*ram?9-nk(WyPvhB4Ud#Py;D_ZWV|UtS=`rE+q0 zsi_Hn>aSPZI_ePA)f2p-!rdT4O#sAfb<)?yczOS32 zQ58p9M6wrAAdHd}9+RC$$2c6c@k^90U~e3`@fz<_K19u^P?T!Yg&9n z#CF2~dmp}Zl&&+zr>M7|I_`{ooYK%9x}rqx4{x0lT&^XM{aMys?u@Qt>Nv)Unpvt} z+m9rZ<5!GHwrHxVk=&}Ep8EL+a&nD>_s)cyaZcyC*7{3RO5av-ol?~cOa^0={!mVi z_NAI?%GCu}oQz|>bS{H#%QE2pT;m#3?VDvKmZfT(F=g%uIMn4dmWGU7i+!uI;`|r6 zJNpi!{wqEL{{U~D749n;qM3`xvk=Fnm%-4I8;g%BSb`&?)Z=ASJ=^`9`^y zBDzQ>ogxP^ApNnT8|4i&pt2wb=SPMZe--@wL+Zb82aTev!;L~g`I`jkS)I!#sJ zKljp}`dUX~SfAfXn!eHzpPhBUl(q*cp85X(O=i7oHF{+D$a-YENGIxxs1M&p6c>pl z$d!rh->Xq6!T$i_P>S)h?QOrgVS4+(%3kWM`+;d8R{sDANE#`bay)|A`55p!hm-G7 zep~s&M5N))2w$8a6i2c<(aFHUxC6 zRw&;Ds~lwgvEY2_*HjPqiJF#JW0o0#M~XP#t&oQ`PFi{&$4WL2RYnT zu<+|_+pLx=cK*3jTP=3US?TGaj&?w?mhaP*IQU=6I{~g=QShqrUlZh*&hOYM-YP0O zz|Rbouq^8f1!i8bKWzTG-MaRrMW1QhE^RmS3u03{Jydb2mKZ=10=Qo4!}B z^Vv7|2dXc&YNODX-M2|C%GXxCSpkYCR880(iB%Y9q#CZPy0Q~Q+gXbI57mU0G<#T8AopafATYqeBOM$gi zTPB^Mu7Y+f)b$NdCJ(nPam&(IxCjdY)THz7)2WUJG{gZDs4LXD$omni$-XAm8*Jg7qw`^%sX0uylv0o}9sFq1-9$JQ$2@F^%96i}uT#v1kH%gV zs)epLccFGFND2V&)Al|5_Zs?BqS(hC0KYr<@)G!Q6N@=1`et8 zyqWyX`~08R_8Jdu4NHBiq8+~#8qH4`e#}7Wd;#D7TmIgZy3Ek_;b3q zjo~g=3Ojv8rdstf7nvxkWTuSB(h^=zYB<9;h9f%Y&{?PHVXhMX9PQMX-b{B6&K6gt zmv3Hmj040zHa|nkP6tjsxev$Qr6YFkBkHa5IdXql5d7 znyNazvwW$x&TDC22_!Jc$gWYqW$REJWO!hF5vXOZR#G3Pf(47+v@J+z(cqO-yPxne z!C&*iAN%TUJsCJ1!p~C=M!WYZJYRa6N{%1#7PcL;x?!VMn#}caC1g@S%tu;=mUWdf zGNpTh5rir2p&anZ11cW4Sx>l~V>s-3f5n0U`|4N!0EE+xmm7B^Q>28)A zg-xdSM9{Qs3YLw54TFYUFhBCTpOKb^3U{?uRqfj87FVF0!dRRMRA8fjbt->+>SFzS zy)n3}MZ}BuKioIATPpUd+o+d$TIdo)t)rCyh3*cADJ*T6`q?m30hk=Kh>qf&3?BV9 z>ZGa?E5@{S%$WK=GeZ9We&n+M0Ac#-4RD3M+aH29n|)A(HPYzk;t1&6ixrh**ZU#p~uWd;^gXPMOE2niy!!iE=7h|0ntxRoBl=G;_7z{suohkQz z(W~3l)f+OJn6V3`yFEP2Ywf!G{^2p$MO7}8qhYpkKDG?CNIDnt+Uy_1Gf{wxgR zTGY%6skgAHVGq=yI*A(dPb{zz-vo_(wp^Y%c^r&~z+ilhZl^(APO=46Z*Z?lw|r+q zuFMait*5B^WRgT;J9Np9`s;k6g%~wVH4GHxOF91lu+dktW5Nj79_KCd`)FmVuB{{~ zQ8B=8>9l7DAFhQMF^1(x$Z6uDk<^c=IvIu!+??s{ZdiFmETT9>P^+*&BN zhRv;$6jy3=BORCi6v{KvI*z2t9o3mjF0VN4CGuzi@0Wyk{`v2(CgX3UKGk*cr&%&O zW{Cj*08MDg#o7+MmWbtVoL&WUp~1QyGw>63$8xpRueV!AE)uTSQ;fYOR6vsytNqoN z4uq)(T#v?YNd3{qo^|W^xw<#?7}{Ho^;>tjnrc=x6w8Q$QsIamz-Z^@H?wZ~E%YE*eT5GbX+*@Yx8&7Y##+ml|kxG=3KBnlA7&7$`dX$wm8s(>$ zy^qZY12)$kczJZ9siE8!m?~z5jVg8<)7a56R1C}Z^y--1m% zZqT}8&#z6_=!Pe1TCs6882kopB(hTjC>FxW{ zume4`GNO`>RR$SUF!B%1h??Wjr|STD3$Ft2BQ{P!{q(fh)`OCP93wF5A7i6iOn&$t z;)8+PI!w{b?=Mpm`<)%zZtF;Wwn9K1yXjhCTNZnDR;r>pMhO!w;8JS<#Xip^h+&!KyvRi`@kZD!)%rBnVjJ-rcuQ zY+DY-vY=V+8;+JLN@&y`wL(je{GmY|$L)^#;?I-jpmbw&zad2oyOwKVtfwQ`!x02c zhQRTHM{;}W!aj-|8QUBq^w{Cm!jguaZu{=u+n)ViqxzK+)jNACi_n!0A1d zvkgM;0{Ld#Ja6L0&E?Z&xLdC5yv1*dt<{QAqh;r*a0lg%B$7km9*ksXRu1782>4~o 
zPYAZ>0k~-duvm7jr6_HwJe_j7f?1@IzGRWv4~lb*bz} zU`~Zwt`!7^mO@5wK={y_*yDI}f)BwO2}?$Z-sG*0{{R}=XkDPfp1G%QY8eT*En)f?RPErB} z26LSbsi_?T@cqs7?uS2(vIay^O6u~s%u=5 z90fVm(C?p6@KyE#Jf4<1LGM?JKt4+!pCa;^`_9dv(H*(*zb@q1Bfl zgZkthH~c!>o2O=O8ZIF4gL~aA7n@YiRYhBAt$JFzGPvmpA~#t$_yDV81b4=y=anB; z4qW-R+mxKTdb;rD$#kptRtskoYOSIUm+ll+ya6Pka2o=mA|g5dq9=AfD)#fGPNa2M zbI4b${_@%POJ@vur+GIegRDX~oM?ZWTNkiasf|DtkuE-`P$mPv^+>_vk5qa2d8+jb z-`6f#cidO~xpS$fy4KWCF#If6DyT%YP{iFMt|fN_9Cl}A&Q$6}IIZX-!u`#5w{IR* zINkiZ%~)1xX>alyoPNWVD=jTl7&5O;NLT6ywjUYS3VOP7mA3gx=a%EPID1af*tb64 zx87~=ncdztWH3$-R&klBBXvf~d#fV@<5g(l4VozJv>UPr8HKu~8lc9VU(j!+Kylv~IsV$3f8n;@ z6}O&Fw|@_3t?Gws@7H<-6wauTR?^2g0m&-aktA-F0IyLeJwr``Nx{Tv?lyL}I~=}k zo^H8Y-yFVfD}5}IZf(6&1-7E5m1Z=x{ROG25HTGxB%yHG1Z4NYAk``wTFWKcdu8sC zsjBEIRY;1>5sgc(M^_2j4M+uDvQ?L@hya|E_S6M>33F>~<$s^+Ew6Z8T9a;ZE{pw@ zyU6ZDP*hDSJq+#oEmACG`L==~jiUPve4Y=XQvV)#Uhc_(%p+G!>`$qNFa>q$cJv3 zDpo0Jdh&aHIvIT15?O#T!a!l16~;dLrzOywUu9ppxaE6$a$m>0w=aA|mIkP`2%g(p z1CqBoSya3g3-pi7pO|9{BD&=C>9DvWQ(UNP+oS<6vokV+3OMbbx6}a7>#4;4AYN6h zTuS8`d5*Q(kZwL4*To}M#F2-V*Ha=zJZwt!NzoyN&~<$|kbruQp!)qsSyN}8bzkbl zzUYZ$%Z5YMkKeHx;L9EpIwJWZS^ofrUx<5BhV{j+57t#f4btHaviEMK743?MVfU%$ zD*^g?kjV*Y7r#rWBLE#s1JOq;pi{qCJ<5UORnPRi<9Og-Jp3@?JXMpWy1w@vrsyK7 z2#-l{U&A+0J(vcTH^=_~rZr+Z3W|tjG05aHF%A2JjdbCcD1He^Qxr55lusDyh=~V) zbqZg_56Sv%o+@pQboQ0DV@11Z(#5)}nx#rZa;A&@*o6GU+@(eZyMoYs6OBy^R=j`J zPW`;;i1h;Hii@1yRJVr!tLilCsQc8ESKtG>czAE{2MSbxa65Ji-%lQ;fli2-(aGYQf}KlOUdocTLf}3j=&AS$z$rIu9{Z-V?y1e>K#ib_aW4e9frEQ&|8@+7Q*!j+BWUqJX9~a z=&QEtRK`8H=a!~0Y5KtJlRQzT4gf9)Be>J!sWkjCWhc;?=jYSMn2t&5t$S;Yw6?pH z5|@{5?#mH(rMiSb)fDPnERjsA!HjOBtEZ`YhjGkOswEt#u3zZ#Bp@*R;Cb__8~Vv` z$8qsj)31G6Hg?Wy)yCZ|y6+u46rx&)BBiW~S?7g%S-R2@Aw3vkg}^u@8uoAPm^SVD zmvGv9+OCnmL})<}&5oyxWBeo%%l7(*z|hGBj&!;-T9j-Z!|qbJoy=S`HojB1X>{H9 zL$~TKx4R{*32SM+fPMB@i3Q>S2VR_?tN;nqT~%nkZF}Ul8}*{D-(N*gR;r4+ncB8X_`rUjL;;oyATsz&hJ9_I!P_kQZRBY+^Y0<;Ra)odU3=y0*aB>OM zv2!WQhXK8IIepw5+VbzPrLc1|vMh9wZw;*Tpo4P%07Ql8Zlzc1=#gNkZkJOSf<+)k z+ITfBc?(JGhA5|suBoG+3f%_;p2X)~ky-&8h5XC*_|dl%xIx8E2yXY;Txa2}{{U&; zBaAy*_Z^~IIl`X}(JF#h^%)jS73K0i;kl)=3UwL*R- zw&P2IpQK0ApWK800Bv-irgx}j?eyv5m38pJSAXHomN{A;l#yr7 z1#yF5a|yyb2OIcTbDpzr?s@@PYN@KDlAcG5voF;gfHIKE!|ZX8NFbGPQyiaMxF4pW zXVQDtJ9hHjakBE;%^l$`JTFM1w{1)8P(~`Y)3ks^6s^?03X*`JnfY@w zb;T+=9_sP1Wng>|M`900yGP&G#Mx1j2Ym1JC*v6N;|!^kW%u3Y1vuV<%E2HviRZgCpz zzwT7C7@CJ=n} zB-mdVdsWFV;%KNy6?`H;wk2`B`Y|FPF+YV=ZQTBSwO9VFSws41N@O z(S5)8sW^8Xme(cAl56CbLrE#Utv2_jT11==>56uE438Nj{ZLCnpMv2NNE9oABR-U3KJ^Po7?Cyq0GypI|zjp>c)@dqyVC+1g( zd$VufcOAmpH5T7ZyICD|6|M(vndC~i*ri0S7uzL1lu*hUi#EE>>vnH^)H%o<$vUmq zruVB~8+|`Fr6ns?U3X6n0Pw@3mfE=#KrrFzeKe-wW5 z+!kR=!|qLO6jv{LKB=#}##)MDA3*B?V1_qlFV)g|fK|qNj&;$C+Jq>aqT40H{{Ve8 zaJNd81ozH~Y3T)X84usaxOdaH)~{&uiMuN}N5?I-x3&)Fk~Dp{_+e!zu#GX zfx8WAE|Dq$-2Q<0)|9$aXQd_jv7f6%@=GcLGEeWIbIRqi8(WHEw?d9vgi;WyatG!) 
z(Ouf4b&(lA+~}pQ4ylf_+x6DAulI5be)-d=&>OS~_f+TVAFjS1XrW-kMH5^?u{~YB z`qFNJa^CUNsJ19>ULbz2{{Y(>%@Y3rG-vvV&+Dww;%ybgx$wtw?TN(APb$qDlrvJJ z291F~OAf~%DJQlEjORkE*IFvJj+=DTmCN?J(Z)KDa!7IDf=)^E`s#i8`t+IM4>dAZ zaZ`ow4$U$6B^CsC(kCEZ0sS@9VD*IEP`Wux=Y& zS&rjM(WzvYx75lr?nJ|+hD`l9DUfyZnQ_k)j>S)**Yxo9HshZmw<~VZx>w1%cu^%u z{8rqJm6Dg-A)a~(8|tQ%63y%}5z2GfN$VZ+>o3Ty+ns&}*g2Qr6mNIjn@-GXDXsMG zfBWjmF#Jm_3-dY>B~{PXrPI4~>x8*O^or)!i{Hq1&fKK1Qf$lY?kX>nU+JfSK(aJ) zF{V<|m%_J9eIVtRtE3V{)$nJA5pYw7HjWoqcOCNMZm`n|TV+-Dt!Ao9bU2D5BC7QW z)G8?>AyGjm8P|R&&UkAl^fPKM%HD08eLmo$mWF7(+G@ITvMY|n5rQ%2xH{_p06<=5 zFWd*_^bJs68d{3j`_R`=Ts)i06ZtJDGi*Y@{rn_8#KE$e30>=~`Q2T)ik%7n52_U~* zw|c|wjyv)(zIk8Y5?v^G$!OuFa#?q5)sggWi0Rig4vf7o6?)K#Wa!3a_MOo60t{yE zo6X~BaSqkEHum43mKLasr}4Djh@N7)T?#w0HpHJXJTG26a2t8>Ve2WNUD>uPEo(hpKf4qXO%Yh-WbM=u z`XA4g1YnI;pLpyI-@?uLeBplmvdL__?dzP@`=tYbOf$yCT?rWt+-D%1f)CD3o<&o! zAW4Dt&Y|1*fcad!aQ^_98-F$S9loiy_mverzVBI08Y)3}g&ZEWQ=X}zM1OFMF-93A zYJQ^~!RahBallQDKn26{L;39{&J!Vn1Q2 z$@E|5*N41p`m2>M)DKH`()w3z zp|Yn|cPif4!|g=lq zh{Kg?Er-kw-{wC-@5&d7{{UMH{{T*V%{ls=hCY{ia~mG)7@|^SjQAJBZ zRU4#n#Z+RK9l8=lC{BMv)8FGiZBGZUR{$d7?;uw_^Gfb3xOH@f`3n2a5$W8DYK17O z!P}-(U|Rut57Yr5!7LPl;YB^9c4;Fk7eT@5$I^fQ03bER>K7T7#_b*?wM!)*@(Nc8 z{_^nxl6M7xZpY*F)t`7Bv-l~+4oY@D$&Y2*>vmP&hZd+Ku-DNmK_x5EMg2C6_&Frx zV21jE2M1M>+c(Qi-mx94A{8l_-T<8~l!f}ndxM7U+qnMu)t&iEvQ}BSS>dgkwl+zr zcz8zT6k3cXx845#Irm@D55c3f3&VX32z^|KzTR+0Yz zm&jW*%;zRohG^uBG5VXQC%bEfY zeOZ&$6Oj6jayT&+-(Xv#ZT|qXmTJAR;D0fRfHPa!k74d~hK!!eDvtr%Ro>=9U`@3* z)nnCd+M8uc)iiaqHB`+Mkj|u}u{sb8az`L4%0S0fJ~XaA)<(QOfSYl@C6zrfd6>BK zjmd7p<_`H$5xre)Zw>lL0LSwxS)K{b2W2eLi3r}=*3JSRa>PaN0L~TJd{F6T#X@pBE3m>cOZOxpr3bdYq$RZOm332 zd$z+9O;ZF);AXBWWo&!v(zs;-n+gtBI_R%aZ&2S&{{T?@@fU|wo9kt98*kB=t@nMy zx7Yc8VIZq9szQa9+Iq$m8_Wm0CWiH=#F@8;_GUtwOwN9sivLa zse}-nETpOY%1@D>9thV<`f77;#hy_(@lC~zmg3`6X}61g(#uSgKHc#5wL5s00rgQ}d(V57*P$_>By+ zR542GHibz!5<0McjNo@G)Id4zFg*9?a%ox{^p+0hGnZahy;^xip||e^HFj(Dxh^|X zaX_kT>@{qp2@}85ByH6cuoKeQbvQU-0etEDTXUVr%$0YpEpVnuc8xV>;iz||l4Yv4 z&ZM`s)XK%+c2y^$P^5V8sfzSU-}edjR`cNewXHP;wyK)*Y7E4PzZ2IS>c3#m6mov- z$LC#{y!PJR-PWhE?#;Vzx=}d>S}UCzR6`~_bt8hO9sWCOw;YyyN<{CDycH{=xVBLkHd5ZaYt&XY|xJ%T1sRVS=M;s+%=nNDP>=}nhJt97- zH$?OQ0K3%4;kt6@tf#1deHCtKBB*Ao`jad4s6q8N+?@S}sg24GjU<{LK72h%IhFMb z-WL7A#>>40Ew;%Ut&eQjsoooX0dS&O6^Bu=Mt#IDl zBjASZ+_tMtT2owZb{)G@Sx-$f1uGnn3O7rhrNa5c1K{ZsZL-f-IC&#)WPgh|>g}n( z{u5o#YoyyeV&Q#8{{W#%-{v-}#V*(p6ss`^WrAA7eEo3)?L^8lQMyUMkY=t)_9+y! 
zUYs7Fw+q~Rc8`*-D9c$Sk|c2^#;yUG6tj+|h$tWePEW>*EBP4bO4)onV&Tef5#qV6 zJd?G(wq19;B8+9^>GreIqsbkHb#U0Covn)ubBAkxO;K{{Z>lR%qOQhhCUlrYc}LdYaJG(#F78cU{7nWePzk>Sl1k zyfPI1NEV!Y*fOZabG|K_W*(xTTS*E_-^4IAs2ajS#(R z77+y!FnGalsgwl~yG1L+zlo!eGK_V7IA#6*MwB*dj75>Uy1Kvc{4eoRCUzR)Kb*b$Q@;hL=KDyBoI}B{`yy1w*?`H!-1T6)Kz*A zbCqIUol(i>5O+1Mil&CC>bQSS@yL?aMJbY6s$((!n>=g-B6g9MM#orOEScKdsfS($ zNWePojFw1RIb@zWP_GptqXHBpc-OHPan;-R*Sk8=y)aT#H{V{W5TW`Ge#ctFO`y7} zot65t=i^1ya4&M5vO8$fsJnIp_c+#rMqG?!Wc|GB3J}ymCP_L6G!6N2-%9x`%_I)~ z&N@buw+Olzf;@hj$%@cTJU|c2pmhq-=ReyTF{zIo^Pn{WBtKJQRb7e|RR$Ss{{XIn zO3XjMjSj8oQdAu16r;&LHE4EDX0OB%tk#L@P)J5`tkp@KMRBHz7HHuN-x^8o{txiu zd)=t-Ti1p*3&nl7gF{<&vO!ZU7|9(EA{I~yBn;pl{&eHtt&z;)cm_Zje~g_1rH!SN z+i8B1d*|a`BxHJ2#MQ3HL28m#w%Tdwefp=Vs1&rx7?x)_c~~d`orudZ?oX0*pdQ!Y zNR+7RJ^uO8jCNJ0eyIp_Z_06`{^W=)NsbvhyZ8sN)7^olH~iXi*|WL9;0@=Oz1_U4 ztaKNOiaDO`Y^RQC>6)IVIn=64^fpRh=ir=o)Cuzi%RcGhF8sT4{l|@~Q);~1YD?Sh z2^xtVLmy_ko8!iZ8m29$*F^yPeN#TThaz;yhFgQAHd6eaYgPhsy6T{9fSej8A znQb>}TqSL?d>l&zm^kio(Y9lM45zUX&4b)loiU+4I%RP0n`3d=<*BpXsY$z10M4=(3gcMI9c*xWOwWwpBGTRgG?i?PV zN%-t=G^yqLj$C%-BaJjXR^V3hscNC~~@ zqNKgjZYT(7j6w;W03}EyqkgBkT>NBfM(>PV?UcK^*B@M#(qpaY1oc4QnUCnFx%2a_ z8MP}zQ>rXK$cL&gFP!%9>Wb-R+*EsidqqyExb`*Hs6$gpQUL2{41|6nor&s=)RyUB z94i7e{nk$G#ThVh{vm_kRe|9C;hSi0T3U)~xR!$Th$@XuIFTcpIf?KH=pm0!R&$5% z+fqTve?A;};8k6-Y~F5jZk?*qMGaM+rfDadnm(WlQHLebfX*UUWo!Y_oE>yW)kRGy zA`*wuD#-(N4uDTrZrX}Z;y>pff>+KJ*!fb}+(xzDcU=zZYR$=Px6IQutJ{Fak<9D; zG?1BJ=;|(oMpbj2c893nHS%zC)5m)8--I`rjb*~k4G!D6!wbalOItG2to<@4RBVdU z954hh9taw)O>G3`8FvJ)?nx~{F-tBSxL5AStItzpl6?H?+45J!7~x+g8CBUYZ?(n3 zQ4DqTztxRapgRQvI3Rt;!SAOBl>Td~xoGACWb+xdWhJb)IR5~f(?Xp;h@q1a)>Ann zqz8qZWxE0uv)ejPR%|=A!`*cj%Y&0oRvt@Z`(aXQcO$p}k)Eb8k)GW@Vl>7j z`&`r)IynHukjRBj2?wQyLo0b!Rbku>R_B|pUt{Gzn|<-in(mQ$hiYAa4M%e^`%h5= zMv})PqLLCot`&wz$U3tOzz`Es9QbA7k@fsVGF?fgR9=}#prVG>wt zZR(K7%mM0f@w_BH4}7UP0BSkXaKECM?2j5TNok>)U&HlO+v@%2KmamE>_}xKA0w%8 z_ZU63WPeIdVfdlW2PKFg~5K2n2ykh_q>I$KP zis)AgdnNkUdzdoPxoS$Xpy5_e$WQGcAGS%-_F#+WY>~Q9Vrs zORQAzU1_6fW1a*A;#Wx8CLN4{fh3sMJlLnkUy`kIM38!&`kC$DO>P}p=df`84b{Fk z1w8ZFZ8eowg|DN8bTO#C{lF?Tla;0S>g`LxACisba7P6#Tu)< z>Wc>Am)ua%RI3#w3pwbT7YyK%JEQoAEVVa&X?CK#4q=~QQd7iENNn6H znXo)|1Hby|$K?~2O73ZW9j^Q>qDViU*to59z3f8D0~(mp5iL1vcK4O$k3GJl9!>_6 zET@wzg7ML!(uoI>*`&4H<+jZW;asj{KnpLyBe?DlkGMJ4OhL(}=da}!!On*asH!iU zJk(ES-n8)#a<6M#>4;Y{^nzZBWN4+1M$fnIbV=b|n!Y7)hWlo?Zi=a?jU={fgn{U% z$dpCy5h{>;7@VHRz5{)=)gF?bqt`w`F1Kx$zqY$>P8x3MVwZ8-(bw@d=~WC(R|IeL zO_;Ef!uS#-YC-By7(!5&Y|@QA9Oo9=U*wb7R*9vz-SHPvpQ?_mkYSX3u2>HJ##hhP zjS}p-Kjs%pY*!^V=c2mP3yowjdy>*JR4UCNQTd7mbJ9OB9V5=AQ`F1SD{oTmXXY=G zjj>f#Zmo2X!T!@7S1?(qGhs@wCy zAGbAKwz6s6{n6q_ilZy;0g@(8tdW!5Z#Hw&;Oo)D!`fU{j(OUU7PF2&EnKwp{{V?jhLNLU zuZ8k3K?EM)_C54TUwvsPe-8fuQ?dm~egFr-*I4tHeWki@>$eIm_K??_4c%W+cA9E< z;{IIUiI^-PM$Up#4|PUmRRg)m*DqbTYq{v&DoWZyCa9KUPtr1GU^88u5FD zJCAKO#xnNSzgcIbTBPY#>WQU5vJRG0^pn|vK6F!HxTQqq3dX_d!k?8NI;52_FWdzH zucVXSqE``j=t-abLDRv=r#4q!OnI%@+{{v0XShwZE{jtot}B~U3xow(CZ=f@Ju30B z?7*oY1_WnFIkt_yyQWsDoz=UBSI_-*;;)AA^cJA6s9cO@qL_sr{^6xlOw@}#aI@C; z(+`LoE1=>$yx{FEx+`e?>8x9-;wf&FWD>0u)XE*Y!BdWopu-HLh8kmd+5Ao3n7dY~ zs5U|0`_fpd3~g0m+N(`Y=s)oh>t>0We&0YR?Tlu-6}DQNG@iv|yzJ5Qx zzBM$5`n2^cF!^5%{{X(5m2Cr#iy3gQ_=deNxSrJX_g*buHf>Ery&lH#+O-iRe`aZ^ zz4(IsWU%k&9(C9}GUB%mIK#J-b?}>r_6^Z&krG6%+t-*YV(D+nNr~I1KOI;<8mLoy zupGL)Wccd@X~5;Tnl3$ZkHmO5v&*j&u3J+32skq8yk$93hlz~DQ!wS=HI?iwJp}wUmsLe5gc|nAc6=4 zGLXu^6(E3h7#xg$6V}Dt)_ZRbx&GNz7Q4#IB7Pz5IsqjV=yQo`=u`TTustBPICUVF z_SXa%N8#5JcxT4ll}*HM4{z%(=&@EPG}mhd$~vlfGNG~5g!HxuB^wMg+znPTQd$+o zqCL5Q{vLi?cNMO>zn6|C?tPncXp39zw|dcYwTVVZ4qZh`Avgqwa-d`>@M8OqT)y)Y 
z$`<5@X7dfh8oj$MQ^>Zu%BK@j+yb#vEj>zQBp*&f6WsRLlm>ID2fb# zETkalUvmKcCsp9yb}05e*{kiRs@YREWv5j=G?JYCW9*Ig&ts>Th5Y{jbC1IwT5o)< z@b`%n)cZ2=1%Ki#)7lbAg4RZ>GRITYB#B7{`c!}mKUYw4r(wvUE%Y?w$x&n)8WKKnz&cOLB=v~eqm*E`L{{WDiNuhnFmt@N|nNlxuNPd!g6Riskz7MI{ODXI$ z4qhO|EFXlW`jv)BZ4_4SC^49&1>4W*tMO!8K)co6Zl*+#fKO!uL^VL?toPTtir1(j zPT2dMd77>#UV#uf`O_srg*O&J4@!6a^ifod!>FGcIill0R(s`+fCT z=vJMDQ$|?$)`zFUmVdU64X#g=@B3?1)I^VMlxoUUglfcPY?6wG3z7%Uvqu*j8-6K> zQT_Fr+{y5e^KxU&ZrJB1mCd8fZq}xbHQbi`+%53P&L}IRV;w~?ECFtb6ppM8r7FX* z8g=Y+v8HC0AkN1C1@Wrf`YHO0ZoW+UtFmqF)xGDU;im7nM@77;D&|p312FdCulkgT zj84j3(*>doq~Hu$t7BJMnx-n5p^8Bu43i@US5h(vBe4Wvf;$jF9(Ci&tCV_w$~{R% zC$5dPw%c_@vN~v~%D7LX?hott(i;6>l7>SsM}DFRZ9A8k;<{Z{S~_G{KpjOqpVzjM zoJ89XOZ|0q!R!@JgQbM1Sww1Tsp*=ptYiCq^}3$kl~RWP00=$sHLB}wnu@rEazQ=v zF`>4Zl@q0upSCs8;E-`^w@@KgmJF30j(lmb^%dfF)8O|ad{5rHmt@VjX=U1#7Q<8O8}3bttPLnFIzrH_7|x*6G8|rNoV!cGx@G#SPNSc(%(& zb+^^1G1ODYy)KLne)$LBfJbmO8K%=kBdRKIdfO5~QA&qzEM#mFtSY(_l~I$qn3=6kadu; zl_1Ce0IG+lmht3gjcv23UVE7LC_Ecnt?3 z+|l1TH|Ec0b4O;fweH(ws;GwNYoe-HUcTQ`1Hn2YztWNgex>T|)PNEJCtTj2BZRof zq}BF!AFtP^2FKY^+qtaZR|_r+U3uIUF>xM>#YHsqh9-mD7Z-seWTpX;gpLX$b%WAd zCsBQFw8OJ2VT8o9+G>}knn~9pFOOyO*@5u6^WQzjO`JGU$Ib?Gt-vMU{4<o7_So8k?=-aq>75M%oa`|DZ+RxO%4duXiESzcDwq>rm+@hdZDQ~h<7~*>9%nTAoEK+8U zR0Uay1@WCl_T=H5t!Ag#JBo7j%`1yF)f6#;L>WNG9YMnOVd0mk4;cx58g9NIa65@H zUF@5tyKL@yCF`NQTPr#lqh)CFkK8;`DRsjF2qjs5G`^oSQ{K%urmU1N;xgm)!)$N| zX>cOjFjKp)QrxbRy8_>bpj9!b@%B~l3Q)JMoVaQ#oY=HW*=d_3T#qRF~0HQUQ?3YuN;E*oF@ zd{R#=bga_Cs?LoWk~tToFvnxk19sKvnX};CKB4?Hr%4zJ8?{w_1We!XC?}8)r@$u} zC$Ra~MoB4prTId|HH_~rYkeI!cgNksHQ$9hhTFBMZZdoCYnqkBcFLy6Y3Uj;N+Hf# z0y;Yo2m}R&o-}YLB0Z9HA!l>FlG(#)}LeXmTv?PPH0HydJrqJELv-GjDIV$o+g_QtE5 z)5a;v1mTglOpYE&xq;o>w{e2Q#ci8f@!U4uNUKN{-=?mKk4;@1s2M{eA$2N#N|Az! zb!0uAe%rh2ZCm%A*|;rLvg<(&ZMLaMB5HY1NgMY2s|{7!$8mdI${V^cQj8uzKqtBA zmmmKC7Sx;iXnMIn1Z`H{UwNauAlTNsTsHlsx+pA#q_%X-$D+BCMrj}sz67foXvAxg z(n%cnGN{SLy%dxiM76|`=b#No-|-o6Xm1WwHm?;ZtdBJNH+rH~)l<_3c&O`PXq&D4 zs(aDE2gg&7li$BK?Z%GrMNfFPS3^}vS5A{sRMX0`M>KAsl0X_RR&{}#_GYa$jj8Z!!APtxf=82 zoF5im6wx)Fr~HTR{%5$gwD^a^s+E?tA(q=)1!*ZwNdytkG*cNydIPhV@JDxLImUGC za%I7b*Djp3SU6F75GBH$s%tLdmQd&_YA1Od1|6FUQF1$dJ-cab%kLfdf6V6q_QxA` zEqwKKciUpf?Ndr1nhKQuokUW^2@yz>EQ-hu2W)mA==Q(bR1ib`$obcPJWx~j7i{O3 zC88m{b2)3e@b{QF`GnhaW*av6pp9VPWB7i|H8%Hj2+`FFrI^ZU-U1@G| zZ7YOx(@8BBV~{utG-s(q$bZD7sTeptjzHCh`Ml+4Z*y_N8F)>&F5-cWYShwJwJ*G} zxJQkZlOU92b_WL|0FN4n9&vd;;@>V8N()yKt!o`^kG&myEE*eyvPnc{Wo7wCRARnR zF$4nI;`K0i^0`)@`7Ie-E6FULn7r{#&SwCsDsOvQ-(cK(GPWB%-rH!IzYWkJR;HW|4Ke!=4g=9-sn#I}9WaH_v-y7;Nbw+%zLyEPsF!j+MT zBLgwWj658A^p)av;o(O%+j^4SJ}sN$8+Q3hSoGzCNlh+1b;mk@5B##ozv9%W@t=eo zJmZel8#{@%AIH)qLR3>2=4hs;m<$JwM8hLsl{oUGf)1-s)p~tBp)ZWAiN8th#dWmv zcd##Zx0)!fUy8NYpi!5no{m;1-TQ$oaW*~-dHB@8Jz!7{F_ETAmaay4FUuwJ`G4A5 za+0Q|d*w}5-ni9?C9Jm5&l+{o^|>l>C%uPN3{k?9*y-M$W1n#3>i(M?@#Cp>Pa{p} z`#sJ=#4+uiYZY=ASE2UI%D}`Xa;RJ%r>KBPT;n7HK+j{K6U^-AE8AYE>O@s=p1fjJ z{{X1hTF`}b?pG?S-s-(>8;lh3)mPsut1A|XnW6XPs+Ht1w4YIrthYfu%Jk>#2A!Eh zy!N2BTtQhO4J|9iOlf#Qo2>Gw#M-6t>g0|IcvF~f0C9c_Vr}n0%rAn9fQJ1N)i28BdlONW*^^190BB~ zj$F)bE#r3JO_et-_hQkd#`&@Bk;^6N?YS#0!kz$xRK*MXYbqldR7#8ylc|^89Msh? 
[GIT binary patch data for added image files omitted]

- + diff --git a/search.json b/search.json index 76bf255..322575c 100644 --- a/search.json +++ b/search.json @@ -11,7 +11,7 @@ "href": "index.html", "title": "Home", "section": "", - "text": "Blogging with Quarto and Jupyter: The Complete Guide\n\n\n\n\n\n\n\npython\n\n\ntutorial\n\n\nblogging\n\n\n\n\nStep-by-step tutorial and best practices for creating a python blog with quarto and jupyter\n\n\n\n\n\n\nSep 6, 2023\n\n\n\n\n\n\n \n\n\n\n\nRandom Realizations Resurrected\n\n\n\n\n\n\n\nblogging\n\n\n\n\nThe world’s favorite data science blog is back.\n\n\n\n\n\n\nAug 2, 2023\n\n\n\n\n\n\n \n\n\n\n\nXGBoost from Scratch\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nA walkthrough of my from-scratch python implementation of XGBoost.\n\n\n\n\n\n\nMay 7, 2022\n\n\n\n\n\n\n \n\n\n\n\nXGBoost Explained\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nIn-depth explanation and mathematical derivation of the XGBoost algorithm\n\n\n\n\n\n\nMar 13, 2022\n\n\n\n\n\n\n \n\n\n\n\nDecision Tree from Scratch\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nA detailed walkthrough of my from-scratch decision tree implementation in python.\n\n\n\n\n\n\nDec 13, 2021\n\n\n\n\n\n\n \n\n\n\n\nConsider the Decision Tree\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nUnderstand the core strengths and weaknesses of the decision tree, and see how ensembling makes trees shine.\n\n\n\n\n\n\nDec 12, 2021\n\n\n\n\n\n\n \n\n\n\n\nHow to Implement a Gradient Boosting Machine that Works with Any Loss Function\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nSummarize Friedman’s seminal GBM paper and implement the generic gradient boosting algorithm to train models with any differentiable loss function.\n\n\n\n\n\n\nOct 23, 2021\n\n\n\n\n\n\n \n\n\n\n\nHello PySpark!\n\n\n\n\n\n\n\npython\n\n\nPySpark\n\n\ntutorial\n\n\n\n\nGet up and running fast with a local pyspark installation, and learn the essentials of working with dataframes at scale.\n\n\n\n\n\n\nJun 22, 2021\n\n\n\n\n\n\n \n\n\n\n\nHow Gradient Boosting Does Gradient Descent\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nUnderstand how gradient boosting does gradient descent in function space to minimize any differentiable loss function in the service of creating a good model.\n\n\n\n\n\n\nApr 27, 2021\n\n\n\n\n\n\n \n\n\n\n\nGet Down with Gradient Descent\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nGet down with the intuition for gradient descent via a fresh analogy, develop the mathematical formulation of the algorithm, and implement it from scratch to train a linear regression model.\n\n\n\n\n\n\nJan 22, 2021\n\n\n\n\n\n\n \n\n\n\n\nHow to Build a Gradient Boosting Machine from Scratch\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nUnderstand the intuition behind the gradient boosting machine (GBM) and learn how to implement it from scratch.\n\n\n\n\n\n\nDec 8, 2020\n\n\n\n\n\n\n \n\n\n\n\nThe 80/20 Pandas Tutorial\n\n\n\n\n\n\n\npython\n\n\npandas\n\n\ntutorial\n\n\n\n\nAn opinionated pandas tutorial on my preferred methods to accomplish the most essential data transformation tasks in a way that will make veteran R and tidyverse users smile.\n\n\n\n\n\n\nNov 25, 2020\n\n\n\n\n\n\n \n\n\n\n\nHello World! 
And Why I’m Inspired to Start a Blog\n\n\n\n\n\n\n\nblogging\n\n\n\n\nA reflection on what inspired me to start a blog and three reasons I think it could be a good idea.\n\n\n\n\n\n\nNov 22, 2020\n\n\n\n\n\n\nNo matching items" + "text": "XGBoost for Regression in Python\n\n\n\n\n\n\n\npython\n\n\ntutorial\n\n\ngradient boosting\n\n\nxgboost\n\n\n\n\nA step-bystep tutorial on regression with XGBoost in python using sklearn and the xgboost library\n\n\n\n\n\n\nOct 25, 2023\n\n\n\n\n\n\n \n\n\n\n\nBlogging with Quarto and Jupyter: The Complete Guide\n\n\n\n\n\n\n\npython\n\n\ntutorial\n\n\nblogging\n\n\n\n\nStep-by-step tutorial and best practices for creating a python blog with quarto and jupyter\n\n\n\n\n\n\nSep 6, 2023\n\n\n\n\n\n\n \n\n\n\n\nRandom Realizations Resurrected\n\n\n\n\n\n\n\nblogging\n\n\n\n\nThe world’s favorite data science blog is back.\n\n\n\n\n\n\nAug 2, 2023\n\n\n\n\n\n\n \n\n\n\n\nXGBoost from Scratch\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nA walkthrough of my from-scratch python implementation of XGBoost.\n\n\n\n\n\n\nMay 7, 2022\n\n\n\n\n\n\n \n\n\n\n\nXGBoost Explained\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nIn-depth explanation and mathematical derivation of the XGBoost algorithm\n\n\n\n\n\n\nMar 13, 2022\n\n\n\n\n\n\n \n\n\n\n\nDecision Tree from Scratch\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nA detailed walkthrough of my from-scratch decision tree implementation in python.\n\n\n\n\n\n\nDec 13, 2021\n\n\n\n\n\n\n \n\n\n\n\nConsider the Decision Tree\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nUnderstand the core strengths and weaknesses of the decision tree, and see how ensembling makes trees shine.\n\n\n\n\n\n\nDec 12, 2021\n\n\n\n\n\n\n \n\n\n\n\nHow to Implement a Gradient Boosting Machine that Works with Any Loss Function\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nSummarize Friedman’s seminal GBM paper and implement the generic gradient boosting algorithm to train models with any differentiable loss function.\n\n\n\n\n\n\nOct 23, 2021\n\n\n\n\n\n\n \n\n\n\n\nHello PySpark!\n\n\n\n\n\n\n\npython\n\n\nPySpark\n\n\ntutorial\n\n\n\n\nGet up and running fast with a local pyspark installation, and learn the essentials of working with dataframes at scale.\n\n\n\n\n\n\nJun 22, 2021\n\n\n\n\n\n\n \n\n\n\n\nHow Gradient Boosting Does Gradient Descent\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nUnderstand how gradient boosting does gradient descent in function space to minimize any differentiable loss function in the service of creating a good model.\n\n\n\n\n\n\nApr 27, 2021\n\n\n\n\n\n\n \n\n\n\n\nGet Down with Gradient Descent\n\n\n\n\n\n\n\ngradient boosting\n\n\n\n\nGet down with the intuition for gradient descent via a fresh analogy, develop the mathematical formulation of the algorithm, and implement it from scratch to train a linear regression model.\n\n\n\n\n\n\nJan 22, 2021\n\n\n\n\n\n\n \n\n\n\n\nHow to Build a Gradient Boosting Machine from Scratch\n\n\n\n\n\n\n\npython\n\n\ngradient boosting\n\n\nfrom scratch\n\n\n\n\nUnderstand the intuition behind the gradient boosting machine (GBM) and learn how to implement it from scratch.\n\n\n\n\n\n\nDec 8, 2020\n\n\n\n\n\n\n \n\n\n\n\nThe 80/20 Pandas Tutorial\n\n\n\n\n\n\n\npython\n\n\npandas\n\n\ntutorial\n\n\n\n\nAn opinionated pandas tutorial on my preferred methods to accomplish the most essential data transformation tasks in a way that will make veteran R and tidyverse users smile.\n\n\n\n\n\n\nNov 25, 2020\n\n\n\n\n\n\n 
\n\n\n\n\nHello World! And Why I’m Inspired to Start a Blog\n\n\n\n\n\n\n\nblogging\n\n\n\n\nA reflection on what inspired me to start a blog and three reasons I think it could be a good idea.\n\n\n\n\n\n\nNov 22, 2020\n\n\n\n\n\n\nNo matching items" }, { "objectID": "posts/xgboost-explained/index.html", @@ -147,144 +147,172 @@ "text": "References\nThis implementation is inspired and partially adapted from Jeremy Howard’s live coding of a Random Forest as part of the fastai ML course." }, { - "objectID": "posts/consider-the-decision-tree/index.html", - "href": "posts/consider-the-decision-tree/index.html", - "title": "Consider the Decision Tree", + "objectID": "posts/xgboost-for-regression-in-python/index.html", + "href": "posts/xgboost-for-regression-in-python/index.html", + "title": "XGBoost for Regression in Python", "section": "", - "text": "A California cypress tree abides in silence on Alameda Beach.\nAh, the decision tree. It’s an underrated and often overlooked hero of modern statistical learning. Trees aren’t particularly powerful learning algorithms on their own, but when utilized as building blocks in larger ensemble models like random forest and gradient boosted trees, they can achieve state of the art performance in many practical applications. Since we’ve been focusing on gradient boosting ensembles lately, let’s take a moment to consider the humble decision tree itself. This post gives a high-level intuition for how trees work, an opinionated list of their key strengths and weaknesses, and some perspective on why ensembling makes them truly shine.\nOnward!" + "text": "In this post I’m going to show you my process for solving regression problems with XGBoost in python, using either the native xgboost API or the scikit-learn interface. This is a powerful methodology that can produce world class results in a short time with minimal thought or effort. While we’ll be working on an old Kagle competition for predicting the sale prices of bulldozers and other heavy machinery, you can use this flow to solve whatever tabular data regression problem you’re working on.\nThis post serves as the explanation and documentation for the XGBoost regression jupyter notebook from my ds-templates repo on GitHub, so go ahead and download the notebook and follow along with your own data.\nIf you’re not already comfortable with the ideas behind gradient boosting and XGBoost, you’ll find it helpful to read some of my previous posts to get up to speed. I’d start with this introduction to gradient boosting, and then read this explanation of how XGBoost works.\nLet’s get into it! 🚀" }, { - "objectID": "posts/consider-the-decision-tree/index.html#classification-and-regression-trees", - "href": "posts/consider-the-decision-tree/index.html#classification-and-regression-trees", - "title": "Consider the Decision Tree", - "section": "Classification and Regression Trees", - "text": "Classification and Regression Trees\nA Decision tree is a type of statistical model that takes features or covariates as input and yields a prediction as output. The idea of the decision tree as a statistical learning tool traces back to a monograph published in 1984 by Breiman, Freidman, Olshen, and Stone called “Classification and Regression Trees” (a.k.a. CART). As the name suggests, trees come in two main varieties: classification trees which predict discrete class labels (e.g. DecisionTreeClassifier) and regression trees which predict numeric values (e.g. 
DecisionTreeRegressor).\nAs I mentioned earlier, tree models are not very powerful learners on their own. You might find that an individual tree model is useful for creating a simple and highly interpretable model in specific situations, but in general, trees tend to shine most as building blocks in more complex algorithms. These composite models are called ensembles, and the most important tree ensembles are random forest and gradient boosted trees. While random forest uses either regression or classification trees depending on the type of target, gradient boosting can use regression trees to solve both classification and regression tasks." + "objectID": "posts/xgboost-for-regression-in-python/index.html#install-and-import-the-xgboost-library", + "href": "posts/xgboost-for-regression-in-python/index.html#install-and-import-the-xgboost-library", + "title": "XGBoost for Regression in Python", + "section": "Install and import the xgboost library", + "text": "Install and import the xgboost library\nIf you don’t already have it, go ahead and use conda to install the xgboost library, e.g.\n$ conda install -c conda-forge xgboost\nThen import it along with the usual suspects.\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport xgboost as xgb" }, { - "objectID": "posts/consider-the-decision-tree/index.html#regression-tree-in-action", - "href": "posts/consider-the-decision-tree/index.html#regression-tree-in-action", - "title": "Consider the Decision Tree", - "section": "Regression Tree in Action", - "text": "Regression Tree in Action\nLet’s have a closer look at regression trees by training one on the diabetes dataset from scikit learn. According to the documentation:\n\nTen baseline variables, age, sex, body mass index, average blood pressure, and six blood serum measurements were obtained for each of n = 442 diabetes patients, as well as the response of interest, a quantitative measure of disease progression one year after baseline.\n\nFirst we load the data. To make our lives easier, we’ll just use two features: average blood pressure (bp) and the first blood serum measurement (s1) to predict the target. I’ll rescale the features to make the values easier for me to read, but it won’t affect our tree–more on that later.\n\nimport numpy as np \nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport seaborn as sns\ncolor_palette = \"viridis\"\n\n\nfrom sklearn.datasets import load_diabetes\n\nX, y = load_diabetes(as_frame=True, return_X_y=True)\n\nX = 100 * X[['bp', 's1']]\n\n\n\n\n\n\nLet’s grow a tree to predict the target given values of blood pressure and blood serum.\n\nfrom sklearn.tree import DecisionTreeRegressor\n\ntree = DecisionTreeRegressor(max_depth=2)\ntree.fit(X,y);\n\n\n\n\n\n\nTo make predictions using our fitted tree, we start at the root node (which is at the top), and we work our way down moving left if our feature is less than the split threshold and to the right if it’s greater than the split threshold. For example let’s predict the target for a new case with bp= 1 and s1 = 5. Since our blood pressure of 1 is less than 2.359, we move to the left child node. Here, since our serum of 5 is greater than the threshold at 0.875, we move to the right child node. 
This node has no further children, and thus we return its predicted value of 155.343.\n\ntree.predict(pd.DataFrame({'bp': 1, 's1': 5}, index=[0]))\n\narray([155.34313725])\n\n\nLet’s overlay these splits on our feature scatterplot to see how the tree has partitioned the feature space.\n\n\n\n\n\nThe tree has managed to carve out regions of feature space where the target values tend to be similar within each region, e.g. we have low target values in the bottom left partition and high target values in the far right region.\nLet’s take a look at the regression surface predicted by our tree. Since the tree predicts the exact same value for all instances in a given partition, the surface has only four distinct values.\n\n\n\n\n\nFabulous, now that we’ve seen a tree in action, let’s talk about trees’ key strengths and weaknesses." + "objectID": "posts/xgboost-for-regression-in-python/index.html#read-dataset-into-python", + "href": "posts/xgboost-for-regression-in-python/index.html#read-dataset-into-python", + "title": "XGBoost for Regression in Python", + "section": "Read dataset into python", + "text": "Read dataset into python\nIn this example we’ll work on the Kagle Bluebook for Bulldozers competition, which asks us to build a regression model to predict the sale price of heavy equipment. Amazingly, you can solve your own regression problem by swapping this data out with your organization’s data before proceeding with the tutorial.\nGo ahead and download the Train.zip file from Kagle and extract it into Train.csv. Then read the data into a pandas dataframe.\n\ndf = pd.read_csv('Train.csv', parse_dates=['saledate']);\n\nNotice I cheated a little bit, checking the columns ahead of time and telling pandas to treat the saledate column as a date. In general it will make life easier to read in any date-like columns as dates.\n\ndf.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 401125 entries, 0 to 401124\nData columns (total 53 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 SalesID 401125 non-null int64 \n 1 SalePrice 401125 non-null int64 \n 2 MachineID 401125 non-null int64 \n 3 ModelID 401125 non-null int64 \n 4 datasource 401125 non-null int64 \n 5 auctioneerID 380989 non-null float64 \n 6 YearMade 401125 non-null int64 \n 7 MachineHoursCurrentMeter 142765 non-null float64 \n 8 UsageBand 69639 non-null object \n 9 saledate 401125 non-null datetime64[ns]\n 10 fiModelDesc 401125 non-null object \n 11 fiBaseModel 401125 non-null object \n 12 fiSecondaryDesc 263934 non-null object \n 13 fiModelSeries 56908 non-null object \n 14 fiModelDescriptor 71919 non-null object \n 15 ProductSize 190350 non-null object \n 16 fiProductClassDesc 401125 non-null object \n 17 state 401125 non-null object \n 18 ProductGroup 401125 non-null object \n 19 ProductGroupDesc 401125 non-null object \n 20 Drive_System 104361 non-null object \n 21 Enclosure 400800 non-null object \n 22 Forks 192077 non-null object \n 23 Pad_Type 79134 non-null object \n 24 Ride_Control 148606 non-null object \n 25 Stick 79134 non-null object \n 26 Transmission 183230 non-null object \n 27 Turbocharged 79134 non-null object \n 28 Blade_Extension 25219 non-null object \n 29 Blade_Width 25219 non-null object \n 30 Enclosure_Type 25219 non-null object \n 31 Engine_Horsepower 25219 non-null object \n 32 Hydraulics 320570 non-null object \n 33 Pushblock 25219 non-null object \n 34 Ripper 104137 non-null object \n 35 Scarifier 25230 non-null object \n 36 Tip_Control 25219 non-null object \n 37 
Tire_Size 94718 non-null object \n 38 Coupler 213952 non-null object \n 39 Coupler_System 43458 non-null object \n 40 Grouser_Tracks 43362 non-null object \n 41 Hydraulics_Flow 43362 non-null object \n 42 Track_Type 99153 non-null object \n 43 Undercarriage_Pad_Width 99872 non-null object \n 44 Stick_Length 99218 non-null object \n 45 Thumb 99288 non-null object \n 46 Pattern_Changer 99218 non-null object \n 47 Grouser_Type 99153 non-null object \n 48 Backhoe_Mounting 78672 non-null object \n 49 Blade_Type 79833 non-null object \n 50 Travel_Controls 79834 non-null object \n 51 Differential_Type 69411 non-null object \n 52 Steering_Controls 69369 non-null object \ndtypes: datetime64[ns](1), float64(2), int64(6), object(44)\nmemory usage: 162.2+ MB" }, { - "objectID": "posts/consider-the-decision-tree/index.html#why-trees-are-awesome", - "href": "posts/consider-the-decision-tree/index.html#why-trees-are-awesome", - "title": "Consider the Decision Tree", - "section": "Why trees are awesome", - "text": "Why trees are awesome\nTrees are awesome because they are easy to use, and trees are easy to use because they are robust, require minimal data preprocessing, and can learn complex relationships without user intervention.\n\nFeature Scaling\nTrees owe their minimal data preprocessing requirements and their robustness to the fact that split finding is controlled by the sort order of the input feature values, rather than the values themselves. This means that trees are invariant to the scaling of input features, which in turn means that we don’t need to fuss around with carefully rescaling all the numeric features before fitting a tree. It also means that trees tend to work well even if features are highly skewed or contain outliers.\n\n\nCategoricals\nSince trees just split data based on numeric feature values, we can easily handle most categorical features by using integer encoding. For example we might encode a size feature with small = 1, medium = 2, and large = 3. This works particularly well with ordered categories, because partitioning is consistent with the category semantics. It can also work well even if the categories have no order, because with enough splits a tree can carve each category into its own partition.\n\n\nMissing Values\nIt’s worth calling out that different implementations of the decision tree handle missing feature values in different ways. Notably, scikit-learn handles them by throwing an error and telling you not to pull such shenanigans.\nValueError: Input contains NaN, infinity or a value too large for dtype('float32').\nOn the other hand, XGBoost supports an elegant way to make use of missing values, which we will discuss more in a later post.\n\n\nInteractions\nFeature interactions can also be learned automatically. An interaction means that the effect of one feature on the target differs depending on the value of another feature. For example, the effect of some drug may depend on whether or not the patient exercises. After a tree splits on exercise, it can naturally learn the correct drug effects for both exercisers and non-exercisers. This intuition extends to higher-order interactions as well, as long as the tree has enough splits to parse the relationships.\n\n\nFeature Selection\nBecause trees choose the best feature and threshold value at each split, they essentially perform automatic feature selection. This is great because even if we throw a lot of irrelevant features at a decision tree, it will simply tend not to use them for splits. 
Similarly, if two or more features are highly correlated or even redundant, the tree will simply choose one or the other when making each split; having both in the model will not cause catastrophic instability as it could in a linear model.\n\n\nFeature-Target Relationship\nFinally, it is possible for trees to discover complex nonlinear feature-target relationships without the need for user-specification of the relationships. This is because trees use local piecewise constant approximations without making any parametric assumptions. With enough splits, the tree can approximate arbitrary feature-target relationships." + "objectID": "posts/xgboost-for-regression-in-python/index.html#prepare-raw-data-for-xgboost", + "href": "posts/xgboost-for-regression-in-python/index.html#prepare-raw-data-for-xgboost", + "title": "XGBoost for Regression in Python", + "section": "Prepare raw data for XGBoost", + "text": "Prepare raw data for XGBoost\nWhen faced with a new tabular dataset for modeling, we have two format considerations: data types and missingness. From the call to df.info() above, we can see we have both mixed types and missing values.\nWhen it comes to missing values, some models like the gradient booster or random forest in scikit-learn require purely non-missing inputs. One of the great strengths of XGBoost is that it relaxes this requirement, allowing us to pass in missing feature values, so we don’t have to worry about them.\nRegarding data types, all ML models for tabular data require inputs to be numeric, either integers or floats, so we’re going to have to deal with those object columns.\n\nEncode string features\nThe simplest way to encode string variables is to map each unique string value to an integer; this is called integer encoding.\nWe have a couple of options for how to implement this transformation: pandas categoricals or the scikit-learn label encoder. We can use the categorical type in pandas to generate mappings from string values to integers for each string feature. The category type is a bit like the factor type in R. Pandas stores the underlying data as integers, and it also keeps a mapping from the integers to the string values. XGBoost will be able to access the integers for model fitting. This is nice because we can still access the actual categories which can be helpful when we start taking a closer look at the data. If you prefer, you can also use the scikit-learn label encoder to replace the string columns with their integer-mapped counterparts.\n\ndef encode_string_features(df, use_cats=True):\n out_df = df.copy()\n for feature, feature_type in df.dtypes.items():\n if feature_type == 'object':\n if use_cats:\n out_df[feature] = out_df[feature].astype('category')\n else:\n from sklearn.preprocessing import LabelEncoder\n out_df[feature] = LabelEncoder() \\\n .fit_transform(out_df[feature].astype('str'))\n return out_df\n\ndf = encode_string_features(df, use_cats=False)\n\n\n\nEncode date and timestamp features\nWhile dates feel sort of numeric, they are not numbers, so we need to transform them into numeric columns. Unfortunately, encoding timestamps isn’t as straightforward as encoding strings, so we actually might need to engage in a little bit of feature engineering. A single date has many different attributes, e.g. days since epoch, year, quarter, month, day, day of year, day of week, is holiday, etc. As a starting point, we can just add a few of these attributes as features. 
Once a feature is represented as a date or timestamp data type, you can access various attributes via the dt attribute.\n\ndef encode_datetime_features(df, datetime_features, datetime_attributes):\n out_df = df.copy()\n for datetime_feature in datetime_features:\n for datetime_attribute in datetime_attributes:\n if datetime_attribute == 'days_since_epoch':\n out_df[f'{datetime_feature}_{datetime_attribute}'] = \\\n (out_df[datetime_feature] \n - pd.Timestamp(year=1970, month=1, day=1)).dt.days\n else:\n out_df[f'{datetime_feature}_{datetime_attribute}'] = \\\n getattr(out_df[datetime_feature].dt, datetime_attribute)\n return out_df\n\ndatetime_features = [\n 'saledate',\n]\ndatetime_attributes = [\n 'year',\n 'month',\n 'day',\n 'quarter',\n 'day_of_year',\n 'day_of_week',\n 'days_since_epoch',\n]\n\ndf = encode_datetime_features(df, datetime_features, datetime_attributes)\n\n\n\nTransform the target if necessary\nIn the interest of speed and efficiency, we didn’t bother doing any EDA with the feature data. Part of my justification for this is that trees are incredibly robust to outliers, colinearity, missingness, and other assorted nonsense in the feature data. However, they are not necessarily robust to nonsense in the target variable, so it’s worth having a look at it before proceeding any further.\n\ndf.SalePrice.hist(); plt.xlabel('SalePrice');\n\n\n\n\nOften when predicting prices it makes sense to use log price, especially when they span multiple orders of magnitude or have a strong right skew. These data look pretty friendly, lacking outliers and exhibiting only a mild positive skew; we could probably get away without doing any transformation. But checking the evaluation metric used to score the Kagle competition, we see they’re using root mean squared log error. That’s equivalent to using RMSE on log-transformed target data, so let’s go ahead and work with log prices.\n\ndf['logSalePrice'] = np.log1p(df['SalePrice'])\ndf.logSalePrice.hist(); plt.xlabel('logSalePrice');" }, { - "objectID": "posts/consider-the-decision-tree/index.html#why-trees-are-not-so-awesome", - "href": "posts/consider-the-decision-tree/index.html#why-trees-are-not-so-awesome", - "title": "Consider the Decision Tree", - "section": "Why trees are not so awesome", - "text": "Why trees are not so awesome\nThe main weakness of the decision tree is that, on its own, it tends to have poor predictive performance compared to other algorithms. The main reasons for this are the tendency to overfit and prediction quantization issues.\n\nOverfitting\nIf we grow a decision tree until each leaf has exactly one instance in it, we will have simply memorized the training data, and our model will not generalize well. Basically the only defense against overfitting is to reduce the number of leaf nodes in the tree, either by using hyperparameters to stop splitting earlier or by removing certain leaf nodes after growing a deep tree. The problem here is that some of the benefits of trees, like ability to approximate arbitrary target patterns and ability to learn interaction effects, depend on having enough splits for the task. We can sometimes find ourselves in a situation where we cannot learn these complex relationships without overfitting the tree.\n\n\nQuantization\nBecause regression trees use piecewise constant functions to approximate the target, prediction accuracy can deteriorate near split boundaries. 
For example, if the target is increasing with the feature, a tree might tend to overpredict the target on the left side of split boundaries and overpredict on the right side of split boundaries.\n\n\n\n\n\n\n\nExtrapolation\nBecause they are trained by partitioning the feature space in a training dataset, trees cannot intelligently extrapolate beyond the data on which they are trained. For example if we query a tree for predictions beyond the greatest feature value encountered in training, it will just return the prediction corresponding to the largest in-sample feature values.\n\n\n\n\n\n\n\nThe Dark Side of Convenience\nFinally, there is always a price to pay for convenience. While trees can work well even with a messy dataset containing outliers, redundant features, and thoughtlessly encoded categoricals, we will rarely achieve the best performance under these conditions. Taking the time to deal with outliers, removing redundant information, purposefully choosing appropriate categorical encodings, and building an understanding of the data will often lead to much better results." + "objectID": "posts/xgboost-for-regression-in-python/index.html#train-and-evaluate-the-xgboost-regression-model", + "href": "posts/xgboost-for-regression-in-python/index.html#train-and-evaluate-the-xgboost-regression-model", + "title": "XGBoost for Regression in Python", + "section": "Train and Evaluate the XGBoost regression model", + "text": "Train and Evaluate the XGBoost regression model\nHaving prepared our dataset, we are now ready to train an XGBoost model. We’ll walk through the flow step-by-step first, then later we’ll collect the code in a single cell, so it’s easier to quickly iterate through variations of the model.\n\nSpecify target and feature columns\nFirst we’ll put together a list of our features and define the target column. I like to have an actual list defined in the code so it’s easier to see everything we’re puting into the model and easier to add or remove features as we iterate. Just run something like list(df.columns) in a cel to get a copy-pasteable list of columns, then edit it down to the full list of features, i.e. remove the target, date columns, and other non-feature columns..\n\n# list(df.columns)\n\n\nfeatures = [\n 'SalesID',\n 'MachineID',\n 'ModelID',\n 'datasource',\n 'auctioneerID',\n 'YearMade',\n 'MachineHoursCurrentMeter',\n 'UsageBand',\n 'fiModelDesc',\n 'fiBaseModel',\n 'fiSecondaryDesc',\n 'fiModelSeries',\n 'fiModelDescriptor',\n 'ProductSize',\n 'fiProductClassDesc',\n 'state',\n 'ProductGroup',\n 'ProductGroupDesc',\n 'Drive_System',\n 'Enclosure',\n 'Forks',\n 'Pad_Type',\n 'Ride_Control',\n 'Stick',\n 'Transmission',\n 'Turbocharged',\n 'Blade_Extension',\n 'Blade_Width',\n 'Enclosure_Type',\n 'Engine_Horsepower',\n 'Hydraulics',\n 'Pushblock',\n 'Ripper',\n 'Scarifier',\n 'Tip_Control',\n 'Tire_Size',\n 'Coupler',\n 'Coupler_System',\n 'Grouser_Tracks',\n 'Hydraulics_Flow',\n 'Track_Type',\n 'Undercarriage_Pad_Width',\n 'Stick_Length',\n 'Thumb',\n 'Pattern_Changer',\n 'Grouser_Type',\n 'Backhoe_Mounting',\n 'Blade_Type',\n 'Travel_Controls',\n 'Differential_Type',\n 'Steering_Controls',\n 'saledate_year',\n 'saledate_month',\n 'saledate_day',\n 'saledate_quarter',\n 'saledate_day_of_year',\n 'saledate_day_of_week',\n 'saledate_days_since_epoch'\n]\n\ntarget = 'logSalePrice'\n\n\n\nSplit the data into training and validation sets\nNext we split the dataset into a training set and a validation set. 
Of course since we’re going to evaluate against the validation set a number of times as we iterate, it’s best practice to keep a separate test set reserved to check our final model to ensure it generalizes well. Assuming that final test set is hidden away, we can use the rest of the data for training and validation.\nThere are two main ways we might want to select the validation set. If there isn’t a temporal ordering of the observations, we might be able to randomly sample. In practice, it’s much more common that observations have a temporal ordering, and that models are trained on observations up to a certain time and used to predict on observations occuring after that time. Since this data is temporal, we don’t want to split randomly; instead we’ll split on observation date, reserving the latest observations for the validation set.\n\n# Temporal Validation Set\ndef train_test_split_temporal(df, datetime_column, n_test):\n idx_sort = np.argsort(df[datetime_column])\n idx_train, idx_test = idx_sort[:-n_valid], idx_sort[-n_valid:]\n return df.iloc[idx_train, :], df.iloc[idx_test, :]\n\n\n# Random Validation Set\ndef train_test_split_random(df, n_test):\n np.random.seed(42)\n idx_sort = np.random.permutation(len(df))\n idx_train, idx_test = idx_sort[:-n_valid], idx_sort[-n_valid:]\n return df.iloc[idx_train, :], df.iloc[idx_test, :]\n\nmy_train_test_split = lambda d, n_valid: train_test_split_temporal(d, 'saledate', n_valid)\n# my_train_test_split = lambda d, n_valid: train_test_split_random(d, n_valid)\n\n\nn_valid = 12000\ntrain_df, valid_df = my_train_test_split(df, n_valid)\n\ntrain_df.shape, valid_df.shape\n\n((389125, 61), (12000, 61))\n\n\n\n\nCreate DMatrix data objects\nXGBoost uses a data type called dense matrix for efficient training and prediction, so next we need to create DMatrix objects for our training and validation datasets.\n\nIf you prefer to use the scikit-learn interface to XGBoost, you don’t need to create these dense matrix objects. More on that below.\n\n\ndtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)\ndvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)\n\n\n\nSet the XGBoost parameters\nXGBoost has numerous hyperparameters. Fortunately, just a handful of them tend to be the most influential; furthermore, the default values are not bad in most situations. I like to start out with a dictionary containing the default parameter values for just the ones I think are most important. For training there is one required boosting parameter called num_boost_round which I set to 50 as a starting point; you can make this smaller initially if training takes too long.\n\n# default values for important parameters\nparams = {\n 'learning_rate': 0.3,\n 'max_depth': 6,\n 'min_child_weight': 1,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',\n}\nnum_boost_round = 50\n\n\n\nTrain the XGBoost model\nCheck out the documentation on the learning API to see all the training options. During training, I like to have XGBoost print out the evaluation metric on the train and validation set after every few boosting rounds and again at the end of training; that can be done by setting evals and verbose_eval. 
You can also save the evaluation results in a dictionary passed into evals_result to inspect and plot the objective curve over the training iterations.\n\nevals_result = {}\nm = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')],\n verbose_eval=10,\n evals_result=evals_result)\n\n[0] train-rmse:6.74422 valid-rmse:6.79733\n[10] train-rmse:0.34798 valid-rmse:0.37158\n[20] train-rmse:0.26289 valid-rmse:0.28239\n[30] train-rmse:0.25148 valid-rmse:0.27028\n[40] train-rmse:0.24375 valid-rmse:0.26420\n[49] train-rmse:0.23738 valid-rmse:0.25855\n\n\n\n\nTrain the XGBoost model using the sklearn interface\nYou can optionally use the sklearn estimator interface to XGBoost. This will bypass the need to use the DMatrix data objects for training and prediction, and it will allow you to leverage many of the other scikit-learn ecosystem tools like pipelines, parameter search, partial dependence plots, etc. The XGBRegressor is available in the xgboost library that we’ve already imported.\n\n# scikit-learn interface\nreg = xgb.XGBRegressor(n_estimators=num_boost_round, **params)\nreg.fit(train_df[features], train_df[target], \n eval_set=[(train_df[features], train_df[target]), (valid_df[features], valid_df[target])], \n verbose=10);\n\n[0] validation_0-rmse:6.74422 validation_1-rmse:6.79733\n[10] validation_0-rmse:0.34798 validation_1-rmse:0.37158\n[20] validation_0-rmse:0.26289 validation_1-rmse:0.28239\n[30] validation_0-rmse:0.25148 validation_1-rmse:0.27028\n[40] validation_0-rmse:0.24375 validation_1-rmse:0.26420\n[49] validation_0-rmse:0.23738 validation_1-rmse:0.25855\n\n\nSince not all features of XGBoost are available through the scikit-learn estimator interface, you might want to get the native booster object back out of the sklearn wrapper.\n\nm = reg.get_booster()\n\n\n\nEvaluate the model and check for overfitting\nWe get the model evaluation metrics on the training and validation sets printed to stdout when we use the evals argument to the training API. Typically I just look at those printed metrics, but let’s double check by hand.\n\ndef root_mean_squared_error(y_true, y_pred):\n return np.sqrt(np.mean((y_true - y_pred)**2))\n\nroot_mean_squared_error(dvalid.get_label(), m.predict(dvalid))\n\n0.25855368\n\n\nSo, how good is that RMSLE of 0.259? Well, checking the Kagle leaderboard for this competition, we would have come in 53rd out of 474, which is in the top 12% of submissions. That’s not bad for 10 minutes of work doing the bare minimum necessary to transform the raw data into a format consumable by XGBoost and then training a model using default hyperparameter values.\n\nNote that we’re using a different validation set from that used for the final leaderboard (which is long closed), but our score is likely still a decent approximation for how we would have done in the competition.\n\nIt can be helpful to take a look at objective curves for training and validation data to get a sense for the extent of overfitting. A huge difference between training and validation performance indicates overfitting. In the below curve, there is very little overfitting, indicating we can be aggressive with hyperparameters that increase model flexibility. 
More on that soon.\n\npd.DataFrame({\n 'train': evals_result['train']['rmse'],\n 'valid': evals_result['valid']['rmse']\n}).plot(); plt.xlabel('boosting round'); plt.ylabel('objective');\n\n\n\n\n\n\nCheck feature importance\nIt’s helpful to get an idea of how much the model is using each feature. In following iterations we might want to try dropping low-signal features or examining the important ones more closely for feature engineering ideas. The gigantic caveat to keep in mind here is that there are different measures of feature importance, and each one will give different importances. XGBoost provides three importance measures; I tend to prefer looking at the weight measure because its rankings usually seem most intuitive.\n\nfig, ax = plt.subplots(figsize=(5,10))\nfeature_importances = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)\nfeature_importances.plot.barh(ax=ax)\nplt.title('Feature Importance');" }, { - "objectID": "posts/consider-the-decision-tree/index.html#how-ensembling-makes-trees-shine", - "href": "posts/consider-the-decision-tree/index.html#how-ensembling-makes-trees-shine", - "title": "Consider the Decision Tree", - "section": "How ensembling makes trees shine", - "text": "How ensembling makes trees shine\nWe can go a long way toward addressing the issues of overfitting and prediction quantization by using trees as building blocks in larger algorithms called tree ensembles, the most popular examples being random forest and gradient boosted trees. A tree ensemble is a collection of different individual tree models whose predictions are averaged to generate an overall prediction.\nEnsembling helps address overfitting because even if each individual tree is overfitted, the average of their individual noisy predictions will tend to be more stable. Think of it in terms of the bias variance tradeoff, where bias refers to a model’s failure to capture certain patterns and variance refers to how different a model prediction would be if the model were trained on a different sample of training data. Since the ensemble is averaging over the predictions of all the individual models, training it on a different sample of training data would change the individual models predictions, but their overall average prediction will tend to remain stable. Thus, ensembling helps reduce the effects of overfitting by reducing model variance without increasing bias.\nEnsembling also helps address prediction quantization issues. While each individual tree’s predictions might express large jumps in the regression surface, averaging many different trees’ predictions together effectively generates a surface with more partitions and smaller jumps between them. This provides a smoother approximation of the feature-target relationship." + "objectID": "posts/xgboost-for-regression-in-python/index.html#improve-performance-using-a-model-iteration-loop", + "href": "posts/xgboost-for-regression-in-python/index.html#improve-performance-using-a-model-iteration-loop", + "title": "XGBoost for Regression in Python", + "section": "Improve performance using a model iteration loop", + "text": "Improve performance using a model iteration loop\nAt this point we have a half-decent prototype model. 
Now we enter the model iteration loop in which we adjust features and model parameters to find configurations that have better and better performance.\nLet’s start by putting the feature and target specification, the training/validation split, the model training, and the evaluation all together in one code block that we can copy paste for easy model iteration.\n\nNote that for this process to be effective, model training needs to take less than 10 seconds. Otherwise you’ll be sitting around waiting way too long. If training takes too long, try training on a sample of the training data, or try reducing the number of boosting rounds.\n\n\nfeatures = [\n 'SalesID',\n 'MachineID',\n 'ModelID',\n 'datasource',\n 'auctioneerID',\n 'YearMade',\n 'MachineHoursCurrentMeter',\n 'UsageBand',\n 'fiModelDesc',\n 'fiBaseModel',\n 'fiSecondaryDesc',\n 'fiModelSeries',\n 'fiModelDescriptor',\n 'ProductSize',\n 'fiProductClassDesc',\n 'state',\n 'ProductGroup',\n 'ProductGroupDesc',\n 'Drive_System',\n 'Enclosure',\n 'Forks',\n 'Pad_Type',\n 'Ride_Control',\n 'Stick',\n 'Transmission',\n 'Turbocharged',\n 'Blade_Extension',\n 'Blade_Width',\n 'Enclosure_Type',\n 'Engine_Horsepower',\n 'Hydraulics',\n 'Pushblock',\n 'Ripper',\n 'Scarifier',\n 'Tip_Control',\n 'Tire_Size',\n 'Coupler',\n 'Coupler_System',\n 'Grouser_Tracks',\n 'Hydraulics_Flow',\n 'Track_Type',\n 'Undercarriage_Pad_Width',\n 'Stick_Length',\n 'Thumb',\n 'Pattern_Changer',\n 'Grouser_Type',\n 'Backhoe_Mounting',\n 'Blade_Type',\n 'Travel_Controls',\n 'Differential_Type',\n 'Steering_Controls',\n 'saledate_year',\n 'saledate_month',\n 'saledate_day',\n 'saledate_quarter',\n 'saledate_day_of_year',\n 'saledate_day_of_week',\n 'saledate_days_since_epoch',\n]\n\ntarget = 'logSalePrice'\n\ntrain_df, valid_df = train_test_split_temporal(df, 'saledate', 12000)\ndtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)\ndvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)\n\nparams = {\n 'learning_rate': 0.3,\n 'max_depth': 6,\n 'min_child_weight': 1,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',\n}\nnum_boost_round = 50\n\nm = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')],verbose_eval=10)\n\n[0] train-rmse:6.74422 valid-rmse:6.79733\n[10] train-rmse:0.34798 valid-rmse:0.37158\n[20] train-rmse:0.26289 valid-rmse:0.28239\n[30] train-rmse:0.25148 valid-rmse:0.27028\n[40] train-rmse:0.24375 valid-rmse:0.26420\n[49] train-rmse:0.23738 valid-rmse:0.25855\n\n\n\nFeature selection\n\nDrop low-importance features\nLet’s try training a model on only the top k most important features. You can try different values of k for the rankings created from each of the three importance measures. 
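For example, here is one quick way to scan a few candidate values of k using the gain ranking; this little loop is my own addition rather than part of the original notebook, and the particular k values are arbitrary.\n\n# my own quick scan over k (not from the original notebook); the k values are arbitrary\nimportances_gain = pd.Series(m.get_score(importance_type='gain')).sort_values(ascending=False)\nfor k in [20, 30, 40, 50]:\n top_k = list(importances_gain[:k].index)\n dtrain_k = xgb.DMatrix(data=train_df[top_k], label=train_df[target], enable_categorical=True)\n dvalid_k = xgb.DMatrix(data=valid_df[top_k], label=valid_df[target], enable_categorical=True)\n m_k = xgb.train(params=params, dtrain=dtrain_k, num_boost_round=num_boost_round,\n evals=[(dtrain_k, 'train'), (dvalid_k, 'valid')], verbose_eval=False)\n print(k, root_mean_squared_error(dvalid_k.get_label(), m_k.predict(dvalid_k)))\n\n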
You can play with how many to keep, looking for the optimal number manually.\n\nfeature_importances_weight = pd.Series(m.get_score(importance_type='weight')).sort_values(ascending=False)\nfeature_importances_cover = pd.Series(m.get_score(importance_type='cover')).sort_values(ascending=False)\nfeature_importances_gain = pd.Series(m.get_score(importance_type='gain')).sort_values(ascending=False)\n\n\n# features = list(feature_importances_weight[:30].index)\n# features = list(feature_importances_cover[:35].index)\nfeatures = list(feature_importances_gain[:30].index)\n\ndtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)\ndvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)\n\nparams = {\n 'learning_rate': 0.3,\n 'max_depth': 6,\n 'min_child_weight': 1,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',\n}\nnum_boost_round = 50\n\nm = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)\n\n[0] train-rmse:6.74422 valid-rmse:6.79733\n[10] train-rmse:0.34798 valid-rmse:0.37150\n[20] train-rmse:0.26182 valid-rmse:0.27986\n[30] train-rmse:0.24974 valid-rmse:0.26896\n[40] train-rmse:0.24282 valid-rmse:0.26043\n[49] train-rmse:0.23768 valid-rmse:0.25664\n\n\nLooks like keeping the top 30 from the gain importance type gives a slight performance improvement.\n\n\nDrop one feature at a time\nNext try dropping each feature out of the model one-at-a-time to see if there are any more features that you can drop. For each feature, drop it from the feature set, then train a new model, then record the evaluation score. At the end, sort the scores to see which features are the best candidates for removal.\n\nfeatures = [\n 'Coupler_System',\n 'Tire_Size',\n 'Scarifier',\n 'ProductSize',\n 'Ride_Control',\n 'fiBaseModel',\n 'Enclosure',\n 'Pad_Type',\n 'YearMade',\n 'fiSecondaryDesc',\n 'ProductGroup',\n 'Drive_System',\n 'Ripper',\n 'saledate_days_since_epoch',\n 'fiModelDescriptor',\n 'fiProductClassDesc',\n 'MachineID',\n 'Hydraulics',\n 'SalesID',\n 'Track_Type',\n 'ModelID',\n 'fiModelDesc',\n 'Travel_Controls',\n 'Transmission',\n 'Blade_Extension',\n 'fiModelSeries',\n 'Grouser_Tracks',\n 'Undercarriage_Pad_Width',\n 'Stick',\n 'Thumb'\n]\n\n# drop each feature one-at-a-time\nscores = []\nfor i, feature in enumerate(features):\n drop_one_features = features[:i] + features[i+1:]\n\n dtrain = xgb.DMatrix(data=train_df[drop_one_features], label=train_df[target], enable_categorical=True)\n dvalid = xgb.DMatrix(data=valid_df[drop_one_features], label=valid_df[target], enable_categorical=True)\n\n params = {\n 'learning_rate': 0.3,\n 'max_depth': 6,\n 'min_child_weight': 1,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',\n }\n num_boost_round = 50\n\n m = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')],\n verbose_eval=False)\n score = root_mean_squared_error(dvalid.get_label(), m.predict(dvalid))\n scores.append(score)\n\nresults_df = pd.DataFrame({\n 'feature': features,\n 'score': 
scores\n})\nresults_df.sort_values(by='score')\n\n\n\n\n\n\n\n\nfeature\nscore\n\n\n\n\n18\nSalesID\n0.252617\n\n\n5\nfiBaseModel\n0.253710\n\n\n27\nUndercarriage_Pad_Width\n0.254032\n\n\n17\nHydraulics\n0.254114\n\n\n20\nModelID\n0.254169\n\n\n4\nRide_Control\n0.254278\n\n\n16\nMachineID\n0.254413\n\n\n19\nTrack_Type\n0.254825\n\n\n6\nEnclosure\n0.254958\n\n\n28\nStick\n0.255164\n\n\n1\nTire_Size\n0.255365\n\n\n10\nProductGroup\n0.255404\n\n\n22\nTravel_Controls\n0.255895\n\n\n29\nThumb\n0.256300\n\n\n23\nTransmission\n0.256380\n\n\n26\nGrouser_Tracks\n0.256395\n\n\n11\nDrive_System\n0.256652\n\n\n24\nBlade_Extension\n0.256698\n\n\n7\nPad_Type\n0.256952\n\n\n25\nfiModelSeries\n0.257073\n\n\n2\nScarifier\n0.257590\n\n\n12\nRipper\n0.257848\n\n\n0\nCoupler_System\n0.258074\n\n\n21\nfiModelDesc\n0.258712\n\n\n13\nsaledate_days_since_epoch\n0.259856\n\n\n14\nfiModelDescriptor\n0.260439\n\n\n9\nfiSecondaryDesc\n0.260782\n\n\n15\nfiProductClassDesc\n0.263790\n\n\n3\nProductSize\n0.268068\n\n\n8\nYearMade\n0.313105\n\n\n\n\n\n\n\nNext try removing the feature with the best removal score. Then with that feature still removed, also try removing the feature with the next best removal score and so on. Repeat this process until the model evaluation metric is no longer improving. I think this could be considered a faster version of backward stepwise feature selection.\n\nfeatures = [\n 'Coupler_System',\n 'Tire_Size',\n 'Scarifier',\n 'ProductSize',\n 'Ride_Control',\n# 'fiBaseModel',\n 'Enclosure',\n 'Pad_Type',\n 'YearMade',\n 'fiSecondaryDesc',\n 'ProductGroup',\n 'Drive_System',\n 'Ripper',\n 'saledate_days_since_epoch',\n 'fiModelDescriptor',\n 'fiProductClassDesc',\n 'MachineID',\n# 'Hydraulics',\n# 'SalesID',\n 'Track_Type',\n 'ModelID',\n 'fiModelDesc',\n 'Travel_Controls',\n 'Transmission',\n 'Blade_Extension',\n 'fiModelSeries',\n 'Grouser_Tracks',\n# 'Undercarriage_Pad_Width',\n 'Stick',\n 'Thumb'\n]\n\ndtrain = xgb.DMatrix(data=train_df[features], label=train_df[target], enable_categorical=True)\ndvalid = xgb.DMatrix(data=valid_df[features], label=valid_df[target], enable_categorical=True)\n\nparams = {\n 'learning_rate': 0.3,\n 'max_depth': 6,\n 'min_child_weight': 1,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',\n}\nnum_boost_round = 50\n\nm = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)\n\n[0] train-rmse:6.74422 valid-rmse:6.79145\n[10] train-rmse:0.34882 valid-rmse:0.37201\n[20] train-rmse:0.26050 valid-rmse:0.27386\n[30] train-rmse:0.24844 valid-rmse:0.26205\n[40] train-rmse:0.24042 valid-rmse:0.25426\n[49] train-rmse:0.23549 valid-rmse:0.25004\n\n\nSo here I was able to remove four more features before the score started getting worse. With our reduced feature set, we’re now ranking 39th on that Kagle leaderboard. Let’s see how far we can get with some hyperparameter tuning.\n\n\n\nTune the XGBoost hyperparameters\nThis is a topic which deserves its own full-length post, but just for fun, here I’ll do a quick and dirty hand tuning without a ton of explanation.\nBroadly speaking, my process is to increase model expressiveness by increasing the maximum tree depth untill it looks like I’m overfitting. At that point, I start pushing tree pruning parameters like min child weight and regularization parameters like lambda to counteract the overfitting. 
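If you’d rather scan a small grid than nudge these values purely by hand, a quick and dirty loop like the following works fine at this scale. This sketch is my own addition, and the candidate values are only illustrative, not recommendations from the original notebook.\n\n# my own rough grid scan (not from the original notebook); candidate values are arbitrary\nbest = None\nfor candidate_depth in [6, 8, 10]:\n for candidate_mcw in [1, 5, 15]:\n candidate_params = {**params, 'max_depth': candidate_depth, 'min_child_weight': candidate_mcw}\n m_c = xgb.train(params=candidate_params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=False)\n score = root_mean_squared_error(dvalid.get_label(), m_c.predict(dvalid))\n if best is None or score < best[0]:\n best = (score, candidate_depth, candidate_mcw)\nprint(best)\n\nEither way, the idea is the same: add flexibility until the validation score stops improving, then rein it back in with regularization. In my case I just nudged the parameters by hand.\n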
That process lead me to the following parameters.\n\nparams = {\n 'learning_rate': 0.3,\n 'max_depth': 10,\n 'min_child_weight': 14,\n 'lambda': 5,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',}\nnum_boost_round = 50\n\nm = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)\n\n[0] train-rmse:6.74473 valid-rmse:6.80196\n[10] train-rmse:0.31833 valid-rmse:0.34151\n[20] train-rmse:0.22651 valid-rmse:0.24885\n[30] train-rmse:0.21501 valid-rmse:0.23904\n[40] train-rmse:0.20897 valid-rmse:0.23645\n[49] train-rmse:0.20418 valid-rmse:0.23412\n\n\nThat gets us up to 12th place. Next I start reducing the learning rate and increasing the boosting rounds in proportion to one another.\n\nparams = {\n 'learning_rate': 0.3/5,\n 'max_depth': 10,\n 'min_child_weight': 14,\n 'lambda': 5,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',}\nnum_boost_round = 50*5\n\nm = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10)\n\n[0] train-rmse:9.04930 valid-rmse:9.12743\n[10] train-rmse:4.88505 valid-rmse:4.93769\n[20] train-rmse:2.64630 valid-rmse:2.68501\n[30] train-rmse:1.44703 valid-rmse:1.47923\n[40] train-rmse:0.81123 valid-rmse:0.84079\n[50] train-rmse:0.48441 valid-rmse:0.51272\n[60] train-rmse:0.32887 valid-rmse:0.35434\n[70] train-rmse:0.26276 valid-rmse:0.28630\n[80] train-rmse:0.23720 valid-rmse:0.26026\n[90] train-rmse:0.22658 valid-rmse:0.24932\n[100] train-rmse:0.22119 valid-rmse:0.24441\n[110] train-rmse:0.21747 valid-rmse:0.24114\n[120] train-rmse:0.21479 valid-rmse:0.23923\n[130] train-rmse:0.21250 valid-rmse:0.23768\n[140] train-rmse:0.21099 valid-rmse:0.23618\n[150] train-rmse:0.20928 valid-rmse:0.23524\n[160] train-rmse:0.20767 valid-rmse:0.23445\n[170] train-rmse:0.20658 valid-rmse:0.23375\n[180] train-rmse:0.20558 valid-rmse:0.23307\n[190] train-rmse:0.20431 valid-rmse:0.23252\n[200] train-rmse:0.20316 valid-rmse:0.23181\n[210] train-rmse:0.20226 valid-rmse:0.23145\n[220] train-rmse:0.20133 valid-rmse:0.23087\n[230] train-rmse:0.20045 valid-rmse:0.23048\n[240] train-rmse:0.19976 valid-rmse:0.23023\n[249] train-rmse:0.19902 valid-rmse:0.23009\n\n\nDecreasing the learning rate and increasing the boosting rounds got us up to a 2nd place score. Notice that the score is still decreasing on the validation set. We can actually continue boosting on this model by passing it to the xgb_model argument in the train function. We want to go very very slowly here to avoid overshooting the minimum of the objective function. 
To do that I ramp up the lambda regularization parameter and boost a few more rounds from where we left off.\n\n# second stage\nparams = {\n 'learning_rate': 0.3/10,\n 'max_depth': 10,\n 'min_child_weight': 14,\n 'lambda': 60,\n 'subsample': 1,\n 'colsample_bynode': 1,\n 'objective': 'reg:squarederror',}\nnum_boost_round = 50*3\n\nm1 = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boost_round,\n evals=[(dtrain, 'train'), (dvalid, 'valid')], verbose_eval=10,\n xgb_model=m)\n\n[0] train-rmse:0.19900 valid-rmse:0.23007\n[10] train-rmse:0.19862 valid-rmse:0.22990\n[20] train-rmse:0.19831 valid-rmse:0.22975\n[30] train-rmse:0.19796 valid-rmse:0.22964\n[40] train-rmse:0.19768 valid-rmse:0.22955\n[50] train-rmse:0.19739 valid-rmse:0.22940\n[60] train-rmse:0.19714 valid-rmse:0.22935\n[70] train-rmse:0.19689 valid-rmse:0.22927\n[80] train-rmse:0.19664 valid-rmse:0.22915\n[90] train-rmse:0.19646 valid-rmse:0.22915\n[100] train-rmse:0.19620 valid-rmse:0.22910\n[110] train-rmse:0.19604 valid-rmse:0.22907\n[120] train-rmse:0.19583 valid-rmse:0.22901\n[130] train-rmse:0.19562 valid-rmse:0.22899\n[140] train-rmse:0.19546 valid-rmse:0.22898\n[149] train-rmse:0.19520 valid-rmse:0.22886\n\n\n\nroot_mean_squared_error(dvalid.get_label(), m1.predict(dvalid))\n\n0.22885828\n\n\nAnd that gets us to 1st place on the leaderboard." }, { - "objectID": "posts/consider-the-decision-tree/index.html#wrapping-up", - "href": "posts/consider-the-decision-tree/index.html#wrapping-up", - "title": "Consider the Decision Tree", + "objectID": "posts/xgboost-for-regression-in-python/index.html#wrapping-up", + "href": "posts/xgboost-for-regression-in-python/index.html#wrapping-up", + "title": "XGBoost for Regression in Python", "section": "Wrapping Up", - "text": "Wrapping Up\nWell, there you go, that’s my take on the high-level overview of the decision tree and its main strengths and weaknesses. As we’ve seen, ensembling allows us to keep the conveniences of the decision tree while mitigating its core weakness of relatively weak predictive power. This is why tree ensembles are so popular in practical applications. We glossed over pretty much all details of how trees actually do their magic, but fear not, next time we’re going to get rowdy and build one of these things from scratch." + "text": "Wrapping Up\nThere you have it, how to use XGBoost to solve a regression problem in python with world class performance. Remember you can use the XGBoost regression notebook from my ds-templates repo to make it easy to follow this flow on your own problems. If you found this helpful, or if you have additional ideas about solving regression problems with XGBoost, let me know down in the comments." }, { - "objectID": "posts/xgboost-from-scratch/index.html", - "href": "posts/xgboost-from-scratch/index.html", - "title": "XGBoost from Scratch", + "objectID": "posts/hello-world/index.html", + "href": "posts/hello-world/index.html", + "title": "Hello World! And Why I’m Inspired to Start a Blog", "section": "", - "text": "A weathered tree reaches toward the sea at Playa Mal País\nWell, dear reader, it’s that time again, time for us to do a seemingly unnecessary scratch build of a popular algorithm that most people would simply import from the library without a second thought. But readers of this blog are not most people. Of course you know that when we do scratch builds, it’s not for the hell of it, it’s for the purpose of demystification. 
To that end, today we are going to implement XGBoost from scratch in python, using only numpy and pandas.\nSpecifically we’re going to implement the core statistical learning algorithm of XGBoost, including most of the key hyperparameters and their functionality. Our implementation will also support user-defined custom objective functions, meaning that it can perform regression, classification, and whatever exotic learning tasks you can dream up, as long as you can write down a twice-differentiable objective function. We’ll refrain from implementing some simple features like column subsampling which will be left to you, gentle reader, as exercises. In terms of tree methods, we’re going to implement the exact tree-splitting algorithm, leaving the sparsity-aware method (used to handle missing feature values) and the approximate method (used for scalability) as exercises or maybe topics for future posts.\nAs always, if something is unclear, try backtracking through the previous posts on gradient boosting and decision trees to clarify your intuition. We’ve already built up all the statistical and computational background needed to make sense of this scratch build. Here are the most important prerequisite posts:\nGreat, let’s do this." + "text": "Matt raises his arms in joy at the world.!\nWell, I’ve been thinking about getting this blog started for months now. I guess a combination of inertia, up-front investment in blogging platform selection/setup, and spending a little too much time writing and rewriting the first content post has drawn out the period from initial inspiration to making the blog a reality. Needless to say, I’m pretty excited to finally get things going.\nBefore we dive headlong into the weeds of ML algorithms, statistical methods, and whatever I happen to be learning and teaching at the moment, I figured it would be good to articulate why I’ve felt inspired to get started blogging in the first place. Hopefully this will serve the dual purpose of clarifying my intentions and introducing a vastly underappreciated concept in data science that I hope to weave through the posts to come." }, { - "objectID": "posts/xgboost-from-scratch/index.html#the-xgboost-model-class", - "href": "posts/xgboost-from-scratch/index.html#the-xgboost-model-class", - "title": "XGBoost from Scratch", - "section": "The XGBoost Model Class", - "text": "The XGBoost Model Class\nWe begin with the user-facing API for our model, a class called XGBoostModel which will implement gradient boosting and prediction. To be more consistent with the XGBoost library, we’ll pass hyperparameters to our model in a parameter dictionary, so our init method is going to pull relevant parameters out of the dictionary and set them as object attributes. 
Note the use of python’s defaultdict so we don’t have to worry about handling key errors if we try to access a parameter that the user didn’t set in the dictionary.\n\nimport math\nimport numpy as np \nimport pandas as pd\nfrom collections import defaultdict\n\n\nclass XGBoostModel():\n '''XGBoost from Scratch\n '''\n \n def __init__(self, params, random_seed=None):\n self.params = defaultdict(lambda: None, params)\n self.subsample = self.params['subsample'] \\\n if self.params['subsample'] else 1.0\n self.learning_rate = self.params['learning_rate'] \\\n if self.params['learning_rate'] else 0.3\n self.base_prediction = self.params['base_score'] \\\n if self.params['base_score'] else 0.5\n self.max_depth = self.params['max_depth'] \\\n if self.params['max_depth'] else 5\n self.rng = np.random.default_rng(seed=random_seed)\n\nThe fit method, based on our classic GBM, takes a feature dataframe, a target vector, the objective function, and the number of boosting rounds as arguments. The user-supplied objective function should be an object with loss, gradient, and hessian methods, each of which takes a target vector and a prediction vector as input; the loss method should return a scalar loss score, the gradient method should return a vector of gradients, and the hessian method should return a vector of hessians.\nIn contrast to boosting in the classic GBM, instead of computing residuals between the current predictions and the target, we compute gradients and hessians of the loss function with respect to the current predictions, and instead of predicting residuals with a decision tree, we fit a special XGBoost tree booster (which we’ll implement in a moment) using the gradients and hessians. I’ve also added row subsampling by drawing a random subset of instance indices and passing them to the tree booster during each boosting round. The rest of the fit method is the same as the classic GBM, and the predict method is identical too.\n\ndef fit(self, X, y, objective, num_boost_round, verbose=False):\n current_predictions = self.base_prediction * np.ones(shape=y.shape)\n self.boosters = []\n for i in range(num_boost_round):\n gradients = objective.gradient(y, current_predictions)\n hessians = objective.hessian(y, current_predictions)\n sample_idxs = None if self.subsample == 1.0 \\\n else self.rng.choice(len(y), \n size=math.floor(self.subsample*len(y)), \n replace=False)\n booster = TreeBooster(X, gradients, hessians, \n self.params, self.max_depth, sample_idxs)\n current_predictions += self.learning_rate * booster.predict(X)\n self.boosters.append(booster)\n if verbose: \n print(f'[{i}] train loss = {objective.loss(y, current_predictions)}')\n \ndef predict(self, X):\n return (self.base_prediction + self.learning_rate \n * np.sum([booster.predict(X) for booster in self.boosters], axis=0))\n\nXGBoostModel.fit = fit\nXGBoostModel.predict = predict \n\nAll we have to do now is implement the tree booster." + "objectID": "posts/hello-world/index.html#learning", + "href": "posts/hello-world/index.html#learning", + "title": "Hello World! And Why I’m Inspired to Start a Blog", + "section": "Learning", + "text": "Learning\nThe initial inception about blogging probably originated from some comments about learning that Jeremy Howard makes in the Practical Deep Learning course from fastai. During one of the lectures, he mentions that it’s a great idea to start blogging. 
To paraphrase Jeremy:\n\nThe thing I really love about blogging is that it helps you learn; by writing things down, you synthesize your ideas.\n\nBeautiful. That definitely rings true for me. I tend to take notes and play around with code when learning new concepts anyway. One of my key hypotheses about this blogging experiment is that making the effort to transform those notes into blog posts will help me learn more effectively." }, { - "objectID": "posts/xgboost-from-scratch/index.html#the-xgboost-tree-booster", - "href": "posts/xgboost-from-scratch/index.html#the-xgboost-tree-booster", - "title": "XGBoost from Scratch", - "section": "The XGBoost Tree Booster", - "text": "The XGBoost Tree Booster\nThe XGBoost tree booster is a modified version of the decision tree that we built in the decision tree from scratch post. Like the decision tree, we recursively build a binary tree structure by finding the best split rule for each node in the tree. The main difference is the criterion for evaluating splits and the way that we define a leaf’s predicted value. Instead of being functions of the target values of the instances in each node, the criterion and predicted values are functions of the instance gradients and hessians. Thus we need only make a couple of modifications to our previous decision tree implementation to create the XGBoost tree booster.\n\nInitialization and Inserting Child Nodes\nMost of the init method is just parsing the parameter dictionary to assign parameters as object attributes. The one notable difference from our decision tree is in the way we define the node’s predicted value. We define self.value according to equation 5 of the XGBoost paper, a simple function of the gradient and hessian values of the instances in the current node. Of course the init also goes on to build the tree via the maybe insert child nodes method. This method is nearly identical to the one we implemented for our decision tree. 
So far so good.\n\nclass TreeBooster():\n \n def __init__(self, X, g, h, params, max_depth, idxs=None):\n self.params = params\n self.max_depth = max_depth\n assert self.max_depth >= 0, 'max_depth must be nonnegative'\n self.min_child_weight = params['min_child_weight'] \\\n if params['min_child_weight'] else 1.0\n self.reg_lambda = params['reg_lambda'] if params['reg_lambda'] else 1.0\n self.gamma = params['gamma'] if params['gamma'] else 0.0\n self.colsample_bynode = params['colsample_bynode'] \\\n if params['colsample_bynode'] else 1.0\n if isinstance(g, pd.Series): g = g.values\n if isinstance(h, pd.Series): h = h.values\n if idxs is None: idxs = np.arange(len(g))\n self.X, self.g, self.h, self.idxs = X, g, h, idxs\n self.n, self.c = len(idxs), X.shape[1]\n self.value = -g[idxs].sum() / (h[idxs].sum() + self.reg_lambda) # Eq (5)\n self.best_score_so_far = 0.\n if self.max_depth > 0:\n self._maybe_insert_child_nodes()\n\n def _maybe_insert_child_nodes(self):\n for i in range(self.c): self._find_better_split(i)\n if self.is_leaf: return\n x = self.X.values[self.idxs,self.split_feature_idx]\n left_idx = np.nonzero(x <= self.threshold)[0]\n right_idx = np.nonzero(x > self.threshold)[0]\n self.left = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[left_idx])\n self.right = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[right_idx])\n\n @property\n def is_leaf(self): return self.best_score_so_far == 0.\n\n def _find_better_split(self, feature_idx):\n pass\n\n\n\nSplit Finding\nSplit finding follows the exact same pattern that we used in the decision tree, except we keep track of gradient and hessian stats instead of target value stats, and of course we use the XGBoost gain criterion (equation 7 from the paper) for evaluating splits.\n\ndef _find_better_split(self, feature_idx):\n x = self.X.values[self.idxs, feature_idx]\n g, h = self.g[self.idxs], self.h[self.idxs]\n sort_idx = np.argsort(x)\n sort_g, sort_h, sort_x = g[sort_idx], h[sort_idx], x[sort_idx]\n sum_g, sum_h = g.sum(), h.sum()\n sum_g_right, sum_h_right = sum_g, sum_h\n sum_g_left, sum_h_left = 0., 0.\n\n for i in range(0, self.n - 1):\n g_i, h_i, x_i, x_i_next = sort_g[i], sort_h[i], sort_x[i], sort_x[i + 1]\n sum_g_left += g_i; sum_g_right -= g_i\n sum_h_left += h_i; sum_h_right -= h_i\n if sum_h_left < self.min_child_weight or x_i == x_i_next:continue\n if sum_h_right < self.min_child_weight: break\n\n gain = 0.5 * ((sum_g_left**2 / (sum_h_left + self.reg_lambda))\n + (sum_g_right**2 / (sum_h_right + self.reg_lambda))\n - (sum_g**2 / (sum_h + self.reg_lambda))\n ) - self.gamma/2 # Eq(7) in the xgboost paper\n if gain > self.best_score_so_far: \n self.split_feature_idx = feature_idx\n self.best_score_so_far = gain\n self.threshold = (x_i + x_i_next) / 2\n \nTreeBooster._find_better_split = _find_better_split\n\n\n\nPrediction\nPrediction works exactly the same as in our decision tree, and the methods are nearly identical.\n\ndef predict(self, X):\n return np.array([self._predict_row(row) for i, row in X.iterrows()])\n\ndef _predict_row(self, row):\n if self.is_leaf: \n return self.value\n child = self.left if row[self.split_feature_idx] <= self.threshold \\\n else self.right\n return child._predict_row(row)\n\nTreeBooster.predict = predict \nTreeBooster._predict_row = _predict_row" + "objectID": "posts/hello-world/index.html#teaching", + "href": "posts/hello-world/index.html#teaching", + "title": "Hello World! 
And Why I’m Inspired to Start a Blog", + "section": "Teaching", + "text": "Teaching\nAh, teaching. Yes, sometimes it’s that thing that takes time away from your research, forcing you to sit alone in a windowless room squinting at hand-written math on a fat stack of homework assignments. But sometimes it actually involves interacting with students, endeavoring to explain a concept, and watching them light up when they get it. The latter manifestation of teaching was one of my favorite things about grad school and academia in general. While I certainly still get to do some teaching as an industry data scientist, I could see myself returning to a more teaching-centric gig somewhere off in the future. Thus we have our second key hypothesis about the blogging experiment, that the writing will entertain my inclination to teach." }, { - "objectID": "posts/xgboost-from-scratch/index.html#the-complete-xgboost-from-scratch-implementation", - "href": "posts/xgboost-from-scratch/index.html#the-complete-xgboost-from-scratch-implementation", - "title": "XGBoost from Scratch", - "section": "The Complete XGBoost From Scratch Implementation", - "text": "The Complete XGBoost From Scratch Implementation\nHere’s the entire implementation which produces a usable XGBoostModel class with fit and predict methods.\n\nclass XGBoostModel():\n '''XGBoost from Scratch\n '''\n \n def __init__(self, params, random_seed=None):\n self.params = defaultdict(lambda: None, params)\n self.subsample = self.params['subsample'] \\\n if self.params['subsample'] else 1.0\n self.learning_rate = self.params['learning_rate'] \\\n if self.params['learning_rate'] else 0.3\n self.base_prediction = self.params['base_score'] \\\n if self.params['base_score'] else 0.5\n self.max_depth = self.params['max_depth'] \\\n if self.params['max_depth'] else 5\n self.rng = np.random.default_rng(seed=random_seed)\n \n def fit(self, X, y, objective, num_boost_round, verbose=False):\n current_predictions = self.base_prediction * np.ones(shape=y.shape)\n self.boosters = []\n for i in range(num_boost_round):\n gradients = objective.gradient(y, current_predictions)\n hessians = objective.hessian(y, current_predictions)\n sample_idxs = None if self.subsample == 1.0 \\\n else self.rng.choice(len(y), \n size=math.floor(self.subsample*len(y)), \n replace=False)\n booster = TreeBooster(X, gradients, hessians, \n self.params, self.max_depth, sample_idxs)\n current_predictions += self.learning_rate * booster.predict(X)\n self.boosters.append(booster)\n if verbose: \n print(f'[{i}] train loss = {objective.loss(y, current_predictions)}')\n \n def predict(self, X):\n return (self.base_prediction + self.learning_rate \n * np.sum([booster.predict(X) for booster in self.boosters], axis=0))\n \nclass TreeBooster():\n \n def __init__(self, X, g, h, params, max_depth, idxs=None):\n self.params = params\n self.max_depth = max_depth\n assert self.max_depth >= 0, 'max_depth must be nonnegative'\n self.min_child_weight = params['min_child_weight'] \\\n if params['min_child_weight'] else 1.0\n self.reg_lambda = params['reg_lambda'] if params['reg_lambda'] else 1.0\n self.gamma = params['gamma'] if params['gamma'] else 0.0\n self.colsample_bynode = params['colsample_bynode'] \\\n if params['colsample_bynode'] else 1.0\n if isinstance(g, pd.Series): g = g.values\n if isinstance(h, pd.Series): h = h.values\n if idxs is None: idxs = np.arange(len(g))\n self.X, self.g, self.h, self.idxs = X, g, h, idxs\n self.n, self.c = len(idxs), X.shape[1]\n self.value = -g[idxs].sum() / 
(h[idxs].sum() + self.reg_lambda) # Eq (5)\n self.best_score_so_far = 0.\n if self.max_depth > 0:\n self._maybe_insert_child_nodes()\n\n def _maybe_insert_child_nodes(self):\n for i in range(self.c): self._find_better_split(i)\n if self.is_leaf: return\n x = self.X.values[self.idxs,self.split_feature_idx]\n left_idx = np.nonzero(x <= self.threshold)[0]\n right_idx = np.nonzero(x > self.threshold)[0]\n self.left = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[left_idx])\n self.right = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[right_idx])\n\n @property\n def is_leaf(self): return self.best_score_so_far == 0.\n \n def _find_better_split(self, feature_idx):\n x = self.X.values[self.idxs, feature_idx]\n g, h = self.g[self.idxs], self.h[self.idxs]\n sort_idx = np.argsort(x)\n sort_g, sort_h, sort_x = g[sort_idx], h[sort_idx], x[sort_idx]\n sum_g, sum_h = g.sum(), h.sum()\n sum_g_right, sum_h_right = sum_g, sum_h\n sum_g_left, sum_h_left = 0., 0.\n\n for i in range(0, self.n - 1):\n g_i, h_i, x_i, x_i_next = sort_g[i], sort_h[i], sort_x[i], sort_x[i + 1]\n sum_g_left += g_i; sum_g_right -= g_i\n sum_h_left += h_i; sum_h_right -= h_i\n if sum_h_left < self.min_child_weight or x_i == x_i_next:continue\n if sum_h_right < self.min_child_weight: break\n\n gain = 0.5 * ((sum_g_left**2 / (sum_h_left + self.reg_lambda))\n + (sum_g_right**2 / (sum_h_right + self.reg_lambda))\n - (sum_g**2 / (sum_h + self.reg_lambda))\n ) - self.gamma/2 # Eq(7) in the xgboost paper\n if gain > self.best_score_so_far: \n self.split_feature_idx = feature_idx\n self.best_score_so_far = gain\n self.threshold = (x_i + x_i_next) / 2\n \n def predict(self, X):\n return np.array([self._predict_row(row) for i, row in X.iterrows()])\n\n def _predict_row(self, row):\n if self.is_leaf: \n return self.value\n child = self.left if row[self.split_feature_idx] <= self.threshold \\\n else self.right\n return child._predict_row(row)" + "objectID": "posts/hello-world/index.html#contributing", + "href": "posts/hello-world/index.html#contributing", + "title": "Hello World! And Why I’m Inspired to Start a Blog", + "section": "Contributing", + "text": "Contributing\nWorking in the field of data science today is a bit like standing in front of a massive complimentary all-you-can-learn buffet. There is an abundance of free material out on the interwebs for learning pretty much anything in data science from hello world python tutorials to research papers on cutting-edge deep learning techniques. I’ve personally benefited from many a blog post that helped me unpack a new concept or get started using a new tool. And let’s not forget the gigantic cyber warehouse full of freely available open source software tools that volunteer developers have straight-up donated to humanity.\nI realize that up to now, I’ve simply been consuming all of this free goodness without giving anything substantive back in return. Well then, it’s time to start evening the score. Which brings us to key hypothesis number three, that through these blog posts, I might be able to create something helpful, thereby being of service to a community that has freely given so much to me." }, { - "objectID": "posts/xgboost-from-scratch/index.html#testing", - "href": "posts/xgboost-from-scratch/index.html#testing", - "title": "XGBoost from Scratch", - "section": "Testing", - "text": "Testing\nLet’s take this baby for a spin and benchmark its performance against the actual XGBoost library. 
We use the scikit-learn California housing dataset for benchmarking.\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.model_selection import train_test_split\n \nX, y = fetch_california_housing(as_frame=True, return_X_y=True)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, \n random_state=43)\n\nLet’s start with a nice friendly squared error objective function for training. We should probably have a future post all about how to define custom objective functions in XGBoost, but for now, here’s how I define squared error.\n\nclass SquaredErrorObjective():\n def loss(self, y, pred): return np.mean((y - pred)**2)\n def gradient(self, y, pred): return pred - y\n def hessian(self, y, pred): return np.ones(len(y))\n\nHere I use a more or less arbitrary set of hyperparameters for training. Feel free to play around with tuning and trying other parameter combinations yourself.\n\nimport xgboost as xgb\n\nparams = {\n 'learning_rate': 0.1,\n 'max_depth': 5,\n 'subsample': 0.8,\n 'reg_lambda': 1.5,\n 'gamma': 0.0,\n 'min_child_weight': 25,\n 'base_score': 0.0,\n 'tree_method': 'exact',\n}\nnum_boost_round = 50\n\n# train the from-scratch XGBoost model\nmodel_scratch = XGBoostModel(params, random_seed=42)\nmodel_scratch.fit(X_train, y_train, SquaredErrorObjective(), num_boost_round)\n\n# train the library XGBoost model\ndtrain = xgb.DMatrix(X_train, label=y_train)\ndtest = xgb.DMatrix(X_test, label=y_test)\nmodel_xgb = xgb.train(params, dtrain, num_boost_round)\n\nLet’s check the models’ performance on the held out test data to benchmark our implementation.\n\npred_scratch = model_scratch.predict(X_test)\npred_xgb = model_xgb.predict(dtest)\nprint(f'scratch score: {SquaredErrorObjective().loss(y_test, pred_scratch)}')\nprint(f'xgboost score: {SquaredErrorObjective().loss(y_test, pred_xgb)}')\n\nscratch score: 0.2434125759558149\nxgboost score: 0.24123239765807963\n\n\nWell, look at that! Our scratch-built XGBoost is looking pretty consistent with the library. Go us!"
  },
  {
    "objectID": "posts/xgboost-from-scratch/index.html#wrapping-up",
    "href": "posts/xgboost-from-scratch/index.html#wrapping-up",
    "title": "XGBoost from Scratch",
    "section": "Wrapping Up",
    "text": "Wrapping Up\nI’d say this is a pretty good milestone for us here at Random Realizations. We’ve been hammering away at the various concepts around gradient boosting, leaving a trail of equations and scratch-built algos in our wake. Today we put all of that together to create a legit scratch build of XGBoost, something that would have been out of reach for me before we embarked on this journey together over a year ago. To anyone with the patience to read through this stuff, cheers to you! 
I hope you’re learning and enjoying this as much as I am." + "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html", + "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html", + "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "section": "", + "text": "Cold water cascades over the rocks in Erwin, Tennessee.\nFriends, this is going to be an epic post! Today, we bring together all the ideas we’ve built up over the past few posts to nail down our understanding of the key ideas in Jerome Friedman’s seminal 2001 paper: “Greedy Function Approximation: A Gradient Boosting Machine.” In particular, we’ll summarize the highlights from the paper, and we’ll build an in-house python implementation of his generic gradient boosting algorithm which can train with any differentiable loss function. What’s more, we’ll go ahead and take our generic gradient boosting machine for a spin by training it with several of the most popular loss functions used in practice.\nAre you freaking stoked or what?\nSweet. Let’s do this." }, { - "objectID": "posts/xgboost-from-scratch/index.html#reader-exercises", - "href": "posts/xgboost-from-scratch/index.html#reader-exercises", - "title": "XGBoost from Scratch", - "section": "Reader Exercises", - "text": "Reader Exercises\nIf you want to take this a step further and deepen your understanding and coding abilities, let me recommend some exercises for you.\n\nImplement column subsampling. XGBoost itself provides column subsampling by tree, by level, and by node. Try implementing by tree first, then try adding by level or by node as well. These should be pretty straightforward to do.\nImplement sparsity aware split finding for missing feature values (Algorithm 2 in the XGBoost paper). This will be a little more involved, since you’ll need to refactor and modify several parts of the tree booster class." + "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedman-2001-tldr", + "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedman-2001-tldr", + "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "section": "Friedman 2001: TL;DR", + "text": "Friedman 2001: TL;DR\nI’ve mentioned this paper a couple of times before, but as far as I can tell, this is the origin of gradient boosting; it is therefore, a seminal work worth reading. You know what, I think you might like to pick up the paper and read it yourself. Like many papers, there is a lot of scary looking math in the first few pages, but if you’ve been following along on this blog, you’ll find that it’s actually totally approachable. This is the kind of thing that cures imposter syndrome, so give it a shot. That said, here’s the TL;DR as I see it.\nThe first part of the paper introduces the idea of fitting models by doing gradient descent in function space, an ingenious idea we spent an entire post demystifying earlier. Friedman goes on to introduce the generic gradient boost algorithm, which works with any differentiable loss function, as well as specific variants for minimizing absolute error, Huber loss, and binary deviance. In terms of hyperparameters, he points out that the learning rate can be used to reduce overfitting, while increased tree depth can help capture more complex interactions among features. 
He even discusses feature importance and partial dependence methods for interpreting fitted gradient boosting models.\nFriedman concludes by musing about the advantages of gradient boosting with trees. He notes some key advantages afforded by the use of decision trees including no need to rescale input data, robustness against irrelevant input features, and elegant handling of missing feature values. He points out that gradient boosting manages to capitalize on the benefits of decision trees while minimizing their key weakness (crappy accuracy). I think this offers a great insight into why gradient boosting models have become so widespread and successful in practical ML applications." }, { - "objectID": "posts/hello-pyspark/index.html", - "href": "posts/hello-pyspark/index.html", - "title": "Hello PySpark!", - "section": "", - "text": "A big day at Playa Guiones\nWell, you guessed it: it’s time for us to learn PySpark!\nI know, I know, I can hear you screaming into your pillow. Indeed we just spent all that time converting from R and learning python and why the hell do we need yet another API for working with dataframes?\nThat’s a totally fair question.\nSo what happens when we’re working on something in the real world, where datasets get large in a hurry, and we suddenly have a dataframe that no longer fits into memory? We need a way for our computations and datasets to scale across multiple nodes in a distributed system without having to get too fussy about all the distributed compute details.\nEnter PySpark.\nI think it’s fair to think of PySpark as a python package for working with arbitrarily large dataframes, i.e., it’s like pandas but scalable. It’s built on top of Apache Spark, a unified analytics engine for large-scale data processing. PySpark is essentially a way to access the functionality of spark via python code. While there are other high-level interfaces to Spark (such as Java, Scala, and R), for data scientists who are already working extensively with python, PySpark will be the natural interface of choice. PySpark also has great integration with SQL, and it has a companion machine learning library called MLlib that’s more or less a scalable scikit-learn (maybe we can cover it in a future post).\nSo, here’s the plan. First we’re going to get set up to run PySpark locally in a jupyter notebook on our laptop. This is my preferred environment for interactively playing with PySpark and learning the ropes. Then we’re going to get up and running in PySpark as quickly as possible by reviewing the most essential functionality for working with dataframes and comparing it to how we would do things in pandas. Once we’re comfortable running PySpark on the laptop, it’s going to be much easier to jump onto a distributed cluster and run PySpark at scale.\nLet’s do this." + "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedmans-generic-gradient-boosting-algorithm", + "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedmans-generic-gradient-boosting-algorithm", + "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "section": "Friedman’s Generic Gradient Boosting Algorithm", + "text": "Friedman’s Generic Gradient Boosting Algorithm\nLet’s take a closer look at Friedman’s original gradient boost algorithm, Alg. 
1 in Section 3 of the paper (translated into the notation we’ve been using so far).\nLike last time, we have training data \\((\\mathbf{y}, \\mathbf{X})\\) where \\(\\mathbf{y}\\) is a length-\\(n\\) vector of target values, and \\(\\mathbf{X}\\) is an \\(n \\times p\\) matrix with \\(n\\) observations of \\(p\\) features. We also have a differentiable loss function \\(L(\\mathbf{y}, \\mathbf{\\hat{y}}) = \\sum_{i=1}^n l(y_i, \\hat{y}_i)\\), a “learning rate” hyperparameter \\(\\eta\\), and a fixed number of model iterations \\(M\\).\nAlgorithm: gradient_boost\\((\\mathbf{X},\\mathbf{y},L,\\eta, M)\\) returns: model \\(F_M\\)\n\nLet base model \\(F_0(\\mathbf{x}) = c\\), where \\(c = \\text{argmin}_{c} \\sum_{i=1}^n l(y_i, c)\\)\nfor \\(m\\) = \\(0\\) to \\(M-1\\):\n     Let “pseudo-residual” vector \\(\\mathbf{r}_m = -\\nabla_{\\mathbf{\\hat{y}}_m} L(\\mathbf{y},\\mathbf{\\hat{y}}_m)\\)\n     Train decision tree regressor \\(h_m(\\mathbf{X})\\) to predict \\(\\mathbf{r}_m\\) (minimizing squared error)\n     foreach terminal leaf node \\(t \\in h_m\\):\n          Let \\(v = \\text{argmin}_v \\sum_{i \\in t} l(y_i, F_m(\\mathbf{x}_i) + v)\\)\n          Set terminal leaf node \\(t\\) to predict value \\(v\\)\n     \\(F_{m+1}(\\mathbf{X}) = F_{m}(\\mathbf{X}) + \\eta h_m(\\mathbf{X})\\)\nReturn composite model \\(F_M\\)\n\nBy now, most of this is already familiar to us. We begin by setting the base model \\(F_0\\) equal to the constant prediction value that minimizes the loss over all examples in the training dataset (line 1). Then we begin the boosting iterations (line 2), each time computing the negative gradients of the loss with respect to the current model predictions (known as the pseudo residuals) (line 3). We then fit our next decision tree regressor to predict the pseudo residuals (line 4).\nThen we encounter something new on lines 5-7. When we fit a vanilla decision tree regressor to predict pseudo residuals, we’re using mean squared error as the loss function to train the tree. As you might imagine, this works well when the global loss function is also squared error. But if we want to use a global loss other than squared error, there is an additional trick we can use to further increase the composite model’s accuracy. The idea is to continue using squared error to train each decision tree, keeping its structure and split conditions but altering the predicted value in each leaf to help minimize the global loss function. Instead of using the mean target value as the prediction for each node (as we would do when minimizing squared error), we use a numerical optimization method like line search to choose the constant value for that leaf that leads to the best overall loss. This is the same thing we did in line 1 of the algorithm to set the base prediction, but here we choose the optimal prediction for each terminal node of the newly trained decision tree." }, { - "objectID": "posts/hello-pyspark/index.html#how-to-run-pyspark-in-a-jupyter-notebook-on-your-laptop", - "href": "posts/hello-pyspark/index.html#how-to-run-pyspark-in-a-jupyter-notebook-on-your-laptop", - "title": "Hello PySpark!", - "section": "How to Run PySpark in a Jupyter Notebook on Your Laptop", - "text": "How to Run PySpark in a Jupyter Notebook on Your Laptop\nOk, I’m going to walk us through how to get things installed on a Mac or Linux machine where we’re using homebrew and conda to manage virtual environments. 
If you have a different setup, your favorite search engine will help you get PySpark set up locally.\n\n\n\n\n\n\nNote\n\n\n\nIt’s possible for Homebrew and Anaconda to interfere with one another. The simple rule of thumb is that whenever you want to use the brew command, first deactivate your conda environment by running conda deactivate. See this Stack Overflow question for more details.\n\n\n\nInstall Spark\nInstall Spark with homebrew.\nbrew install apache-spark\nNext we need to set up a SPARK_HOME environment variable in the shell. Check where Spark is installed.\nbrew info apache-spark\nYou should see something like\n==> apache-spark: stable 3.3.2 (bottled), HEAD\nEngine for large-scale data processing\nhttps://spark.apache.org/\n/opt/homebrew/Cellar/apache-spark/3.3.2 (1,453 files, 320.9MB) *\n...\nSet the SPARK_HOME environment variable to your spark installation path with /libexec appended to the end. To do this I added the following line to my .zshrc file.\nexport SPARK_HOME=/opt/homebrew/Cellar/apache-spark/3.3.2/libexec\nRestart your shell, and test the installation by starting the Spark shell.\nspark-shell\n...\nWelcome to\n ____ __\n / __/__ ___ _____/ /__\n _\\ \\/ _ \\/ _ `/ __/ '_/\n /___/ .__/\\_,_/_/ /_/\\_\\ version 3.3.2\n /_/\n \nUsing Scala version 2.12.15 (OpenJDK 64-Bit Server VM, Java 19.0.2)\nType in expressions to have them evaluated.\nType :help for more information.\n\nscala> \nIf you get the scala> prompt, then you’ve successfully installed Spark on your laptop!\n\n\nInstall PySpark\nUse conda to install the PySpark python package. As usual, it’s advisable to do this in a new virtual environment.\n$ conda install pyspark\nYou should be able to launch an interactive PySpark REPL by saying pyspark.\n$ pyspark\n...\nWelcome to\n ____ __\n / __/__ ___ _____/ /__\n _\\ \\/ _ \\/ _ `/ __/ '_/\n /__ / .__/\\_,_/_/ /_/\\_\\ version 3.1.2\n /_/\n\nUsing Python version 3.8.3 (default, Jul 2 2020 11:26:31)\nSpark context Web UI available at http://192.168.100.47:4041\nSpark context available as 'sc' (master = local[*], app id = local-1624127229929).\nSparkSession available as 'spark'.\n>>> \nThis time we get a familiar python >>> prompt. This is an interactive shell where we can easily experiment with PySpark. Feel free to run the example code in this post here in the PySpark shell, or, if you prefer a notebook, read on and we’ll get set up to run PySpark in a jupyter notebook.\n\n\n\n\n\n\nNote\n\n\n\nWhen I tried following this setup on a new Mac, I hit an error about being unable to find the Java Runtime. This stack overflow question lead me to the fix.\n\n\n\n\nThe Spark Session Object\nYou may have noticed that when we launched that PySpark interactive shell, it told us that something called SparkSession was available as 'spark'. So basically, what’s happening here is that when we launch the pyspark shell, it instantiates an object called spark which is an instance of class pyspark.sql.session.SparkSession. The spark session object is going to be our entry point for all kinds of PySpark functionality, i.e., we’re going to be saying things like spark.this() and spark.that() to make stuff happen.\nThe PySpark interactive shell is kind enough to instantiate one of these spark session objects for us automatically. 
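For example, right at that >>> prompt you can sanity check the session by making a tiny dataframe; this snippet is just my own quick illustration and is not part of the original post.\n\n# my own tiny sanity check; the example data and column names are made up for illustration\ndf = spark.createDataFrame([(1, 'a'), (2, 'b'), (3, 'c')], ['id', 'letter'])\ndf.show()\n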
However, when we’re using another interface to PySpark (like say a jupyter notebook running a python kernal), we’ll have to make a spark session object for ourselves.\n\n\nCreate a PySpark Session in a Jupyter Notebook\nThere are a few ways to run PySpark in jupyter which you can read about here.\nFor derping around with PySpark on your laptop, I think the best way is to instantiate a spark session from a jupyter notebook running on a regular python kernel. The method we’ll use involves running a standard jupyter notebook session with a python kernal and using the findspark package to initialize the spark session. So, first install the findspark package.\nconda install -c conda-forge findspark\nLaunch jupyter as usual.\njupyter notebook\nGo ahead and fire up a new notebook using a regular python 3 kernal. Once you land inside the notebook, there are a couple things we need to do to get a spark session instantiated. You can think of this as boilerplate code that we need to run in the first cell of a notebook where we’re going to use PySpark.\n\nimport pyspark\nimport findspark\nfrom pyspark.sql import SparkSession\n\nfindspark.init()\nspark = SparkSession.builder.appName('My Spark App').getOrCreate()\n\nFirst we’re running findspark’s init() method to find our Spark installation. If you run into errors here, make sure you got the SPARK_HOME environment variable correctly set in the install instructions above. Then we instantiate a spark session as spark. Once you run this, you’re ready to rock and roll with PySpark in your jupyter notebook.\n\n\n\n\n\n\nNote\n\n\n\nSpark provides a handy web UI that you can use for monitoring and debugging. Once you instantiate the spark session You can open the UI in your web browser at http://localhost:4040/jobs/." + "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#implementation", + "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#implementation", + "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "section": "Implementation", + "text": "Implementation\nI did some (half-assed) searching on the interweb for an implementation of GBM that allows the user to provide a custom loss function, and you know what? I couldn’t find anything. If you find another implementation, post in the comments so we can learn from it too.\nSince we need to modify the values predicted by our decision trees’ terminal nodes, we’ll want to brush up on the scikit-learn decision tree structure before we get going. 
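If those internals are unfamiliar, here is a minimal sketch (made-up toy data and illustrative names, not taken from the post) of the three pieces the implementation below relies on: tree_.children_left for finding the leaf nodes, apply() for routing rows to leaves, and tree_.value for overwriting a leaf's predicted value.

import numpy as np
from sklearn.tree import DecisionTreeRegressor

# toy data: a noisy ramp that flattens out at 5
rng = np.random.default_rng(0)
X = rng.uniform(0, 10, size=(200, 1))
y = np.where(X[:, 0] < 5, X[:, 0], 5.0) + rng.normal(0, 0.3, size=200)

tree = DecisionTreeRegressor(max_depth=2).fit(X, y)

# leaves are the nodes with no children; -1 marks "no child"
leaf_ids = np.nonzero(tree.tree_.children_left == -1)[0]

# apply() reports which leaf each row of X lands in
leaf_for_each_row = tree.apply(X)

# tree_.value holds each node's predicted value; overwriting a leaf's entry
# changes what predict() returns for every row routed to that leaf
some_leaf = leaf_ids[0]
tree.tree_.value[some_leaf, 0, 0] = 42.0
print(tree.predict(X[leaf_for_each_row == some_leaf][:3]))  # all 42.0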
You can see explanations of all the necessary decision tree hacks in this notebook.\n\nimport numpy as np\nfrom sklearn.tree import DecisionTreeRegressor \nfrom scipy.optimize import minimize\n\nclass GradientBoostingMachine():\n '''Gradient Boosting Machine supporting any user-supplied loss function.\n \n Parameters\n ----------\n n_trees : int\n number of boosting rounds\n \n learning_rate : float\n learning rate hyperparameter\n \n max_depth : int\n maximum tree depth\n '''\n \n def __init__(self, n_trees, learning_rate=0.1, max_depth=1):\n self.n_trees=n_trees; \n self.learning_rate=learning_rate\n self.max_depth=max_depth;\n \n def fit(self, X, y, objective):\n '''Fit the GBM using the specified loss function.\n \n Parameters\n ----------\n X : ndarray of size (number observations, number features)\n design matrix\n \n y : ndarray of size (number observations,)\n target values\n \n objective : loss function class instance\n Class specifying the loss function for training.\n Should implement two methods:\n loss(labels: ndarray, predictions: ndarray) -> float\n negative_gradient(labels: ndarray, predictions: ndarray) -> ndarray\n '''\n \n self.trees = []\n self.base_prediction = self._get_optimal_base_value(y, objective.loss)\n current_predictions = self.base_prediction * np.ones(shape=y.shape)\n for _ in range(self.n_trees):\n pseudo_residuals = objective.negative_gradient(y, current_predictions)\n tree = DecisionTreeRegressor(max_depth=self.max_depth)\n tree.fit(X, pseudo_residuals)\n self._update_terminal_nodes(tree, X, y, current_predictions, objective.loss)\n current_predictions += self.learning_rate * tree.predict(X)\n self.trees.append(tree)\n \n def _get_optimal_base_value(self, y, loss):\n '''Find the optimal initial prediction for the base model.'''\n fun = lambda c: loss(y, c)\n c0 = y.mean()\n return minimize(fun=fun, x0=c0).x[0]\n \n def _update_terminal_nodes(self, tree, X, y, current_predictions, loss):\n '''Update the tree's predictions according to the loss function.'''\n # terminal node id's\n leaf_nodes = np.nonzero(tree.tree_.children_left == -1)[0]\n # compute leaf for each sample in ``X``.\n leaf_node_for_each_sample = tree.apply(X)\n for leaf in leaf_nodes:\n samples_in_this_leaf = np.where(leaf_node_for_each_sample == leaf)[0]\n y_in_leaf = y.take(samples_in_this_leaf, axis=0)\n preds_in_leaf = current_predictions.take(samples_in_this_leaf, axis=0)\n val = self._get_optimal_leaf_value(y_in_leaf, \n preds_in_leaf,\n loss)\n tree.tree_.value[leaf, 0, 0] = val\n \n def _get_optimal_leaf_value(self, y, current_predictions, loss):\n '''Find the optimal prediction value for a given leaf.'''\n fun = lambda c: loss(y, current_predictions + c)\n c0 = y.mean()\n return minimize(fun=fun, x0=c0).x[0]\n \n def predict(self, X):\n '''Generate predictions for the given input data.'''\n return (self.base_prediction \n + self.learning_rate \n * np.sum([tree.predict(X) for tree in self.trees], axis=0))\n\nIn terms of design, we implement a class for the GBM with scikit-like fit and predict methods. Notice in the below implementation that the fit method is only 10 lines long, and corresponds very closely to Friedman’s gradient boost algorithm from above. Most of the complexity comes from the helper methods for updating the leaf values according to the specified loss function.\nWhen the user wants to call the fit method, they’ll need to supply the loss function they want to use for boosting. We’ll make the user implement their loss (a.k.a. 
objective) function as a class with two methods: (1) a loss method taking the labels and the predictions and returning the loss score and (2) a negative_gradient method taking the labels and the predictions and returning an array of negative gradients." }, { - "objectID": "posts/hello-pyspark/index.html#pyspark-concepts", - "href": "posts/hello-pyspark/index.html#pyspark-concepts", - "title": "Hello PySpark!", - "section": "PySpark Concepts", - "text": "PySpark Concepts\nPySpark provides two main abstractions for data: the RDD and the dataframe. RDD’s are just a distributed list of objects; we won’t go into details about them in this post. For us, the key object in PySpark is the dataframe.\nWhile PySpark dataframes expose much of the functionality you would expect from a library for tabular data manipulation, they behave a little differently from pandas dataframes, both syntactically and under-the-hood. There are a couple of key concepts that will help explain these idiosyncracies.\nImmutability - Pyspark RDD’s and dataframes are immutable. This means that if you change an object, e.g. by adding a column to a dataframe, PySpark returns a reference to a new dataframe; it does not modify the existing dataframe. This is kind of nice, because we don’t have to worry about that whole view versus copy nonsense that happens in pandas.\nLazy Evaluation - Lazy evaluation means that when we start manipulating a dataframe, PySpark won’t actually perform any of the computations until we explicitly ask for the result. This is nice because it potentially allows PySpark to do fancy optimizations before executing a sequence of operations. It’s also confusing at first, because PySpark will seem to blaze through complex operations and then take forever to print a few rows of the dataframe." + "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#testing-our-model", + "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#testing-our-model", + "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "section": "Testing our Model", + "text": "Testing our Model\nLet’s test drive our custom-loss-ready GBM with a few different loss functions! We’ll compare it to the scikit-learn GBM to sanity check our implementation.\n\nfrom sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n\nrng = np.random.default_rng()\n\n# test data\ndef make_test_data(n, noise_scale):\n x = np.linspace(0, 10, 500).reshape(-1,1)\n y = (np.where(x < 5, x, 5) + rng.normal(0, noise_scale, size=x.shape)).ravel()\n return x, y\n \n# print model loss scores\ndef print_model_loss_scores(obj, y, preds, sk_preds):\n print(f'From Scratch Loss = {obj.loss(y, pred):0.4}')\n print(f'Scikit-Learn Loss = {obj.loss(y, sk_pred):0.4}')\n\n\nMean Squared Error\nMean Squared Error (a.k.a. Least Squares) loss produces estimates of the mean target value conditioned on the feature values. 
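To see why, consider choosing a single constant prediction \\(c\\) for a set of examples (say, the examples in one leaf). This is just the standard calculus argument: setting the derivative of the squared error to zero,
\\[ \\frac{d}{dc} \\sum_{i=1}^n (y_i - c)^2 = -2 \\sum_{i=1}^n (y_i - c) = 0 \\quad \\Rightarrow \\quad c = \\frac{1}{n} \\sum_{i=1}^n y_i, \\]
so the optimal constant is the mean. The same argument gives the median for absolute error and the \\(\\alpha\\)-quantile for quantile loss, which is exactly the behavior we'll see in the next two sections.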
Here’s the implementation.\n\nx, y = make_test_data(500, 0.4)\n\n\n# from scratch GBM\nclass SquaredErrorLoss():\n '''User-Defined Squared Error Loss'''\n \n def loss(self, y, preds):\n return np.mean((y - preds)**2)\n \n def negative_gradient(self, y, preds):\n return y - preds\n \n\ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, SquaredErrorLoss())\npred = gbm.predict(x)\n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingRegressor(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='squared_error')\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict(x)\n\n\nprint_model_loss_scores(SquaredErrorLoss(), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.168\nScikit-Learn Loss = 0.168\n\n\n\n\n\n\n\n\n\nMean Absolute Error\nMean Absolute Error (a.k.a.Least Absolute Deviations) loss produces estimates of the median target value conditioned on the feature values. Here’s the implementation.\n\nx, y = make_test_data(500, 0.4)\n\n\n\n# from scratch GBM\nclass AbsoluteErrorLoss():\n '''User-Defined Absolute Error Loss'''\n \n def loss(self, y, preds):\n return np.mean(np.abs(y - preds))\n \n def negative_gradient(self, y, preds):\n return np.sign(y - preds)\n\n\ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, AbsoluteErrorLoss())\npred = gbm.predict(x)\n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingRegressor(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='absolute_error')\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict(x)\n\n\nprint_model_loss_scores(AbsoluteErrorLoss(), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.3225\nScikit-Learn Loss = 0.3208\n\n\n\n\n\n\n\n\n\nQuantile Loss\nQuantile loss yields estimates of a given quantile of the target variable conditioned on the features. Here’s my implementation.\n\nx, y = make_test_data(500, 1)\n\n\n\n# from scratch GBM\nclass QuantileLoss():\n '''Quantile Loss\n \n Parameters\n ----------\n alpha : float\n quantile to be estimated, 0 < alpha < 1\n '''\n \n def __init__(self, alpha):\n if alpha < 0 or alpha >1:\n raise ValueError('alpha must be between 0 and 1')\n self.alpha = alpha\n \n def loss(self, y, preds):\n e = y - preds\n return np.mean(np.where(e > 0, self.alpha * e, (self.alpha - 1) * e))\n \n def negative_gradient(self, y, preds):\n e = y - preds \n return np.where(e > 0, self.alpha, self.alpha - 1)\n\ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, QuantileLoss(alpha=0.9))\npred = gbm.predict(x) \n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingRegressor(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='quantile', alpha=0.9)\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict(x)\n\n\nprint_model_loss_scores(QuantileLoss(alpha=0.9), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.1853\nScikit-Learn Loss = 0.1856\n\n\n\n\n\n\n\n\n\nBinary Cross Entropy Loss\nThe previous losses are useful for regression problems, where the target is numeric. But we can also solve classification problems, simply by swapping in an appropriate loss function. Here we’ll implement binary cross entropy, a.k.a. binary deviance, a.k.a. negative binomial log likelihood (sometimes abusively called log loss). One thing to remember is that, as with logistic regression, our model is actually predicting the log odds ratio, not the probability of the positive class. 
Thus we use expit transformations (the inverse of logit) whenever probabilities are needed, e.g., when predicting the probability that an observation belongs to the positive class.\n\n# make categorical test data\n\ndef expit(t):\n return np.exp(t) / (1 + np.exp(t))\n\nx = np.linspace(-3, 3, 500)\np = expit(x)\ny = rng.binomial(1, p, size=p.shape)\nx = x.reshape(-1,1)\n\n\n# from scratch GBM\nclass BinaryCrossEntropyLoss():\n '''Binary Cross Entropy Loss\n \n Note that the predictions should be log odds ratios.\n '''\n \n def __init__(self):\n self.expit = lambda t: np.exp(t) / (1 + np.exp(t))\n \n def loss(self, y, preds):\n p = self.expit(preds)\n return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))\n \n def negative_gradient(self, y, preds):\n p = self.expit(preds)\n return y / p - (1 - y) / (1 - p)\n\n \ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, BinaryCrossEntropyLoss())\npred = expit(gbm.predict(x))\n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingClassifier(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='log_loss')\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict_proba(x)[:, 1]\n\n\nprint_model_loss_scores(BinaryCrossEntropyLoss(), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.6379\nScikit-Learn Loss = 0.6403" }, { - "objectID": "posts/hello-pyspark/index.html#pyspark-dataframe-essentials", - "href": "posts/hello-pyspark/index.html#pyspark-dataframe-essentials", - "title": "Hello PySpark!", - "section": "PySpark Dataframe Essentials", - "text": "PySpark Dataframe Essentials\n\nCreating a PySpark dataframe with createDataFrame()\nThe first thing we’ll need is a way to make dataframes. createDataFrame() allows us to create PySpark dataframes from python objects like nested lists or pandas dataframes. Notice that createDataFrame() is a method of the spark session class, so we’ll call it from our spark session sparkby saying spark.createDataFrame().\n\n# create pyspark dataframe from nested lists\nmy_df = spark.createDataFrame(\n data=[\n [2022, \"tiger\"],\n [2023, \"rabbit\"],\n [2024, \"dragon\"]\n ],\n schema=['year', 'animal']\n)\n\nLet’s read the seaborn tips dataset into a pandas dataframe and then use it to create a PySpark dataframe.\n\nimport pandas as pd\n\n# load tips dataset into a pandas dataframe\npandas_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')\n\n# create pyspark dataframe from a pandas dataframe\npyspark_df = spark.createDataFrame(pandas_df)\n\n\n\n\n\n\n\nNote\n\n\n\nIn real life when we’re running PySpark on a large-scale distributed system, we would not generally want to use python lists or pandas dataframes to load data into PySpark. Ideally we would want to read data directly from where it is stored on HDFS, e.g. by reading parquet files, or by querying directly from a hive database using spark sql.\n\n\n\n\nPeeking at a dataframe’s contents\nThe default print method for the PySpark dataframe will just give you the schema.\n\npyspark_df\n\nDataFrame[total_bill: double, tip: double, sex: string, smoker: string, day: string, time: string, size: bigint]\n\n\nIf we want to peek at some of the data, we’ll need to use the show() method, which is analogous to the pandas head(). 
Remember that show() will cause PySpark to execute any operations that it’s been lazily waiting to evaluate, so sometimes it can take a while to run.\n\n# show the first few rows of the dataframe\npyspark_df.show(5)\n\n+----------+----+------+------+---+------+----+\n|total_bill| tip| sex|smoker|day| time|size|\n+----------+----+------+------+---+------+----+\n| 16.99|1.01|Female| No|Sun|Dinner| 2|\n| 10.34|1.66| Male| No|Sun|Dinner| 3|\n| 21.01| 3.5| Male| No|Sun|Dinner| 3|\n| 23.68|3.31| Male| No|Sun|Dinner| 2|\n| 24.59|3.61|Female| No|Sun|Dinner| 4|\n+----------+----+------+------+---+------+----+\nonly showing top 5 rows\n\n\n\n\n[Stage 0:> (0 + 1) / 1]\n\n \n\n\nWe thus encounter our first rude awakening. PySpark’s default representation of dataframes in the notebook isn’t as pretty as that of pandas. But no one ever said it would be pretty, they just said it would be scalable.\nYou can also use the printSchema() method for a nice vertical representation of the schema.\n\n# show the dataframe schema\npyspark_df.printSchema()\n\nroot\n |-- total_bill: double (nullable = true)\n |-- tip: double (nullable = true)\n |-- sex: string (nullable = true)\n |-- smoker: string (nullable = true)\n |-- day: string (nullable = true)\n |-- time: string (nullable = true)\n |-- size: long (nullable = true)\n\n\n\n\n\nSelect columns by name\nYou can select specific columns from a dataframe using the select() method. You can pass either a list of names, or pass names as arguments.\n\n# select some of the columns\npyspark_df.select('total_bill', 'tip')\n\n# select columns in a list\npyspark_df.select(['day', 'time', 'total_bill'])\n\n\n\nFilter rows based on column values\nAnalogous to the WHERE clause in SQL, and the query() method in pandas, PySpark provides a filter() method which returns only the rows that meet the specified conditions. Its argument is a string specifying the condition to be met for rows to be included in the result. You specify the condition as an expression involving the column names and comparison operators like <, >, <=, >=, == (equal), and ~= (not equal). You can specify compound expressions using and and or, and you can even do a SQL-like in to check if the column value matches any items in a list.\n\n## compare a column to a value\npyspark_df.filter('total_bill > 20')\n\n# compare two columns with arithmetic\npyspark_df.filter('tip > 0.15 * total_bill')\n\n# check equality with a string value\npyspark_df.filter('sex == \"Male\"')\n\n# check equality with any of several possible values\npyspark_df.filter('day in (\"Sat\", \"Sun\")')\n\n# use \"and\" \npyspark_df.filter('day == \"Fri\" and time == \"Lunch\"')\n\nIf you’re into boolean indexing with the brackets, PySpark does support that too, but I encourage you to use filter() instead. Check out my rant about why you shouldn’t use boolean indexing for the details. 
The TLDR is that filter() requires less typing, makes your code more readable and portable, and it allows you to chain method calls together using dot chains.\nHere’s the boolean indexing equivalent of the last example from above.\n\n# using boolean indexing\npyspark_df[(pyspark_df.day == 'Fri') & (pyspark_df.time == 'Lunch')]\n\nI know, it looks horrendous, but not as horrendous as the error message you’ll get if you forget the parentheses.\n\n\nAdd new columns to a dataframe\nYou can add new columns which are functions of the existing columns with the withColumn() method.\n\nimport pyspark.sql.functions as f\n\n# add a new column using col() to reference other columns\npyspark_df.withColumn('tip_percent', f.col('tip') / f.col('total_bill'))\n\nNotice that we’ve imported the pyspark.sql.functions module. This module contains lots of useful functions that we’ll be using all over the place, so it’s probably a good idea to go ahead and import it whenever you’re using PySpark. BTW, it seems like folks usually import this module as f or F. In this example we’re using the col() function, which allows us to refer to columns in our dataframe using string representations of the column names.\nYou could also achieve the same result using the dot to reference the other columns, but this requires us to type the dataframe name over and over again, which makes it harder to reuse this code on different dataframes or in dot chains.\n\n# add a new column using the dot to reference other columns (less recommended)\npyspark_df.withColumn('tip_percent', pyspark_df.tip / pyspark_df.total_bill)\n\nIf you want to apply numerical transformations like exponents or logs, use the built-in functions in the pyspark.sql.functions module.\n\n# log \npyspark_df.withColumn('log_bill', f.log(f.col('total_bill')))\n\n# exponent\npyspark_df.withColumn('bill_squared', f.pow(f.col('total_bill'), 2))\n\nYou can implement conditional assignment like SQL’s CASE WHEN construct using the when() function and the otherwise() method.\n\n# conditional assignment (like CASE WHEN)\npyspark_df.withColumn('is_male', f.when(f.col('sex') == 'Male', True).otherwise(False))\n\n# using multiple when conditions and values\npyspark_df.withColumn('bill_size', \n f.when(f.col('total_bill') < 10, 'small')\n .when(f.col('total_bill') < 20, 'medium')\n .otherwise('large')\n)\n\nRemember that since PySpark dataframes are immutable, calling withColumns() on a dataframe returns a new dataframe. If you want to persist the result, you’ll need to make an assignment.\npyspark_df = pyspark_df.withColumns(...)\n\n\nGroup by and aggregate\nPySpark provides a groupBy() method similar to the pandas groupby(). Just like in pandas, we can call methods like count() and mean() on our grouped dataframe, and we also have a more flexible agg() method that allows us to specify column-aggregation mappings.\n\n\n# group by and count\npyspark_df.groupBy('time').count().show()\n\n+------+-----+\n| time|count|\n+------+-----+\n|Dinner| 176|\n| Lunch| 68|\n+------+-----+\n\n\n\n\n\n# group by and specify column-aggregation mappings with agg()\npyspark_df.groupBy('time').agg({'total_bill': 'mean', 'tip': 'max'}).show()\n\n+------+--------+------------------+\n| time|max(tip)| avg(total_bill)|\n+------+--------+------------------+\n|Dinner| 10.0| 20.79715909090909|\n| Lunch| 6.7|17.168676470588235|\n+------+--------+------------------+\n\n\n\nIf you want to get fancier with your aggregations, it might just be easier to express them using hive syntax. 
Read on to find out how.\n\n\nRun Hive SQL on dataframes\nOne of the mind-blowing features of PySpark is that it allows you to write hive SQL queries on your dataframes. To take a PySpark dataframe into the SQL world, use the createOrReplaceTempView() method. This method takes one string argument which will be the dataframes name in the SQL world. Then you can use spark.sql() to run a query. The result is returned as a PySpark dataframe.\n\n\n# put pyspark dataframe in SQL world and query it\npyspark_df.createOrReplaceTempView('tips')\nspark.sql('select * from tips').show(5)\n\n+----------+----+------+------+---+------+----+\n|total_bill| tip| sex|smoker|day| time|size|\n+----------+----+------+------+---+------+----+\n| 16.99|1.01|Female| No|Sun|Dinner| 2|\n| 10.34|1.66| Male| No|Sun|Dinner| 3|\n| 21.01| 3.5| Male| No|Sun|Dinner| 3|\n| 23.68|3.31| Male| No|Sun|Dinner| 2|\n| 24.59|3.61|Female| No|Sun|Dinner| 4|\n+----------+----+------+------+---+------+----+\nonly showing top 5 rows\n\n\n\nThis is awesome for a couple of reasons. First, it allows us to easily express any transformations in hive syntax. If you’re like me and you’ve already been using hive, this will dramatically reduce the PySpark learning curve, because when in doubt, you can always bump a dataframe into the SQL world and simply use hive to do what you need. Second, if you have a hive deployment, PySpark’s SQL world also has access to all of your hive tables. This means you can write queries involving both hive tables and your PySpark dataframes. It also means you can run hive commands, like inserting into a table, directly from PySpark.\nLet’s do some aggregations that might be a little trickier to do using the PySpark built-in functions.\n\n\n# run hive query and save result to dataframe\ntip_stats_by_time = spark.sql(\"\"\"\n select\n time\n , count(*) as n \n , avg(tip) as avg_tip\n , percentile_approx(tip, 0.5) as med_tip\n , avg(case when tip > 3 then 1 else 0 end) as pct_tip_gt_3\n from \n tips\n group by 1\n\"\"\")\n\ntip_stats_by_time.show()\n\n+------+---+------------------+-------+-------------------+\n| time| n| avg_tip|med_tip| pct_tip_gt_3|\n+------+---+------------------+-------+-------------------+\n|Dinner|176| 3.102670454545455| 3.0|0.44886363636363635|\n| Lunch| 68|2.7280882352941176| 2.2|0.27941176470588236|\n+------+---+------------------+-------+-------------------+" + "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#wrapping-up", + "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#wrapping-up", + "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "section": "Wrapping Up", + "text": "Wrapping Up\nWoohoo! We did it! We finally made it through Friedman’s paper in its entirety, and we implemented the generic gradient boosting algorithm which works with any differentiable loss function. If you made it this far, great job, gold star! By now you hopefully have a pretty solid grasp on gradient boosting, which is good, because soon we’re going to dive into the modern Newton descent gradient boosting frameworks like XGBoost. Onward!" }, { - "objectID": "posts/hello-pyspark/index.html#visualization-with-pyspark", - "href": "posts/hello-pyspark/index.html#visualization-with-pyspark", - "title": "Hello PySpark!", - "section": "Visualization with PySpark", - "text": "Visualization with PySpark\nThere aren’t any tools for visualization included in PySpark. 
But that’s no problem, because we can just use the toPandas() method on a PySpark dataframe to pull data back into pandas. Once we have a pandas dataframe, we can happily build visualizations as usual. Of course, if your PySpark dataframe is huge, you wouldn’t want to use toPandas() directly, because PySpark will attempt to read the entire contents of its huge dataframe into memory. Instead, it’s best to use PySpark to generate aggregations of your data for plotting or to pull only a sample of your full data into pandas.\n\n# read aggregated pyspark dataframe into pandas for plotting\nplot_pdf = tip_stats_by_time.toPandas()\nplot_pdf.plot.bar(x='time', y=['avg_tip', 'med_tip']);" + "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#references", + "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#references", + "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "section": "References", + "text": "References\nFriedman’s 2001 paper: Greedy Function Approximation: A Gradient Boosting Machine" }, { - "objectID": "posts/hello-pyspark/index.html#wrapping-up", - "href": "posts/hello-pyspark/index.html#wrapping-up", - "title": "Hello PySpark!", + "objectID": "posts/get-down-with-gradient-descent/index.html", + "href": "posts/get-down-with-gradient-descent/index.html", + "title": "Get Down with Gradient Descent", + "section": "", + "text": "Ahh, gradient descent. It’s probably one of the most ubiquitous algorithms used in data science, but you’re unlikely to see it being celebrated in the limelight of the Kaggle podium. Rather than taking center stage, gradient descent operates under the hood, powering the training for a wide range of models including deep neural networks, gradient boosting trees, generalized linear models, and mixed effects models. Getting an intuition for the algorithm will reveal how model fitting actually works and help us to see the common thread connecting a wide range of seemingly unrelated models. In this post we’ll get the intuition for gradient descent with a fresh analogy, develop the mathematical formulation, and ground our understanding by using it to train ourselves a linear regression model." + }, + { + "objectID": "posts/get-down-with-gradient-descent/index.html#intuition", + "href": "posts/get-down-with-gradient-descent/index.html#intuition", + "title": "Get Down with Gradient Descent", + "section": "Intuition", + "text": "Intuition\nBefore we dive into the intuition for gradient descent itself, let’s get a high-level view of why it’s useful in training or fiting a model. Training a model basically means finding the model parameter values that make the model fit a given dataset well. We measure how well a model fits data using a special function variously called a loss or cost or objective function. A loss function takes the dataset and the model as arguments and returns a number that tells us how well our model fits the data. Therefore training is an optimization problem in which we search for the model parameter values that result in the minimum value of the loss function. Enter gradient descent.\nGradient descent is a numerical optimization technique that helps us find the inputs that yield the minimum value of a function. Since most explanations of the gradient descent algorithm seem to use a story about hikers being lost in some foggy mountains, we’re going to try out a new analogy.\nLet’s say you’re at a concert. Remember those? 
They’re these things that used to happen where people played music and everyone danced and had a great time.\n\nNOTE: Chiming in here in 2023 from a sort-of-post COVID 19 world, happily I can report that concerts and live music are back!\n\nNow suppose at this concert there’s a dance floor which has become a bit sweltering from copious amounts of “getting down”. But the temperature isn’t quite uniform; maybe there’s a cool spot from a ceiling fan somewhere.\n\n\n\ndance floor\n\n\nLet’s get ourselves to that cool spot using the following procedure.\n\nFrom our current location, figure out which direction feels coolest.\nTake a step (or simply shimmy) in that direction.\nRepeat steps 1 and 2 until we reach the coolest spot on the dance floor.\n\nThe crux of this procedure is figuring out, at each step, which direction yields the greatest temperature reduction. Our skin is pretty sensitive to temperature, so we can just use awareness of body sensation to sense which direction feels coolest. Luckily, we have a mathematical equivalent to our skin’s ability to sense local variation in temperature.\n\nDetermine which way to go\nLet \\(f(x,y)\\) be the temperature on the dance floor at position \\((x,y)\\). The direction of fastest decrease in temperature is going to be given by some vector in our \\((x,y)\\) space, e.g.,\n[vector component in \\(x\\) direction, vector component in \\(y\\) direction]\nTurns out that the gradient of a function evaluated at a particular location yields a vector that points in the direction of fastest increase in the function, pretty similar to what we’re looking for. The gradient of \\(f(x,y)\\) is given by\n\\[ \\nabla f(x,y) = \\left [ \\frac{\\partial f(x,y)}{\\partial x}, \\frac{\\partial f(x,y)}{\\partial y} \\right ] \\]\nThe components of the gradient vector are the partial derivatives of our function \\(f(x,y)\\), evaluated at the point \\((x,y)\\). These partial derivatives just tell us the slope of \\(f(x,y)\\) in the \\(x\\) and \\(y\\) directions respectively. The intuition is that if \\(\\frac{\\partial f(x,y)}{\\partial x}\\) is a large positive number, then moving in the positive \\(x\\) direction will make \\(f(x,y)\\) increase a lot, whereas if \\(\\frac{\\partial f(x,y)}{\\partial x}\\) is a large negative number, then moving in the negative \\(x\\) direction will make \\(f(x,y)\\) increase a lot.\nIt’s not too hard to see that the direction of fastest decrease is actually just the exact opposite direction from that of fastest increase. Since we can point a vector in the opposite direction by negating its component values, our direction of fastest temperature decrease will be given by the negative gradient of the temperature field \\(-\\nabla f(x,y)\\).\n\n\n\ndance floor with hot and cold sides\n\n\n\n\nTake a step in the right direction\nNow that we have our direction vector, we’re ready to take a step toward the cool part of the dance floor. To do this, we’ll just add our direction vector to our current position. 
The update rule would look like this.\n\\[ [x_\\text{next}, y_\\text{next}] = [x_\\text{prev}, y_\\text{prev}] - \\nabla f (x_\\text{prev}, y_\\text{prev}) = [x_\\text{prev}, y_\\text{prev}] - \\left [ \\frac{\\partial f (x_\\text{prev}, y_\\text{prev})}{\\partial x}, \\frac{\\partial f (x_\\text{prev}, y_\\text{prev})}{\\partial y} \\right ] \\]\nIf we iteratively apply this update rule, we’ll end up tracing a trajectory through the \\((x,y)\\) space on the dance floor and we’ll eventually end up at the coolest spot!\n\n\n\ndance floor with trajectory from hot side to cool side\n\n\nGreat success!" + }, + { + "objectID": "posts/get-down-with-gradient-descent/index.html#general-formulation", + "href": "posts/get-down-with-gradient-descent/index.html#general-formulation", + "title": "Get Down with Gradient Descent", + "section": "General Formulation", + "text": "General Formulation\nLet’s generalize a bit to get to the form of gradient descent you’ll see in references like the wikipedia article.\nFirst we modify our update equation above to handle functions with more than two arguments. We’ll use a bold \\(\\mathbf{x}\\) to indicate a vector of inputs \\(\\mathbf{x} = [x_1,x_2,\\dots,x_p]\\). Our function \\(f(\\mathbf{x}): \\mathbb{R}^p \\mapsto \\mathbb{R}\\) maps a \\(p\\) dimensional input to a scalar output.\nSecond, instead of displacing our current location with the negative gradient vector itself, we’ll first rescale it with a learning rate parameter. This helps address any issues with units on inputs versus outputs. Imagine the input could range between 0 and 1, but the output ranged from 0 to 1,000. We would need to rescale the partial derivatives so the update step doesn’t send us way too far off in input space.\nFinally, we’ll index our updates with \\(t=0,1,\\dots\\). We’ll run for some prespecified number of iterations or we’ll stop the procedure once the change in \\(f(\\mathbf{x})\\) is sufficiently small from one iteration to the next. Our update equation will look like this.\n\\[\\mathbf{x}_{t+1} = \\mathbf{x}_t - \\eta \\nabla f ( \\mathbf{x}_t) \\]\nIn pseudocode we could write it like this.\n# gradient descent\nx = initial_value_of_x \nfor t in range(n_iterations): # or some other convergence condition\n x -= learning_rate * gradient_of_f(x)\nNow let’s see how this algorithm gets used to train models." + }, + { + "objectID": "posts/get-down-with-gradient-descent/index.html#training-a-linear-regression-model-with-gradient-descent", + "href": "posts/get-down-with-gradient-descent/index.html#training-a-linear-regression-model-with-gradient-descent", + "title": "Get Down with Gradient Descent", + "section": "Training a Linear Regression Model with Gradient Descent", + "text": "Training a Linear Regression Model with Gradient Descent\nTo get the intuition for how we use gradient descent to train models, let’s use it to train a linear regression model. Note that we wouldn’t actually use gradient descent to train a linear model in real life since there is an exact analytical solution for the best-fit parameter values.\nAnyway, in the simple linear regression problem we have numerical feature \\(x\\) and numerical target \\(y\\), and we want to find a model of the form\n\\[F(x) = \\alpha + \\beta x\\]\nThis model has two parameters, \\(\\alpha\\) and \\(\\beta\\). Here “training” means finding the parameter values that make \\(F(x)\\) fit our \\(y\\) data best. 
We measure how well, or really how poorly, our model fits the data by using a loss function that yields a small value when a model fits well. Ordinary least squares is so named because it uses mean squared error as its loss function.\n\\[L(y, F(x)) = \\frac{1}{n} \\sum_{i=1}^{n} (y_i - F(x_i))^2 = \\frac{1}{n} \\sum_{i=1}^{n} (y_i - (\\alpha + \\beta x_i))^2 \\]\nThe loss function \\(L\\) takes four arguments: \\(x\\), \\(y\\), \\(\\alpha\\), and \\(\\beta\\). But since \\(x\\) and \\(y\\) are fixed given our dataset, we could write the loss as \\(L(\\alpha, \\beta | x, y)\\) to emphasize that \\(\\alpha\\) and \\(\\beta\\) are the only free parameters. So we’re looking for the following.\n\\[\\underset{\\alpha,\\beta}{\\operatorname{argmin}} ~ L(\\alpha,\\beta|x,y) \\]\nThat’s right, we’re looking for the values of \\(\\alpha\\) and \\(\\beta\\) that minimize scalar-valued function \\(L(\\alpha, \\beta)\\). Sounds familiar huh?\nTo solve this minimization problem with gradient descent, we can use the following update rule.\n\\[[\\alpha_{t+1}, \\beta_{t+1}] = [\\alpha_{t}, \\beta_{t}] - \\eta \\nabla L(\\alpha_t, \\beta_t | x, y) \\]\nTo get the gradient \\(\\nabla L(\\alpha,\\beta|x,y)\\), we need the partial derivatives of \\(L\\) with respect to \\(\\alpha\\) and \\(\\beta\\). Since \\(L\\) is just a big sum, it’s easy to calculate the derivatives.\n\\[ \\frac{\\partial L(\\alpha, \\beta)}{\\partial \\alpha} = \\frac{1}{n} \\sum_{i=1}^{n} -2 (y_i - (\\alpha + \\beta x_i)) \\] \\[ \\frac{\\partial L(\\alpha, \\beta)}{\\partial \\beta} = \\frac{1}{n} \\sum_{i=1}^{n} -2x_i (y_i - (\\alpha + \\beta x_i)) \\]\nGreat! We’ve got everything we need to implement gradient descent to train an ordinary least squares model. Everything except data that is.\n\nToy Data\nLet’s make a friendly little linear dataset where \\(\\alpha=-10\\) and \\(\\beta=2\\), i.e.\n\\[ y = -10 + 2x + \\text{noise}\\]\n\nimport numpy as np \n\nalpha_true = -10\nbeta_true = 2\n\nrng = np.random.default_rng(42)\nx = np.linspace(0, 10, 50)\ny = alpha_true + beta_true*x + rng.normal(0, 1, size=x.shape)\n\n\n\n\n\n\n\n\nImplementation\nOur implementation will use a function to compute the gradient of the loss function. Since we have two parameters, we’ll use length-2 arrays to hold their values and their partial derivatives. At each iteration, we update the parameter values by subtracting the rescaled partial derivatives.\n\n\n# linear regression using gradient descent \n\ndef gradient_of_loss(parameters, x, y):\n alpha = parameters[0]\n beta = parameters[1]\n partial_alpha = np.mean(-2*(y - (alpha + beta*x)))\n partial_beta = np.mean(-2*x*(y - (alpha + beta*x)))\n return np.array([partial_alpha, partial_beta])\n\nlearning_rate = 0.02\nparameters = np.array([0.0, 0.0]) # initial values of alpha and beta\n\nfor _ in range(500):\n partial_derivatives = gradient_of_loss(parameters, x, y)\n parameters -= learning_rate * partial_derivatives\n \nparameters\n\narray([-10.07049616, 2.03559051])\n\n\nWe can see the loss function decreasing throughout the 500 iterations.\n\n\n\n\n\nAnd we can visualize the loss function as a contour plot over \\((\\alpha,\\beta)\\) space. The blue points show the trajectory our gradient descent followed as it shimmied from the initial position to the coolest spot in \\((\\alpha, \\beta)\\) space where the loss function is nice and small.\n\n\n\n\n\nOur gradient descent settles in a spot pretty close to \\((-10, 2)\\) in \\((\\alpha,\\beta)\\) space, which gives us the final fitted model below." 
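Since, as noted above, ordinary least squares has an exact analytical solution, a quick sanity check is to solve the same toy problem in closed form and compare. This sketch just reuses the toy data recipe from above; the design-matrix name and the use of np.linalg.lstsq are my own choices for illustration, not the post's.

import numpy as np

# same toy data recipe as above: y = -10 + 2x + noise
rng = np.random.default_rng(42)
x = np.linspace(0, 10, 50)
y = -10 + 2 * x + rng.normal(0, 1, size=x.shape)

# closed-form least squares: add an intercept column and solve in one shot
X_design = np.column_stack([np.ones_like(x), x])
alpha_hat, beta_hat = np.linalg.lstsq(X_design, y, rcond=None)[0]
print(alpha_hat, beta_hat)  # lands essentially on top of the gradient descent estimates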
+ }, + { + "objectID": "posts/get-down-with-gradient-descent/index.html#wrapping-up", + "href": "posts/get-down-with-gradient-descent/index.html#wrapping-up", + "title": "Get Down with Gradient Descent", "section": "Wrapping Up", - "text": "Wrapping Up\nSo that’s a wrap on our crash course in working with PySpark. You now have a good idea of what pyspark is and how to get started manipulating dataframes with it. Stay tuned for a future post on PySpark’s companion ML library MLlib. In the meantime, may no dataframe be too large for you ever again." + "text": "Wrapping Up\nThere you have it, gradient descent explained with a fresh new analogy having nothing whatsoever to do with foggy mountains, plus an implemented example fitting a linear model. While we often see gradient descent used to train models by performing an optimization in parameter space, as in generalized linear models and neural networks, there are other ways to use this powerful technique to train models. In particular, we’ll soon see how our beloved gradient boosting tree models use gradient descent in prediction space, rather than parameter space. Stay tuned for that mind bender in a future post." }, { "objectID": "posts/8020-pandas-tutorial/index.html", @@ -350,123 +378,144 @@ "text": "Wrapping Up\nThere you have it, how to pull off the five most essential data transformation tasks using pandas in a style reminiscent of my beloved dplyr. Remember that part of the beauty of pandas is that since there are so many ways to do most tasks, you can develop your own style based on the kind of data you work with, what you like about other tools, how you see others using the tools, and of course your own taste and preferences.\nIf you found this post helpful or if you have your own preferred style for accomplishing any of these key transformations with pandas, do let me know about it in the comments." }, { - "objectID": "posts/get-down-with-gradient-descent/index.html", - "href": "posts/get-down-with-gradient-descent/index.html", - "title": "Get Down with Gradient Descent", + "objectID": "posts/hello-pyspark/index.html", + "href": "posts/hello-pyspark/index.html", + "title": "Hello PySpark!", "section": "", - "text": "Ahh, gradient descent. It’s probably one of the most ubiquitous algorithms used in data science, but you’re unlikely to see it being celebrated in the limelight of the Kaggle podium. Rather than taking center stage, gradient descent operates under the hood, powering the training for a wide range of models including deep neural networks, gradient boosting trees, generalized linear models, and mixed effects models. Getting an intuition for the algorithm will reveal how model fitting actually works and help us to see the common thread connecting a wide range of seemingly unrelated models. In this post we’ll get the intuition for gradient descent with a fresh analogy, develop the mathematical formulation, and ground our understanding by using it to train ourselves a linear regression model." + "text": "A big day at Playa Guiones\nWell, you guessed it: it’s time for us to learn PySpark!\nI know, I know, I can hear you screaming into your pillow. Indeed we just spent all that time converting from R and learning python and why the hell do we need yet another API for working with dataframes?\nThat’s a totally fair question.\nSo what happens when we’re working on something in the real world, where datasets get large in a hurry, and we suddenly have a dataframe that no longer fits into memory? 
We need a way for our computations and datasets to scale across multiple nodes in a distributed system without having to get too fussy about all the distributed compute details.\nEnter PySpark.\nI think it’s fair to think of PySpark as a python package for working with arbitrarily large dataframes, i.e., it’s like pandas but scalable. It’s built on top of Apache Spark, a unified analytics engine for large-scale data processing. PySpark is essentially a way to access the functionality of spark via python code. While there are other high-level interfaces to Spark (such as Java, Scala, and R), for data scientists who are already working extensively with python, PySpark will be the natural interface of choice. PySpark also has great integration with SQL, and it has a companion machine learning library called MLlib that’s more or less a scalable scikit-learn (maybe we can cover it in a future post).\nSo, here’s the plan. First we’re going to get set up to run PySpark locally in a jupyter notebook on our laptop. This is my preferred environment for interactively playing with PySpark and learning the ropes. Then we’re going to get up and running in PySpark as quickly as possible by reviewing the most essential functionality for working with dataframes and comparing it to how we would do things in pandas. Once we’re comfortable running PySpark on the laptop, it’s going to be much easier to jump onto a distributed cluster and run PySpark at scale.\nLet’s do this." }, { - "objectID": "posts/get-down-with-gradient-descent/index.html#intuition", - "href": "posts/get-down-with-gradient-descent/index.html#intuition", - "title": "Get Down with Gradient Descent", - "section": "Intuition", - "text": "Intuition\nBefore we dive into the intuition for gradient descent itself, let’s get a high-level view of why it’s useful in training or fiting a model. Training a model basically means finding the model parameter values that make the model fit a given dataset well. We measure how well a model fits data using a special function variously called a loss or cost or objective function. A loss function takes the dataset and the model as arguments and returns a number that tells us how well our model fits the data. Therefore training is an optimization problem in which we search for the model parameter values that result in the minimum value of the loss function. Enter gradient descent.\nGradient descent is a numerical optimization technique that helps us find the inputs that yield the minimum value of a function. Since most explanations of the gradient descent algorithm seem to use a story about hikers being lost in some foggy mountains, we’re going to try out a new analogy.\nLet’s say you’re at a concert. Remember those? They’re these things that used to happen where people played music and everyone danced and had a great time.\n\nNOTE: Chiming in here in 2023 from a sort-of-post COVID 19 world, happily I can report that concerts and live music are back!\n\nNow suppose at this concert there’s a dance floor which has become a bit sweltering from copious amounts of “getting down”. 
But the temperature isn’t quite uniform; maybe there’s a cool spot from a ceiling fan somewhere.\n\n\n\ndance floor\n\n\nLet’s get ourselves to that cool spot using the following procedure.\n\nFrom our current location, figure out which direction feels coolest.\nTake a step (or simply shimmy) in that direction.\nRepeat steps 1 and 2 until we reach the coolest spot on the dance floor.\n\nThe crux of this procedure is figuring out, at each step, which direction yields the greatest temperature reduction. Our skin is pretty sensitive to temperature, so we can just use awareness of body sensation to sense which direction feels coolest. Luckily, we have a mathematical equivalent to our skin’s ability to sense local variation in temperature.\n\nDetermine which way to go\nLet \\(f(x,y)\\) be the temperature on the dance floor at position \\((x,y)\\). The direction of fastest decrease in temperature is going to be given by some vector in our \\((x,y)\\) space, e.g.,\n[vector component in \\(x\\) direction, vector component in \\(y\\) direction]\nTurns out that the gradient of a function evaluated at a particular location yields a vector that points in the direction of fastest increase in the function, pretty similar to what we’re looking for. The gradient of \\(f(x,y)\\) is given by\n\\[ \\nabla f(x,y) = \\left [ \\frac{\\partial f(x,y)}{\\partial x}, \\frac{\\partial f(x,y)}{\\partial y} \\right ] \\]\nThe components of the gradient vector are the partial derivatives of our function \\(f(x,y)\\), evaluated at the point \\((x,y)\\). These partial derivatives just tell us the slope of \\(f(x,y)\\) in the \\(x\\) and \\(y\\) directions respectively. The intuition is that if \\(\\frac{\\partial f(x,y)}{\\partial x}\\) is a large positive number, then moving in the positive \\(x\\) direction will make \\(f(x,y)\\) increase a lot, whereas if \\(\\frac{\\partial f(x,y)}{\\partial x}\\) is a large negative number, then moving in the negative \\(x\\) direction will make \\(f(x,y)\\) increase a lot.\nIt’s not too hard to see that the direction of fastest decrease is actually just the exact opposite direction from that of fastest increase. Since we can point a vector in the opposite direction by negating its component values, our direction of fastest temperature decrease will be given by the negative gradient of the temperature field \\(-\\nabla f(x,y)\\).\n\n\n\ndance floor with hot and cold sides\n\n\n\n\nTake a step in the right direction\nNow that we have our direction vector, we’re ready to take a step toward the cool part of the dance floor. To do this, we’ll just add our direction vector to our current position. The update rule would look like this.\n\\[ [x_\\text{next}, y_\\text{next}] = [x_\\text{prev}, y_\\text{prev}] - \\nabla f (x_\\text{prev}, y_\\text{prev}) = [x_\\text{prev}, y_\\text{prev}] - \\left [ \\frac{\\partial f (x_\\text{prev}, y_\\text{prev})}{\\partial x}, \\frac{\\partial f (x_\\text{prev}, y_\\text{prev})}{\\partial y} \\right ] \\]\nIf we iteratively apply this update rule, we’ll end up tracing a trajectory through the \\((x,y)\\) space on the dance floor and we’ll eventually end up at the coolest spot!\n\n\n\ndance floor with trajectory from hot side to cool side\n\n\nGreat success!" 
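To make the shimmy concrete, here is a tiny numerical sketch on a made-up bowl-shaped temperature field whose cool spot sits at (3, 1); the field, the starting point, and the small step size (introduced as the learning rate in the next section) are all just illustrative choices, not something from the post.

import numpy as np

# a made-up "temperature" field, coolest at (3, 1)
def gradient(pos):
    x, y = pos
    return np.array([2 * (x - 3), 4 * (y - 1)])  # gradient of (x-3)^2 + 2*(y-1)^2

pos = np.array([9.0, 8.0])   # start out on the hot side of the dance floor
step_size = 0.1              # keep the shimmies small so we don't overshoot
for _ in range(100):
    pos -= step_size * gradient(pos)  # step opposite the gradient, toward cooler air

print(pos)  # ~ [3., 1.]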
+ "objectID": "posts/hello-pyspark/index.html#how-to-run-pyspark-in-a-jupyter-notebook-on-your-laptop", + "href": "posts/hello-pyspark/index.html#how-to-run-pyspark-in-a-jupyter-notebook-on-your-laptop", + "title": "Hello PySpark!", + "section": "How to Run PySpark in a Jupyter Notebook on Your Laptop", + "text": "How to Run PySpark in a Jupyter Notebook on Your Laptop\nOk, I’m going to walk us through how to get things installed on a Mac or Linux machine where we’re using homebrew and conda to manage virtual environments. If you have a different setup, your favorite search engine will help you get PySpark set up locally.\n\n\n\n\n\n\nNote\n\n\n\nIt’s possible for Homebrew and Anaconda to interfere with one another. The simple rule of thumb is that whenever you want to use the brew command, first deactivate your conda environment by running conda deactivate. See this Stack Overflow question for more details.\n\n\n\nInstall Spark\nInstall Spark with homebrew.\nbrew install apache-spark\nNext we need to set up a SPARK_HOME environment variable in the shell. Check where Spark is installed.\nbrew info apache-spark\nYou should see something like\n==> apache-spark: stable 3.3.2 (bottled), HEAD\nEngine for large-scale data processing\nhttps://spark.apache.org/\n/opt/homebrew/Cellar/apache-spark/3.3.2 (1,453 files, 320.9MB) *\n...\nSet the SPARK_HOME environment variable to your spark installation path with /libexec appended to the end. To do this I added the following line to my .zshrc file.\nexport SPARK_HOME=/opt/homebrew/Cellar/apache-spark/3.3.2/libexec\nRestart your shell, and test the installation by starting the Spark shell.\nspark-shell\n...\nWelcome to\n ____ __\n / __/__ ___ _____/ /__\n _\\ \\/ _ \\/ _ `/ __/ '_/\n /___/ .__/\\_,_/_/ /_/\\_\\ version 3.3.2\n /_/\n \nUsing Scala version 2.12.15 (OpenJDK 64-Bit Server VM, Java 19.0.2)\nType in expressions to have them evaluated.\nType :help for more information.\n\nscala> \nIf you get the scala> prompt, then you’ve successfully installed Spark on your laptop!\n\n\nInstall PySpark\nUse conda to install the PySpark python package. As usual, it’s advisable to do this in a new virtual environment.\n$ conda install pyspark\nYou should be able to launch an interactive PySpark REPL by saying pyspark.\n$ pyspark\n...\nWelcome to\n ____ __\n / __/__ ___ _____/ /__\n _\\ \\/ _ \\/ _ `/ __/ '_/\n /__ / .__/\\_,_/_/ /_/\\_\\ version 3.1.2\n /_/\n\nUsing Python version 3.8.3 (default, Jul 2 2020 11:26:31)\nSpark context Web UI available at http://192.168.100.47:4041\nSpark context available as 'sc' (master = local[*], app id = local-1624127229929).\nSparkSession available as 'spark'.\n>>> \nThis time we get a familiar python >>> prompt. This is an interactive shell where we can easily experiment with PySpark. Feel free to run the example code in this post here in the PySpark shell, or, if you prefer a notebook, read on and we’ll get set up to run PySpark in a jupyter notebook.\n\n\n\n\n\n\nNote\n\n\n\nWhen I tried following this setup on a new Mac, I hit an error about being unable to find the Java Runtime. This stack overflow question lead me to the fix.\n\n\n\n\nThe Spark Session Object\nYou may have noticed that when we launched that PySpark interactive shell, it told us that something called SparkSession was available as 'spark'. So basically, what’s happening here is that when we launch the pyspark shell, it instantiates an object called spark which is an instance of class pyspark.sql.session.SparkSession. 
The spark session object is going to be our entry point for all kinds of PySpark functionality, i.e., we’re going to be saying things like spark.this() and spark.that() to make stuff happen.\nThe PySpark interactive shell is kind enough to instantiate one of these spark session objects for us automatically. However, when we’re using another interface to PySpark (like say a jupyter notebook running a python kernal), we’ll have to make a spark session object for ourselves.\n\n\nCreate a PySpark Session in a Jupyter Notebook\nThere are a few ways to run PySpark in jupyter which you can read about here.\nFor derping around with PySpark on your laptop, I think the best way is to instantiate a spark session from a jupyter notebook running on a regular python kernel. The method we’ll use involves running a standard jupyter notebook session with a python kernal and using the findspark package to initialize the spark session. So, first install the findspark package.\nconda install -c conda-forge findspark\nLaunch jupyter as usual.\njupyter notebook\nGo ahead and fire up a new notebook using a regular python 3 kernal. Once you land inside the notebook, there are a couple things we need to do to get a spark session instantiated. You can think of this as boilerplate code that we need to run in the first cell of a notebook where we’re going to use PySpark.\n\nimport pyspark\nimport findspark\nfrom pyspark.sql import SparkSession\n\nfindspark.init()\nspark = SparkSession.builder.appName('My Spark App').getOrCreate()\n\nFirst we’re running findspark’s init() method to find our Spark installation. If you run into errors here, make sure you got the SPARK_HOME environment variable correctly set in the install instructions above. Then we instantiate a spark session as spark. Once you run this, you’re ready to rock and roll with PySpark in your jupyter notebook.\n\n\n\n\n\n\nNote\n\n\n\nSpark provides a handy web UI that you can use for monitoring and debugging. Once you instantiate the spark session You can open the UI in your web browser at http://localhost:4040/jobs/." }, { - "objectID": "posts/get-down-with-gradient-descent/index.html#general-formulation", - "href": "posts/get-down-with-gradient-descent/index.html#general-formulation", - "title": "Get Down with Gradient Descent", - "section": "General Formulation", - "text": "General Formulation\nLet’s generalize a bit to get to the form of gradient descent you’ll see in references like the wikipedia article.\nFirst we modify our update equation above to handle functions with more than two arguments. We’ll use a bold \\(\\mathbf{x}\\) to indicate a vector of inputs \\(\\mathbf{x} = [x_1,x_2,\\dots,x_p]\\). Our function \\(f(\\mathbf{x}): \\mathbb{R}^p \\mapsto \\mathbb{R}\\) maps a \\(p\\) dimensional input to a scalar output.\nSecond, instead of displacing our current location with the negative gradient vector itself, we’ll first rescale it with a learning rate parameter. This helps address any issues with units on inputs versus outputs. Imagine the input could range between 0 and 1, but the output ranged from 0 to 1,000. We would need to rescale the partial derivatives so the update step doesn’t send us way too far off in input space.\nFinally, we’ll index our updates with \\(t=0,1,\\dots\\). We’ll run for some prespecified number of iterations or we’ll stop the procedure once the change in \\(f(\\mathbf{x})\\) is sufficiently small from one iteration to the next. 
Our update equation will look like this.\n\\[\\mathbf{x}_{t+1} = \\mathbf{x}_t - \\eta \\nabla f ( \\mathbf{x}_t) \\]\nIn pseudocode we could write it like this.\n# gradient descent\nx = initial_value_of_x \nfor t in range(n_iterations): # or some other convergence condition\n x -= learning_rate * gradient_of_f(x)\nNow let’s see how this algorithm gets used to train models." + "objectID": "posts/hello-pyspark/index.html#pyspark-concepts", + "href": "posts/hello-pyspark/index.html#pyspark-concepts", + "title": "Hello PySpark!", + "section": "PySpark Concepts", + "text": "PySpark Concepts\nPySpark provides two main abstractions for data: the RDD and the dataframe. RDD’s are just a distributed list of objects; we won’t go into details about them in this post. For us, the key object in PySpark is the dataframe.\nWhile PySpark dataframes expose much of the functionality you would expect from a library for tabular data manipulation, they behave a little differently from pandas dataframes, both syntactically and under-the-hood. There are a couple of key concepts that will help explain these idiosyncracies.\nImmutability - Pyspark RDD’s and dataframes are immutable. This means that if you change an object, e.g. by adding a column to a dataframe, PySpark returns a reference to a new dataframe; it does not modify the existing dataframe. This is kind of nice, because we don’t have to worry about that whole view versus copy nonsense that happens in pandas.\nLazy Evaluation - Lazy evaluation means that when we start manipulating a dataframe, PySpark won’t actually perform any of the computations until we explicitly ask for the result. This is nice because it potentially allows PySpark to do fancy optimizations before executing a sequence of operations. It’s also confusing at first, because PySpark will seem to blaze through complex operations and then take forever to print a few rows of the dataframe." }, { - "objectID": "posts/get-down-with-gradient-descent/index.html#training-a-linear-regression-model-with-gradient-descent", - "href": "posts/get-down-with-gradient-descent/index.html#training-a-linear-regression-model-with-gradient-descent", - "title": "Get Down with Gradient Descent", - "section": "Training a Linear Regression Model with Gradient Descent", - "text": "Training a Linear Regression Model with Gradient Descent\nTo get the intuition for how we use gradient descent to train models, let’s use it to train a linear regression model. Note that we wouldn’t actually use gradient descent to train a linear model in real life since there is an exact analytical solution for the best-fit parameter values.\nAnyway, in the simple linear regression problem we have numerical feature \\(x\\) and numerical target \\(y\\), and we want to find a model of the form\n\\[F(x) = \\alpha + \\beta x\\]\nThis model has two parameters, \\(\\alpha\\) and \\(\\beta\\). Here “training” means finding the parameter values that make \\(F(x)\\) fit our \\(y\\) data best. We measure how well, or really how poorly, our model fits the data by using a loss function that yields a small value when a model fits well. Ordinary least squares is so named because it uses mean squared error as its loss function.\n\\[L(y, F(x)) = \\frac{1}{n} \\sum_{i=1}^{n} (y_i - F(x_i))^2 = \\frac{1}{n} \\sum_{i=1}^{n} (y_i - (\\alpha + \\beta x_i))^2 \\]\nThe loss function \\(L\\) takes four arguments: \\(x\\), \\(y\\), \\(\\alpha\\), and \\(\\beta\\). 
But since \\(x\\) and \\(y\\) are fixed given our dataset, we could write the loss as \\(L(\\alpha, \\beta | x, y)\\) to emphasize that \\(\\alpha\\) and \\(\\beta\\) are the only free parameters. So we’re looking for the following.\n\\[\\underset{\\alpha,\\beta}{\\operatorname{argmin}} ~ L(\\alpha,\\beta|x,y) \\]\nThat’s right, we’re looking for the values of \\(\\alpha\\) and \\(\\beta\\) that minimize scalar-valued function \\(L(\\alpha, \\beta)\\). Sounds familiar huh?\nTo solve this minimization problem with gradient descent, we can use the following update rule.\n\\[[\\alpha_{t+1}, \\beta_{t+1}] = [\\alpha_{t}, \\beta_{t}] - \\eta \\nabla L(\\alpha_t, \\beta_t | x, y) \\]\nTo get the gradient \\(\\nabla L(\\alpha,\\beta|x,y)\\), we need the partial derivatives of \\(L\\) with respect to \\(\\alpha\\) and \\(\\beta\\). Since \\(L\\) is just a big sum, it’s easy to calculate the derivatives.\n\\[ \\frac{\\partial L(\\alpha, \\beta)}{\\partial \\alpha} = \\frac{1}{n} \\sum_{i=1}^{n} -2 (y_i - (\\alpha + \\beta x_i)) \\] \\[ \\frac{\\partial L(\\alpha, \\beta)}{\\partial \\beta} = \\frac{1}{n} \\sum_{i=1}^{n} -2x_i (y_i - (\\alpha + \\beta x_i)) \\]\nGreat! We’ve got everything we need to implement gradient descent to train an ordinary least squares model. Everything except data that is.\n\nToy Data\nLet’s make a friendly little linear dataset where \\(\\alpha=-10\\) and \\(\\beta=2\\), i.e.\n\\[ y = -10 + 2x + \\text{noise}\\]\n\nimport numpy as np \n\nalpha_true = -10\nbeta_true = 2\n\nrng = np.random.default_rng(42)\nx = np.linspace(0, 10, 50)\ny = alpha_true + beta_true*x + rng.normal(0, 1, size=x.shape)\n\n\n\n\n\n\n\n\nImplementation\nOur implementation will use a function to compute the gradient of the loss function. Since we have two parameters, we’ll use length-2 arrays to hold their values and their partial derivatives. At each iteration, we update the parameter values by subtracting the rescaled partial derivatives.\n\n\n# linear regression using gradient descent \n\ndef gradient_of_loss(parameters, x, y):\n alpha = parameters[0]\n beta = parameters[1]\n partial_alpha = np.mean(-2*(y - (alpha + beta*x)))\n partial_beta = np.mean(-2*x*(y - (alpha + beta*x)))\n return np.array([partial_alpha, partial_beta])\n\nlearning_rate = 0.02\nparameters = np.array([0.0, 0.0]) # initial values of alpha and beta\n\nfor _ in range(500):\n partial_derivatives = gradient_of_loss(parameters, x, y)\n parameters -= learning_rate * partial_derivatives\n \nparameters\n\narray([-10.07049616, 2.03559051])\n\n\nWe can see the loss function decreasing throughout the 500 iterations.\n\n\n\n\n\nAnd we can visualize the loss function as a contour plot over \\((\\alpha,\\beta)\\) space. The blue points show the trajectory our gradient descent followed as it shimmied from the initial position to the coolest spot in \\((\\alpha, \\beta)\\) space where the loss function is nice and small.\n\n\n\n\n\nOur gradient descent settles in a spot pretty close to \\((-10, 2)\\) in \\((\\alpha,\\beta)\\) space, which gives us the final fitted model below." + "objectID": "posts/hello-pyspark/index.html#pyspark-dataframe-essentials", + "href": "posts/hello-pyspark/index.html#pyspark-dataframe-essentials", + "title": "Hello PySpark!", + "section": "PySpark Dataframe Essentials", + "text": "PySpark Dataframe Essentials\n\nCreating a PySpark dataframe with createDataFrame()\nThe first thing we’ll need is a way to make dataframes. 
createDataFrame() allows us to create PySpark dataframes from python objects like nested lists or pandas dataframes. Notice that createDataFrame() is a method of the spark session class, so we’ll call it from our spark session sparkby saying spark.createDataFrame().\n\n# create pyspark dataframe from nested lists\nmy_df = spark.createDataFrame(\n data=[\n [2022, \"tiger\"],\n [2023, \"rabbit\"],\n [2024, \"dragon\"]\n ],\n schema=['year', 'animal']\n)\n\nLet’s read the seaborn tips dataset into a pandas dataframe and then use it to create a PySpark dataframe.\n\nimport pandas as pd\n\n# load tips dataset into a pandas dataframe\npandas_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')\n\n# create pyspark dataframe from a pandas dataframe\npyspark_df = spark.createDataFrame(pandas_df)\n\n\n\n\n\n\n\nNote\n\n\n\nIn real life when we’re running PySpark on a large-scale distributed system, we would not generally want to use python lists or pandas dataframes to load data into PySpark. Ideally we would want to read data directly from where it is stored on HDFS, e.g. by reading parquet files, or by querying directly from a hive database using spark sql.\n\n\n\n\nPeeking at a dataframe’s contents\nThe default print method for the PySpark dataframe will just give you the schema.\n\npyspark_df\n\nDataFrame[total_bill: double, tip: double, sex: string, smoker: string, day: string, time: string, size: bigint]\n\n\nIf we want to peek at some of the data, we’ll need to use the show() method, which is analogous to the pandas head(). Remember that show() will cause PySpark to execute any operations that it’s been lazily waiting to evaluate, so sometimes it can take a while to run.\n\n# show the first few rows of the dataframe\npyspark_df.show(5)\n\n+----------+----+------+------+---+------+----+\n|total_bill| tip| sex|smoker|day| time|size|\n+----------+----+------+------+---+------+----+\n| 16.99|1.01|Female| No|Sun|Dinner| 2|\n| 10.34|1.66| Male| No|Sun|Dinner| 3|\n| 21.01| 3.5| Male| No|Sun|Dinner| 3|\n| 23.68|3.31| Male| No|Sun|Dinner| 2|\n| 24.59|3.61|Female| No|Sun|Dinner| 4|\n+----------+----+------+------+---+------+----+\nonly showing top 5 rows\n\n\n\n\n[Stage 0:> (0 + 1) / 1]\n\n \n\n\nWe thus encounter our first rude awakening. PySpark’s default representation of dataframes in the notebook isn’t as pretty as that of pandas. But no one ever said it would be pretty, they just said it would be scalable.\nYou can also use the printSchema() method for a nice vertical representation of the schema.\n\n# show the dataframe schema\npyspark_df.printSchema()\n\nroot\n |-- total_bill: double (nullable = true)\n |-- tip: double (nullable = true)\n |-- sex: string (nullable = true)\n |-- smoker: string (nullable = true)\n |-- day: string (nullable = true)\n |-- time: string (nullable = true)\n |-- size: long (nullable = true)\n\n\n\n\n\nSelect columns by name\nYou can select specific columns from a dataframe using the select() method. You can pass either a list of names, or pass names as arguments.\n\n# select some of the columns\npyspark_df.select('total_bill', 'tip')\n\n# select columns in a list\npyspark_df.select(['day', 'time', 'total_bill'])\n\n\n\nFilter rows based on column values\nAnalogous to the WHERE clause in SQL, and the query() method in pandas, PySpark provides a filter() method which returns only the rows that meet the specified conditions. Its argument is a string specifying the condition to be met for rows to be included in the result. 
You specify the condition as an expression involving the column names and comparison operators like <, >, <=, >=, == (equal), and ~= (not equal). You can specify compound expressions using and and or, and you can even do a SQL-like in to check if the column value matches any items in a list.\n\n## compare a column to a value\npyspark_df.filter('total_bill > 20')\n\n# compare two columns with arithmetic\npyspark_df.filter('tip > 0.15 * total_bill')\n\n# check equality with a string value\npyspark_df.filter('sex == \"Male\"')\n\n# check equality with any of several possible values\npyspark_df.filter('day in (\"Sat\", \"Sun\")')\n\n# use \"and\" \npyspark_df.filter('day == \"Fri\" and time == \"Lunch\"')\n\nIf you’re into boolean indexing with the brackets, PySpark does support that too, but I encourage you to use filter() instead. Check out my rant about why you shouldn’t use boolean indexing for the details. The TLDR is that filter() requires less typing, makes your code more readable and portable, and it allows you to chain method calls together using dot chains.\nHere’s the boolean indexing equivalent of the last example from above.\n\n# using boolean indexing\npyspark_df[(pyspark_df.day == 'Fri') & (pyspark_df.time == 'Lunch')]\n\nI know, it looks horrendous, but not as horrendous as the error message you’ll get if you forget the parentheses.\n\n\nAdd new columns to a dataframe\nYou can add new columns which are functions of the existing columns with the withColumn() method.\n\nimport pyspark.sql.functions as f\n\n# add a new column using col() to reference other columns\npyspark_df.withColumn('tip_percent', f.col('tip') / f.col('total_bill'))\n\nNotice that we’ve imported the pyspark.sql.functions module. This module contains lots of useful functions that we’ll be using all over the place, so it’s probably a good idea to go ahead and import it whenever you’re using PySpark. BTW, it seems like folks usually import this module as f or F. In this example we’re using the col() function, which allows us to refer to columns in our dataframe using string representations of the column names.\nYou could also achieve the same result using the dot to reference the other columns, but this requires us to type the dataframe name over and over again, which makes it harder to reuse this code on different dataframes or in dot chains.\n\n# add a new column using the dot to reference other columns (less recommended)\npyspark_df.withColumn('tip_percent', pyspark_df.tip / pyspark_df.total_bill)\n\nIf you want to apply numerical transformations like exponents or logs, use the built-in functions in the pyspark.sql.functions module.\n\n# log \npyspark_df.withColumn('log_bill', f.log(f.col('total_bill')))\n\n# exponent\npyspark_df.withColumn('bill_squared', f.pow(f.col('total_bill'), 2))\n\nYou can implement conditional assignment like SQL’s CASE WHEN construct using the when() function and the otherwise() method.\n\n# conditional assignment (like CASE WHEN)\npyspark_df.withColumn('is_male', f.when(f.col('sex') == 'Male', True).otherwise(False))\n\n# using multiple when conditions and values\npyspark_df.withColumn('bill_size', \n f.when(f.col('total_bill') < 10, 'small')\n .when(f.col('total_bill') < 20, 'medium')\n .otherwise('large')\n)\n\nRemember that since PySpark dataframes are immutable, calling withColumns() on a dataframe returns a new dataframe. 
If you want to persist the result, you’ll need to make an assignment.\npyspark_df = pyspark_df.withColumns(...)\n\n\nGroup by and aggregate\nPySpark provides a groupBy() method similar to the pandas groupby(). Just like in pandas, we can call methods like count() and mean() on our grouped dataframe, and we also have a more flexible agg() method that allows us to specify column-aggregation mappings.\n\n\n# group by and count\npyspark_df.groupBy('time').count().show()\n\n+------+-----+\n| time|count|\n+------+-----+\n|Dinner| 176|\n| Lunch| 68|\n+------+-----+\n\n\n\n\n\n# group by and specify column-aggregation mappings with agg()\npyspark_df.groupBy('time').agg({'total_bill': 'mean', 'tip': 'max'}).show()\n\n+------+--------+------------------+\n| time|max(tip)| avg(total_bill)|\n+------+--------+------------------+\n|Dinner| 10.0| 20.79715909090909|\n| Lunch| 6.7|17.168676470588235|\n+------+--------+------------------+\n\n\n\nIf you want to get fancier with your aggregations, it might just be easier to express them using hive syntax. Read on to find out how.\n\n\nRun Hive SQL on dataframes\nOne of the mind-blowing features of PySpark is that it allows you to write hive SQL queries on your dataframes. To take a PySpark dataframe into the SQL world, use the createOrReplaceTempView() method. This method takes one string argument which will be the dataframes name in the SQL world. Then you can use spark.sql() to run a query. The result is returned as a PySpark dataframe.\n\n\n# put pyspark dataframe in SQL world and query it\npyspark_df.createOrReplaceTempView('tips')\nspark.sql('select * from tips').show(5)\n\n+----------+----+------+------+---+------+----+\n|total_bill| tip| sex|smoker|day| time|size|\n+----------+----+------+------+---+------+----+\n| 16.99|1.01|Female| No|Sun|Dinner| 2|\n| 10.34|1.66| Male| No|Sun|Dinner| 3|\n| 21.01| 3.5| Male| No|Sun|Dinner| 3|\n| 23.68|3.31| Male| No|Sun|Dinner| 2|\n| 24.59|3.61|Female| No|Sun|Dinner| 4|\n+----------+----+------+------+---+------+----+\nonly showing top 5 rows\n\n\n\nThis is awesome for a couple of reasons. First, it allows us to easily express any transformations in hive syntax. If you’re like me and you’ve already been using hive, this will dramatically reduce the PySpark learning curve, because when in doubt, you can always bump a dataframe into the SQL world and simply use hive to do what you need. Second, if you have a hive deployment, PySpark’s SQL world also has access to all of your hive tables. This means you can write queries involving both hive tables and your PySpark dataframes. 
It also means you can run hive commands, like inserting into a table, directly from PySpark.\nLet’s do some aggregations that might be a little trickier to do using the PySpark built-in functions.\n\n\n# run hive query and save result to dataframe\ntip_stats_by_time = spark.sql(\"\"\"\n select\n time\n , count(*) as n \n , avg(tip) as avg_tip\n , percentile_approx(tip, 0.5) as med_tip\n , avg(case when tip > 3 then 1 else 0 end) as pct_tip_gt_3\n from \n tips\n group by 1\n\"\"\")\n\ntip_stats_by_time.show()\n\n+------+---+------------------+-------+-------------------+\n| time| n| avg_tip|med_tip| pct_tip_gt_3|\n+------+---+------------------+-------+-------------------+\n|Dinner|176| 3.102670454545455| 3.0|0.44886363636363635|\n| Lunch| 68|2.7280882352941176| 2.2|0.27941176470588236|\n+------+---+------------------+-------+-------------------+" }, { - "objectID": "posts/get-down-with-gradient-descent/index.html#wrapping-up", - "href": "posts/get-down-with-gradient-descent/index.html#wrapping-up", - "title": "Get Down with Gradient Descent", + "objectID": "posts/hello-pyspark/index.html#visualization-with-pyspark", + "href": "posts/hello-pyspark/index.html#visualization-with-pyspark", + "title": "Hello PySpark!", + "section": "Visualization with PySpark", + "text": "Visualization with PySpark\nThere aren’t any tools for visualization included in PySpark. But that’s no problem, because we can just use the toPandas() method on a PySpark dataframe to pull data back into pandas. Once we have a pandas dataframe, we can happily build visualizations as usual. Of course, if your PySpark dataframe is huge, you wouldn’t want to use toPandas() directly, because PySpark will attempt to read the entire contents of its huge dataframe into memory. Instead, it’s best to use PySpark to generate aggregations of your data for plotting or to pull only a sample of your full data into pandas.\n\n# read aggregated pyspark dataframe into pandas for plotting\nplot_pdf = tip_stats_by_time.toPandas()\nplot_pdf.plot.bar(x='time', y=['avg_tip', 'med_tip']);" + }, + { + "objectID": "posts/hello-pyspark/index.html#wrapping-up", + "href": "posts/hello-pyspark/index.html#wrapping-up", + "title": "Hello PySpark!", "section": "Wrapping Up", - "text": "Wrapping Up\nThere you have it, gradient descent explained with a fresh new analogy having nothing whatsoever to do with foggy mountains, plus an implemented example fitting a linear model. While we often see gradient descent used to train models by performing an optimization in parameter space, as in generalized linear models and neural networks, there are other ways to use this powerful technique to train models. In particular, we’ll soon see how our beloved gradient boosting tree models use gradient descent in prediction space, rather than parameter space. Stay tuned for that mind bender in a future post." + "text": "Wrapping Up\nSo that’s a wrap on our crash course in working with PySpark. You now have a good idea of what pyspark is and how to get started manipulating dataframes with it. Stay tuned for a future post on PySpark’s companion ML library MLlib. In the meantime, may no dataframe be too large for you ever again." 
}, { - "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html", - "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html", - "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "objectID": "posts/xgboost-from-scratch/index.html", + "href": "posts/xgboost-from-scratch/index.html", + "title": "XGBoost from Scratch", "section": "", - "text": "Cold water cascades over the rocks in Erwin, Tennessee.\nFriends, this is going to be an epic post! Today, we bring together all the ideas we’ve built up over the past few posts to nail down our understanding of the key ideas in Jerome Friedman’s seminal 2001 paper: “Greedy Function Approximation: A Gradient Boosting Machine.” In particular, we’ll summarize the highlights from the paper, and we’ll build an in-house python implementation of his generic gradient boosting algorithm which can train with any differentiable loss function. What’s more, we’ll go ahead and take our generic gradient boosting machine for a spin by training it with several of the most popular loss functions used in practice.\nAre you freaking stoked or what?\nSweet. Let’s do this." + "text": "A weathered tree reaches toward the sea at Playa Mal País\nWell, dear reader, it’s that time again, time for us to do a seemingly unnecessary scratch build of a popular algorithm that most people would simply import from the library without a second thought. But readers of this blog are not most people. Of course you know that when we do scratch builds, it’s not for the hell of it, it’s for the purpose of demystification. To that end, today we are going to implement XGBoost from scratch in python, using only numpy and pandas.\nSpecifically we’re going to implement the core statistical learning algorithm of XGBoost, including most of the key hyperparameters and their functionality. Our implementation will also support user-defined custom objective functions, meaning that it can perform regression, classification, and whatever exotic learning tasks you can dream up, as long as you can write down a twice-differentiable objective function. We’ll refrain from implementing some simple features like column subsampling which will be left to you, gentle reader, as exercises. In terms of tree methods, we’re going to implement the exact tree-splitting algorithm, leaving the sparsity-aware method (used to handle missing feature values) and the approximate method (used for scalability) as exercises or maybe topics for future posts.\nAs always, if something is unclear, try backtracking through the previous posts on gradient boosting and decision trees to clarify your intuition. We’ve already built up all the statistical and computational background needed to make sense of this scratch build. Here are the most important prerequisite posts:\nGreat, let’s do this." }, { - "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedman-2001-tldr", - "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedman-2001-tldr", - "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", - "section": "Friedman 2001: TL;DR", - "text": "Friedman 2001: TL;DR\nI’ve mentioned this paper a couple of times before, but as far as I can tell, this is the origin of gradient boosting; it is therefore, a seminal work worth reading. You know what, I think you might like to pick up the paper and read it yourself. 
Like many papers, there is a lot of scary looking math in the first few pages, but if you’ve been following along on this blog, you’ll find that it’s actually totally approachable. This is the kind of thing that cures imposter syndrome, so give it a shot. That said, here’s the TL;DR as I see it.\nThe first part of the paper introduces the idea of fitting models by doing gradient descent in function space, an ingenious idea we spent an entire post demystifying earlier. Friedman goes on to introduce the generic gradient boost algorithm, which works with any differentiable loss function, as well as specific variants for minimizing absolute error, Huber loss, and binary deviance. In terms of hyperparameters, he points out that the learning rate can be used to reduce overfitting, while increased tree depth can help capture more complex interactions among features. He even discusses feature importance and partial dependence methods for interpreting fitted gradient boosting models.\nFriedman concludes by musing about the advantages of gradient boosting with trees. He notes some key advantages afforded by the use of decision trees including no need to rescale input data, robustness against irrelevant input features, and elegant handling of missing feature values. He points out that gradient boosting manages to capitalize on the benefits of decision trees while minimizing their key weakness (crappy accuracy). I think this offers a great insight into why gradient boosting models have become so widespread and successful in practical ML applications." + "objectID": "posts/xgboost-from-scratch/index.html#the-xgboost-model-class", + "href": "posts/xgboost-from-scratch/index.html#the-xgboost-model-class", + "title": "XGBoost from Scratch", + "section": "The XGBoost Model Class", + "text": "The XGBoost Model Class\nWe begin with the user-facing API for our model, a class called XGBoostModel which will implement gradient boosting and prediction. To be more consistent with the XGBoost library, we’ll pass hyperparameters to our model in a parameter dictionary, so our init method is going to pull relevant parameters out of the dictionary and set them as object attributes. Note the use of python’s defaultdict so we don’t have to worry about handling key errors if we try to access a parameter that the user didn’t set in the dictionary.\n\nimport math\nimport numpy as np \nimport pandas as pd\nfrom collections import defaultdict\n\n\nclass XGBoostModel():\n '''XGBoost from Scratch\n '''\n \n def __init__(self, params, random_seed=None):\n self.params = defaultdict(lambda: None, params)\n self.subsample = self.params['subsample'] \\\n if self.params['subsample'] else 1.0\n self.learning_rate = self.params['learning_rate'] \\\n if self.params['learning_rate'] else 0.3\n self.base_prediction = self.params['base_score'] \\\n if self.params['base_score'] else 0.5\n self.max_depth = self.params['max_depth'] \\\n if self.params['max_depth'] else 5\n self.rng = np.random.default_rng(seed=random_seed)\n\nThe fit method, based on our classic GBM, takes a feature dataframe, a target vector, the objective function, and the number of boosting rounds as arguments. 
The user-supplied objective function should be an object with loss, gradient, and hessian methods, each of which takes a target vector and a prediction vector as input; the loss method should return a scalar loss score, the gradient method should return a vector of gradients, and the hessian method should return a vector of hessians.\nIn contrast to boosting in the classic GBM, instead of computing residuals between the current predictions and the target, we compute gradients and hessians of the loss function with respect to the current predictions, and instead of predicting residuals with a decision tree, we fit a special XGBoost tree booster (which we’ll implement in a moment) using the gradients and hessians. I’ve also added row subsampling by drawing a random subset of instance indices and passing them to the tree booster during each boosting round. The rest of the fit method is the same as the classic GBM, and the predict method is identical too.\n\ndef fit(self, X, y, objective, num_boost_round, verbose=False):\n current_predictions = self.base_prediction * np.ones(shape=y.shape)\n self.boosters = []\n for i in range(num_boost_round):\n gradients = objective.gradient(y, current_predictions)\n hessians = objective.hessian(y, current_predictions)\n sample_idxs = None if self.subsample == 1.0 \\\n else self.rng.choice(len(y), \n size=math.floor(self.subsample*len(y)), \n replace=False)\n booster = TreeBooster(X, gradients, hessians, \n self.params, self.max_depth, sample_idxs)\n current_predictions += self.learning_rate * booster.predict(X)\n self.boosters.append(booster)\n if verbose: \n print(f'[{i}] train loss = {objective.loss(y, current_predictions)}')\n \ndef predict(self, X):\n return (self.base_prediction + self.learning_rate \n * np.sum([booster.predict(X) for booster in self.boosters], axis=0))\n\nXGBoostModel.fit = fit\nXGBoostModel.predict = predict \n\nAll we have to do now is implement the tree booster." }, { - "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedmans-generic-gradient-boosting-algorithm", - "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#friedmans-generic-gradient-boosting-algorithm", - "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", - "section": "Friedman’s Generic Gradient Boosting Algorithm", - "text": "Friedman’s Generic Gradient Boosting Algorithm\nLet’s take a closer look at Friedman’s original gradient boost algorithm, Alg. 1 in Section 3 of the paper (translated into the notation we’ve been using so far).\nLike last time, we have training data \\((\\mathbf{y}, \\mathbf{X})\\) where \\(\\mathbf{y}\\) is a length-\\(n\\) vector of target values, and \\(\\mathbf{X}\\) is an \\(n \\times p\\) matrix with \\(n\\) observations of \\(p\\) features. 
We also have a differentiable loss function \\(L(\\mathbf{y}, \\mathbf{\\hat{y}}) = \\sum_{i=1}^n l(y_i, \\hat{y}_i)\\), a “learning rate” hyperparameter \\(\\eta\\), and a fixed number of model iterations \\(M\\).\nAlgorithm: gradient_boost\\((\\mathbf{X},\\mathbf{y},L,\\eta, M)\\) returns: model \\(F_M\\)\n\nLet base model \\(F_0(\\mathbf{x}) = c\\), where \\(c = \\text{argmin}_{c} \\sum_{i=1}^n l(y_i, c)\\)\nfor \\(m\\) = \\(0\\) to \\(M-1\\):\n     Let “pseudo-residual” vector \\(\\mathbf{r}_m = -\\nabla_{\\mathbf{\\hat{y}}_m} L(\\mathbf{y},\\mathbf{\\hat{y}}_m)\\)\n     Train decision tree regressor \\(h_m(\\mathbf{X})\\) to predict \\(\\mathbf{r}_m\\) (minimizing squared error)\n     foreach terminal leaf node \\(t \\in h_m\\):\n          Let \\(v = \\text{argmin}_v \\sum_{i \\in t} l(y_i, F_m(\\mathbf{x}_i) + v)\\)\n          Set terminal leaf node \\(t\\) to predict value \\(v\\)\n     \\(F_{m+1}(\\mathbf{X}) = F_{m}(\\mathbf{X}) + \\eta h_m(\\mathbf{X})\\)\nReturn composite model \\(F_M\\)\n\nBy now, most of this is already familiar to us. We begin by setting the base model \\(F_0\\) equal to the constant prediction value that minimizes the loss over all examples in the training dataset (line 1). Then we begin the boosting iterations (line 2), each time computing the negative gradients of the loss with respect to the current model predictions (known as the pseudo residuals) (line 3). We then fit our next decision tree regressor to predict the pseudo residuals (line 4).\nThen we encounter something new on lines 5-7. When we fit a vanilla decision tree regressor to predict pseudo residuals, we’re using mean squared error as the loss function to train the tree. As you might imagine, this works well when the global loss function is also squared error. But if we want to use a global loss other than squared error, there is an additional trick we can use to further increase the composite model’s accuracy. The idea is to continue using squared error to train each decision tree, keeping its structure and split conditions but altering the predicted value in each leaf to help minimize the global loss function. Instead of using the mean target value as the prediction for each node (as we would do when minimizing squared error), we use a numerical optimization method like line search to choose the constant value for that leaf that leads to the best overall loss. This is the same thing we did in line 1 of the algorithm to set the base prediction, but here we choose the optimal prediction for each terminal node of the newly trained decision tree." + "objectID": "posts/xgboost-from-scratch/index.html#the-xgboost-tree-booster", + "href": "posts/xgboost-from-scratch/index.html#the-xgboost-tree-booster", + "title": "XGBoost from Scratch", + "section": "The XGBoost Tree Booster", + "text": "The XGBoost Tree Booster\nThe XGBoost tree booster is a modified version of the decision tree that we built in the decision tree from scratch post. Like the decision tree, we recursively build a binary tree structure by finding the best split rule for each node in the tree. The main difference is the criterion for evaluating splits and the way that we define a leaf’s predicted value. Instead of being functions of the target values of the instances in each node, the criterion and predicted values are functions of the instance gradients and hessians. 
Thus we need only make a couple of modifications to our previous decision tree implementation to create the XGBoost tree booster.\n\nInitialization and Inserting Child Nodes\nMost of the init method is just parsing the parameter dictionary to assign parameters as object attributes. The one notable difference from our decision tree is in the way we define the node’s predicted value. We define self.value according to equation 5 of the XGBoost paper, a simple function of the gradient and hessian values of the instances in the current node. Of course the init also goes on to build the tree via the maybe insert child nodes method. This method is nearly identical to the one we implemented for our decision tree. So far so good.\n\nclass TreeBooster():\n \n def __init__(self, X, g, h, params, max_depth, idxs=None):\n self.params = params\n self.max_depth = max_depth\n assert self.max_depth >= 0, 'max_depth must be nonnegative'\n self.min_child_weight = params['min_child_weight'] \\\n if params['min_child_weight'] else 1.0\n self.reg_lambda = params['reg_lambda'] if params['reg_lambda'] else 1.0\n self.gamma = params['gamma'] if params['gamma'] else 0.0\n self.colsample_bynode = params['colsample_bynode'] \\\n if params['colsample_bynode'] else 1.0\n if isinstance(g, pd.Series): g = g.values\n if isinstance(h, pd.Series): h = h.values\n if idxs is None: idxs = np.arange(len(g))\n self.X, self.g, self.h, self.idxs = X, g, h, idxs\n self.n, self.c = len(idxs), X.shape[1]\n self.value = -g[idxs].sum() / (h[idxs].sum() + self.reg_lambda) # Eq (5)\n self.best_score_so_far = 0.\n if self.max_depth > 0:\n self._maybe_insert_child_nodes()\n\n def _maybe_insert_child_nodes(self):\n for i in range(self.c): self._find_better_split(i)\n if self.is_leaf: return\n x = self.X.values[self.idxs,self.split_feature_idx]\n left_idx = np.nonzero(x <= self.threshold)[0]\n right_idx = np.nonzero(x > self.threshold)[0]\n self.left = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[left_idx])\n self.right = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[right_idx])\n\n @property\n def is_leaf(self): return self.best_score_so_far == 0.\n\n def _find_better_split(self, feature_idx):\n pass\n\n\n\nSplit Finding\nSplit finding follows the exact same pattern that we used in the decision tree, except we keep track of gradient and hessian stats instead of target value stats, and of course we use the XGBoost gain criterion (equation 7 from the paper) for evaluating splits.\n\ndef _find_better_split(self, feature_idx):\n x = self.X.values[self.idxs, feature_idx]\n g, h = self.g[self.idxs], self.h[self.idxs]\n sort_idx = np.argsort(x)\n sort_g, sort_h, sort_x = g[sort_idx], h[sort_idx], x[sort_idx]\n sum_g, sum_h = g.sum(), h.sum()\n sum_g_right, sum_h_right = sum_g, sum_h\n sum_g_left, sum_h_left = 0., 0.\n\n for i in range(0, self.n - 1):\n g_i, h_i, x_i, x_i_next = sort_g[i], sort_h[i], sort_x[i], sort_x[i + 1]\n sum_g_left += g_i; sum_g_right -= g_i\n sum_h_left += h_i; sum_h_right -= h_i\n if sum_h_left < self.min_child_weight or x_i == x_i_next:continue\n if sum_h_right < self.min_child_weight: break\n\n gain = 0.5 * ((sum_g_left**2 / (sum_h_left + self.reg_lambda))\n + (sum_g_right**2 / (sum_h_right + self.reg_lambda))\n - (sum_g**2 / (sum_h + self.reg_lambda))\n ) - self.gamma/2 # Eq(7) in the xgboost paper\n if gain > self.best_score_so_far: \n self.split_feature_idx = feature_idx\n self.best_score_so_far = gain\n self.threshold = (x_i + x_i_next) / 2\n 
\nTreeBooster._find_better_split = _find_better_split\n\n\n\nPrediction\nPrediction works exactly the same as in our decision tree, and the methods are nearly identical.\n\ndef predict(self, X):\n return np.array([self._predict_row(row) for i, row in X.iterrows()])\n\ndef _predict_row(self, row):\n if self.is_leaf: \n return self.value\n child = self.left if row[self.split_feature_idx] <= self.threshold \\\n else self.right\n return child._predict_row(row)\n\nTreeBooster.predict = predict \nTreeBooster._predict_row = _predict_row" }, { - "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#implementation", - "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#implementation", - "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", - "section": "Implementation", - "text": "Implementation\nI did some (half-assed) searching on the interweb for an implementation of GBM that allows the user to provide a custom loss function, and you know what? I couldn’t find anything. If you find another implementation, post in the comments so we can learn from it too.\nSince we need to modify the values predicted by our decision trees’ terminal nodes, we’ll want to brush up on the scikit-learn decision tree structure before we get going. You can see explanations of all the necessary decision tree hacks in this notebook.\n\nimport numpy as np\nfrom sklearn.tree import DecisionTreeRegressor \nfrom scipy.optimize import minimize\n\nclass GradientBoostingMachine():\n '''Gradient Boosting Machine supporting any user-supplied loss function.\n \n Parameters\n ----------\n n_trees : int\n number of boosting rounds\n \n learning_rate : float\n learning rate hyperparameter\n \n max_depth : int\n maximum tree depth\n '''\n \n def __init__(self, n_trees, learning_rate=0.1, max_depth=1):\n self.n_trees=n_trees; \n self.learning_rate=learning_rate\n self.max_depth=max_depth;\n \n def fit(self, X, y, objective):\n '''Fit the GBM using the specified loss function.\n \n Parameters\n ----------\n X : ndarray of size (number observations, number features)\n design matrix\n \n y : ndarray of size (number observations,)\n target values\n \n objective : loss function class instance\n Class specifying the loss function for training.\n Should implement two methods:\n loss(labels: ndarray, predictions: ndarray) -> float\n negative_gradient(labels: ndarray, predictions: ndarray) -> ndarray\n '''\n \n self.trees = []\n self.base_prediction = self._get_optimal_base_value(y, objective.loss)\n current_predictions = self.base_prediction * np.ones(shape=y.shape)\n for _ in range(self.n_trees):\n pseudo_residuals = objective.negative_gradient(y, current_predictions)\n tree = DecisionTreeRegressor(max_depth=self.max_depth)\n tree.fit(X, pseudo_residuals)\n self._update_terminal_nodes(tree, X, y, current_predictions, objective.loss)\n current_predictions += self.learning_rate * tree.predict(X)\n self.trees.append(tree)\n \n def _get_optimal_base_value(self, y, loss):\n '''Find the optimal initial prediction for the base model.'''\n fun = lambda c: loss(y, c)\n c0 = y.mean()\n return minimize(fun=fun, x0=c0).x[0]\n \n def _update_terminal_nodes(self, tree, X, y, current_predictions, loss):\n '''Update the tree's predictions according to the loss function.'''\n # terminal node id's\n leaf_nodes = np.nonzero(tree.tree_.children_left == -1)[0]\n # compute leaf for each sample in ``X``.\n leaf_node_for_each_sample = tree.apply(X)\n for leaf in leaf_nodes:\n 
samples_in_this_leaf = np.where(leaf_node_for_each_sample == leaf)[0]\n y_in_leaf = y.take(samples_in_this_leaf, axis=0)\n preds_in_leaf = current_predictions.take(samples_in_this_leaf, axis=0)\n val = self._get_optimal_leaf_value(y_in_leaf, \n preds_in_leaf,\n loss)\n tree.tree_.value[leaf, 0, 0] = val\n \n def _get_optimal_leaf_value(self, y, current_predictions, loss):\n '''Find the optimal prediction value for a given leaf.'''\n fun = lambda c: loss(y, current_predictions + c)\n c0 = y.mean()\n return minimize(fun=fun, x0=c0).x[0]\n \n def predict(self, X):\n '''Generate predictions for the given input data.'''\n return (self.base_prediction \n + self.learning_rate \n * np.sum([tree.predict(X) for tree in self.trees], axis=0))\n\nIn terms of design, we implement a class for the GBM with scikit-like fit and predict methods. Notice in the below implementation that the fit method is only 10 lines long, and corresponds very closely to Friedman’s gradient boost algorithm from above. Most of the complexity comes from the helper methods for updating the leaf values according to the specified loss function.\nWhen the user wants to call the fit method, they’ll need to supply the loss function they want to use for boosting. We’ll make the user implement their loss (a.k.a. objective) function as a class with two methods: (1) a loss method taking the labels and the predictions and returning the loss score and (2) a negative_gradient method taking the labels and the predictions and returning an array of negative gradients." + "objectID": "posts/xgboost-from-scratch/index.html#the-complete-xgboost-from-scratch-implementation", + "href": "posts/xgboost-from-scratch/index.html#the-complete-xgboost-from-scratch-implementation", + "title": "XGBoost from Scratch", + "section": "The Complete XGBoost From Scratch Implementation", + "text": "The Complete XGBoost From Scratch Implementation\nHere’s the entire implementation which produces a usable XGBoostModel class with fit and predict methods.\n\nclass XGBoostModel():\n '''XGBoost from Scratch\n '''\n \n def __init__(self, params, random_seed=None):\n self.params = defaultdict(lambda: None, params)\n self.subsample = self.params['subsample'] \\\n if self.params['subsample'] else 1.0\n self.learning_rate = self.params['learning_rate'] \\\n if self.params['learning_rate'] else 0.3\n self.base_prediction = self.params['base_score'] \\\n if self.params['base_score'] else 0.5\n self.max_depth = self.params['max_depth'] \\\n if self.params['max_depth'] else 5\n self.rng = np.random.default_rng(seed=random_seed)\n \n def fit(self, X, y, objective, num_boost_round, verbose=False):\n current_predictions = self.base_prediction * np.ones(shape=y.shape)\n self.boosters = []\n for i in range(num_boost_round):\n gradients = objective.gradient(y, current_predictions)\n hessians = objective.hessian(y, current_predictions)\n sample_idxs = None if self.subsample == 1.0 \\\n else self.rng.choice(len(y), \n size=math.floor(self.subsample*len(y)), \n replace=False)\n booster = TreeBooster(X, gradients, hessians, \n self.params, self.max_depth, sample_idxs)\n current_predictions += self.learning_rate * booster.predict(X)\n self.boosters.append(booster)\n if verbose: \n print(f'[{i}] train loss = {objective.loss(y, current_predictions)}')\n \n def predict(self, X):\n return (self.base_prediction + self.learning_rate \n * np.sum([booster.predict(X) for booster in self.boosters], axis=0))\n \nclass TreeBooster():\n \n def __init__(self, X, g, h, params, max_depth, idxs=None):\n 
self.params = params\n self.max_depth = max_depth\n assert self.max_depth >= 0, 'max_depth must be nonnegative'\n self.min_child_weight = params['min_child_weight'] \\\n if params['min_child_weight'] else 1.0\n self.reg_lambda = params['reg_lambda'] if params['reg_lambda'] else 1.0\n self.gamma = params['gamma'] if params['gamma'] else 0.0\n self.colsample_bynode = params['colsample_bynode'] \\\n if params['colsample_bynode'] else 1.0\n if isinstance(g, pd.Series): g = g.values\n if isinstance(h, pd.Series): h = h.values\n if idxs is None: idxs = np.arange(len(g))\n self.X, self.g, self.h, self.idxs = X, g, h, idxs\n self.n, self.c = len(idxs), X.shape[1]\n self.value = -g[idxs].sum() / (h[idxs].sum() + self.reg_lambda) # Eq (5)\n self.best_score_so_far = 0.\n if self.max_depth > 0:\n self._maybe_insert_child_nodes()\n\n def _maybe_insert_child_nodes(self):\n for i in range(self.c): self._find_better_split(i)\n if self.is_leaf: return\n x = self.X.values[self.idxs,self.split_feature_idx]\n left_idx = np.nonzero(x <= self.threshold)[0]\n right_idx = np.nonzero(x > self.threshold)[0]\n self.left = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[left_idx])\n self.right = TreeBooster(self.X, self.g, self.h, self.params, \n self.max_depth - 1, self.idxs[right_idx])\n\n @property\n def is_leaf(self): return self.best_score_so_far == 0.\n \n def _find_better_split(self, feature_idx):\n x = self.X.values[self.idxs, feature_idx]\n g, h = self.g[self.idxs], self.h[self.idxs]\n sort_idx = np.argsort(x)\n sort_g, sort_h, sort_x = g[sort_idx], h[sort_idx], x[sort_idx]\n sum_g, sum_h = g.sum(), h.sum()\n sum_g_right, sum_h_right = sum_g, sum_h\n sum_g_left, sum_h_left = 0., 0.\n\n for i in range(0, self.n - 1):\n g_i, h_i, x_i, x_i_next = sort_g[i], sort_h[i], sort_x[i], sort_x[i + 1]\n sum_g_left += g_i; sum_g_right -= g_i\n sum_h_left += h_i; sum_h_right -= h_i\n if sum_h_left < self.min_child_weight or x_i == x_i_next:continue\n if sum_h_right < self.min_child_weight: break\n\n gain = 0.5 * ((sum_g_left**2 / (sum_h_left + self.reg_lambda))\n + (sum_g_right**2 / (sum_h_right + self.reg_lambda))\n - (sum_g**2 / (sum_h + self.reg_lambda))\n ) - self.gamma/2 # Eq(7) in the xgboost paper\n if gain > self.best_score_so_far: \n self.split_feature_idx = feature_idx\n self.best_score_so_far = gain\n self.threshold = (x_i + x_i_next) / 2\n \n def predict(self, X):\n return np.array([self._predict_row(row) for i, row in X.iterrows()])\n\n def _predict_row(self, row):\n if self.is_leaf: \n return self.value\n child = self.left if row[self.split_feature_idx] <= self.threshold \\\n else self.right\n return child._predict_row(row)" }, { - "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#testing-our-model", - "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#testing-our-model", - "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", - "section": "Testing our Model", - "text": "Testing our Model\nLet’s test drive our custom-loss-ready GBM with a few different loss functions! 
We’ll compare it to the scikit-learn GBM to sanity check our implementation.\n\nfrom sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n\nrng = np.random.default_rng()\n\n# test data\ndef make_test_data(n, noise_scale):\n x = np.linspace(0, 10, 500).reshape(-1,1)\n y = (np.where(x < 5, x, 5) + rng.normal(0, noise_scale, size=x.shape)).ravel()\n return x, y\n \n# print model loss scores\ndef print_model_loss_scores(obj, y, preds, sk_preds):\n print(f'From Scratch Loss = {obj.loss(y, pred):0.4}')\n print(f'Scikit-Learn Loss = {obj.loss(y, sk_pred):0.4}')\n\n\nMean Squared Error\nMean Squared Error (a.k.a. Least Squares) loss produces estimates of the mean target value conditioned on the feature values. Here’s the implementation.\n\nx, y = make_test_data(500, 0.4)\n\n\n# from scratch GBM\nclass SquaredErrorLoss():\n '''User-Defined Squared Error Loss'''\n \n def loss(self, y, preds):\n return np.mean((y - preds)**2)\n \n def negative_gradient(self, y, preds):\n return y - preds\n \n\ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, SquaredErrorLoss())\npred = gbm.predict(x)\n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingRegressor(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='squared_error')\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict(x)\n\n\nprint_model_loss_scores(SquaredErrorLoss(), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.168\nScikit-Learn Loss = 0.168\n\n\n\n\n\n\n\n\n\nMean Absolute Error\nMean Absolute Error (a.k.a.Least Absolute Deviations) loss produces estimates of the median target value conditioned on the feature values. Here’s the implementation.\n\nx, y = make_test_data(500, 0.4)\n\n\n\n# from scratch GBM\nclass AbsoluteErrorLoss():\n '''User-Defined Absolute Error Loss'''\n \n def loss(self, y, preds):\n return np.mean(np.abs(y - preds))\n \n def negative_gradient(self, y, preds):\n return np.sign(y - preds)\n\n\ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, AbsoluteErrorLoss())\npred = gbm.predict(x)\n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingRegressor(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='absolute_error')\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict(x)\n\n\nprint_model_loss_scores(AbsoluteErrorLoss(), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.3225\nScikit-Learn Loss = 0.3208\n\n\n\n\n\n\n\n\n\nQuantile Loss\nQuantile loss yields estimates of a given quantile of the target variable conditioned on the features. 
Here’s my implementation.\n\nx, y = make_test_data(500, 1)\n\n\n\n# from scratch GBM\nclass QuantileLoss():\n '''Quantile Loss\n \n Parameters\n ----------\n alpha : float\n quantile to be estimated, 0 < alpha < 1\n '''\n \n def __init__(self, alpha):\n if alpha < 0 or alpha >1:\n raise ValueError('alpha must be between 0 and 1')\n self.alpha = alpha\n \n def loss(self, y, preds):\n e = y - preds\n return np.mean(np.where(e > 0, self.alpha * e, (self.alpha - 1) * e))\n \n def negative_gradient(self, y, preds):\n e = y - preds \n return np.where(e > 0, self.alpha, self.alpha - 1)\n\ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, QuantileLoss(alpha=0.9))\npred = gbm.predict(x) \n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingRegressor(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='quantile', alpha=0.9)\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict(x)\n\n\nprint_model_loss_scores(QuantileLoss(alpha=0.9), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.1853\nScikit-Learn Loss = 0.1856\n\n\n\n\n\n\n\n\n\nBinary Cross Entropy Loss\nThe previous losses are useful for regression problems, where the target is numeric. But we can also solve classification problems, simply by swapping in an appropriate loss function. Here we’ll implement binary cross entropy, a.k.a. binary deviance, a.k.a. negative binomial log likelihood (sometimes abusively called log loss). One thing to remember is that, as with logistic regression, our model is actually predicting the log odds ratio, not the probability of the positive class. Thus we use expit transformations (the inverse of logit) whenever probabilities are needed, e.g., when predicting the probability that an observation belongs to the positive class.\n\n# make categorical test data\n\ndef expit(t):\n return np.exp(t) / (1 + np.exp(t))\n\nx = np.linspace(-3, 3, 500)\np = expit(x)\ny = rng.binomial(1, p, size=p.shape)\nx = x.reshape(-1,1)\n\n\n# from scratch GBM\nclass BinaryCrossEntropyLoss():\n '''Binary Cross Entropy Loss\n \n Note that the predictions should be log odds ratios.\n '''\n \n def __init__(self):\n self.expit = lambda t: np.exp(t) / (1 + np.exp(t))\n \n def loss(self, y, preds):\n p = self.expit(preds)\n return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))\n \n def negative_gradient(self, y, preds):\n p = self.expit(preds)\n return y / p - (1 - y) / (1 - p)\n\n \ngbm = GradientBoostingMachine(n_trees=10,\n learning_rate=0.5,\n max_depth=1)\ngbm.fit(x, y, BinaryCrossEntropyLoss())\npred = expit(gbm.predict(x))\n\n\n# scikit-learn GBM\nsk_gbm = GradientBoostingClassifier(n_estimators=10,\n learning_rate=0.5,\n max_depth=1,\n loss='log_loss')\nsk_gbm.fit(x, y)\nsk_pred = sk_gbm.predict_proba(x)[:, 1]\n\n\nprint_model_loss_scores(BinaryCrossEntropyLoss(), y, pred, sk_pred)\n\nFrom Scratch Loss = 0.6379\nScikit-Learn Loss = 0.6403" + "objectID": "posts/xgboost-from-scratch/index.html#testing", + "href": "posts/xgboost-from-scratch/index.html#testing", + "title": "XGBoost from Scratch", + "section": "Testing", + "text": "Testing\nLet’s take this baby for a spin and benchmark its performance against the actual XGBoost library. 
We use the scikit learn California housing dataset for benchmarking.\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.model_selection import train_test_split\n \nX, y = fetch_california_housing(as_frame=True, return_X_y=True)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, \n random_state=43)\n\nLet’s start with a nice friendly squared error objective function for training. We should probably have a future post all about how to define custom objective functions in XGBoost, but for now, here’s how I define squared error.\n\nclass SquaredErrorObjective():\n def loss(self, y, pred): return np.mean((y - pred)**2)\n def gradient(self, y, pred): return pred - y\n def hessian(self, y, pred): return np.ones(len(y))\n\nHere I use a more or less arbitrary set of hyperparameters for training. Feel free to play around with tuning and trying other parameter combinations yourself.\n\nimport xgboost as xgb\n\nparams = {\n 'learning_rate': 0.1,\n 'max_depth': 5,\n 'subsample': 0.8,\n 'reg_lambda': 1.5,\n 'gamma': 0.0,\n 'min_child_weight': 25,\n 'base_score': 0.0,\n 'tree_method': 'exact',\n}\nnum_boost_round = 50\n\n# train the from-scratch XGBoost model\nmodel_scratch = XGBoostModel(params, random_seed=42)\nmodel_scratch.fit(X_train, y_train, SquaredErrorObjective(), num_boost_round)\n\n# train the library XGBoost model\ndtrain = xgb.DMatrix(X_train, label=y_train)\ndtest = xgb.DMatrix(X_test, label=y_test)\nmodel_xgb = xgb.train(params, dtrain, num_boost_round)\n\nLet’s check the models’ performance on the held out test data to benchmark our implementation.\n\npred_scratch = model_scratch.predict(X_test)\npred_xgb = model_xgb.predict(dtest)\nprint(f'scratch score: {SquaredErrorObjective().loss(y_test, pred_scratch)}')\nprint(f'xgboost score: {SquaredErrorObjective().loss(y_test, pred_xgb)}')\n\nscratch score: 0.2434125759558149\nxgboost score: 0.24123239765807963\n\n\nWell, look at that! Our scratch-built SGBoost is looking pretty consistent with the library. Go us!" }, { - "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#wrapping-up", - "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#wrapping-up", - "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", + "objectID": "posts/xgboost-from-scratch/index.html#wrapping-up", + "href": "posts/xgboost-from-scratch/index.html#wrapping-up", + "title": "XGBoost from Scratch", "section": "Wrapping Up", - "text": "Wrapping Up\nWoohoo! We did it! We finally made it through Friedman’s paper in its entirety, and we implemented the generic gradient boosting algorithm which works with any differentiable loss function. If you made it this far, great job, gold star! By now you hopefully have a pretty solid grasp on gradient boosting, which is good, because soon we’re going to dive into the modern Newton descent gradient boosting frameworks like XGBoost. Onward!" + "text": "Wrapping Up\nI’d say this is a pretty good milestone for us here at Random Realizations. We’ve been hammering away at the various concepts around gradient boosting, leaving a trail of equations and scratch-built algos in our wake. Today we put all of that together to create a legit scratch build of XGBoost, something that would have been out of reach for me before we embarked on this journey together over a year ago. To anyone with the patience to read through this stuff, cheers to you! I hope you’re learning and enjoying this as much as I am." 
}, { - "objectID": "posts/gradient-boosting-machine-with-any-loss-function/index.html#references", - "href": "posts/gradient-boosting-machine-with-any-loss-function/index.html#references", - "title": "How to Implement a Gradient Boosting Machine that Works with Any Loss Function", - "section": "References", - "text": "References\nFriedman’s 2001 paper: Greedy Function Approximation: A Gradient Boosting Machine" + "objectID": "posts/xgboost-from-scratch/index.html#reader-exercises", + "href": "posts/xgboost-from-scratch/index.html#reader-exercises", + "title": "XGBoost from Scratch", + "section": "Reader Exercises", + "text": "Reader Exercises\nIf you want to take this a step further and deepen your understanding and coding abilities, let me recommend some exercises for you.\n\nImplement column subsampling. XGBoost itself provides column subsampling by tree, by level, and by node. Try implementing by tree first, then try adding by level or by node as well. These should be pretty straightforward to do.\nImplement sparsity aware split finding for missing feature values (Algorithm 2 in the XGBoost paper). This will be a little more involved, since you’ll need to refactor and modify several parts of the tree booster class." }, { - "objectID": "posts/hello-world/index.html", - "href": "posts/hello-world/index.html", - "title": "Hello World! And Why I’m Inspired to Start a Blog", + "objectID": "posts/consider-the-decision-tree/index.html", + "href": "posts/consider-the-decision-tree/index.html", + "title": "Consider the Decision Tree", "section": "", - "text": "Matt raises his arms in joy at the world.!\nWell, I’ve been thinking about getting this blog started for months now. I guess a combination of inertia, up-front investment in blogging platform selection/setup, and spending a little too much time writing and rewriting the first content post has drawn out the period from initial inspiration to making the blog a reality. Needless to say, I’m pretty excited to finally get things going.\nBefore we dive headlong into the weeds of ML algorithms, statistical methods, and whatever I happen to be learning and teaching at the moment, I figured it would be good to articulate why I’ve felt inspired to get started blogging in the first place. Hopefully this will serve the dual purpose of clarifying my intentions and introducing a vastly underappreciated concept in data science that I hope to weave through the posts to come." + "text": "A California cypress tree abides in silence on Alameda Beach.\nAh, the decision tree. It’s an underrated and often overlooked hero of modern statistical learning. Trees aren’t particularly powerful learning algorithms on their own, but when utilized as building blocks in larger ensemble models like random forest and gradient boosted trees, they can achieve state of the art performance in many practical applications. Since we’ve been focusing on gradient boosting ensembles lately, let’s take a moment to consider the humble decision tree itself. This post gives a high-level intuition for how trees work, an opinionated list of their key strengths and weaknesses, and some perspective on why ensembling makes them truly shine.\nOnward!" }, { - "objectID": "posts/hello-world/index.html#learning", - "href": "posts/hello-world/index.html#learning", - "title": "Hello World! 
And Why I’m Inspired to Start a Blog", - "section": "Learning", - "text": "Learning\nThe initial inception about blogging probably originated from some comments about learning that Jeremy Howard makes in the Practical Deep Learning course from fastai. During one of the lectures, he mentions that it’s a great idea to start blogging. To paraphrase Jeremy:\n\nThe thing I really love about blogging is that it helps you learn; by writing things down, you synthesize your ideas.\n\nBeautiful. That definitely rings true for me. I tend to take notes and play around with code when learning new concepts anyway. One of my key hypotheses about this blogging experiment is that making the effort to transform those notes into blog posts will help me learn more effectively." + "objectID": "posts/consider-the-decision-tree/index.html#classification-and-regression-trees", + "href": "posts/consider-the-decision-tree/index.html#classification-and-regression-trees", + "title": "Consider the Decision Tree", + "section": "Classification and Regression Trees", + "text": "Classification and Regression Trees\nA Decision tree is a type of statistical model that takes features or covariates as input and yields a prediction as output. The idea of the decision tree as a statistical learning tool traces back to a monograph published in 1984 by Breiman, Freidman, Olshen, and Stone called “Classification and Regression Trees” (a.k.a. CART). As the name suggests, trees come in two main varieties: classification trees which predict discrete class labels (e.g. DecisionTreeClassifier) and regression trees which predict numeric values (e.g. DecisionTreeRegressor).\nAs I mentioned earlier, tree models are not very powerful learners on their own. You might find that an individual tree model is useful for creating a simple and highly interpretable model in specific situations, but in general, trees tend to shine most as building blocks in more complex algorithms. These composite models are called ensembles, and the most important tree ensembles are random forest and gradient boosted trees. While random forest uses either regression or classification trees depending on the type of target, gradient boosting can use regression trees to solve both classification and regression tasks." }, { - "objectID": "posts/hello-world/index.html#teaching", - "href": "posts/hello-world/index.html#teaching", - "title": "Hello World! And Why I’m Inspired to Start a Blog", - "section": "Teaching", - "text": "Teaching\nAh, teaching. Yes, sometimes it’s that thing that takes time away from your research, forcing you to sit alone in a windowless room squinting at hand-written math on a fat stack of homework assignments. But sometimes it actually involves interacting with students, endeavoring to explain a concept, and watching them light up when they get it. The latter manifestation of teaching was one of my favorite things about grad school and academia in general. While I certainly still get to do some teaching as an industry data scientist, I could see myself returning to a more teaching-centric gig somewhere off in the future. Thus we have our second key hypothesis about the blogging experiment, that the writing will entertain my inclination to teach." 
+ "objectID": "posts/consider-the-decision-tree/index.html#regression-tree-in-action", + "href": "posts/consider-the-decision-tree/index.html#regression-tree-in-action", + "title": "Consider the Decision Tree", + "section": "Regression Tree in Action", + "text": "Regression Tree in Action\nLet’s have a closer look at regression trees by training one on the diabetes dataset from scikit learn. According to the documentation:\n\nTen baseline variables, age, sex, body mass index, average blood pressure, and six blood serum measurements were obtained for each of n = 442 diabetes patients, as well as the response of interest, a quantitative measure of disease progression one year after baseline.\n\nFirst we load the data. To make our lives easier, we’ll just use two features: average blood pressure (bp) and the first blood serum measurement (s1) to predict the target. I’ll rescale the features to make the values easier for me to read, but it won’t affect our tree–more on that later.\n\nimport numpy as np \nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport seaborn as sns\ncolor_palette = \"viridis\"\n\n\nfrom sklearn.datasets import load_diabetes\n\nX, y = load_diabetes(as_frame=True, return_X_y=True)\n\nX = 100 * X[['bp', 's1']]\n\n\n\n\n\n\nLet’s grow a tree to predict the target given values of blood pressure and blood serum.\n\nfrom sklearn.tree import DecisionTreeRegressor\n\ntree = DecisionTreeRegressor(max_depth=2)\ntree.fit(X,y);\n\n\n\n\n\n\nTo make predictions using our fitted tree, we start at the root node (which is at the top), and we work our way down moving left if our feature is less than the split threshold and to the right if it’s greater than the split threshold. For example let’s predict the target for a new case with bp= 1 and s1 = 5. Since our blood pressure of 1 is less than 2.359, we move to the left child node. Here, since our serum of 5 is greater than the threshold at 0.875, we move to the right child node. This node has no further children, and thus we return its predicted value of 155.343.\n\ntree.predict(pd.DataFrame({'bp': 1, 's1': 5}, index=[0]))\n\narray([155.34313725])\n\n\nLet’s overlay these splits on our feature scatterplot to see how the tree has partitioned the feature space.\n\n\n\n\n\nThe tree has managed to carve out regions of feature space where the target values tend to be similar within each region, e.g. we have low target values in the bottom left partition and high target values in the far right region.\nLet’s take a look at the regression surface predicted by our tree. Since the tree predicts the exact same value for all instances in a given partition, the surface has only four distinct values.\n\n\n\n\n\nFabulous, now that we’ve seen a tree in action, let’s talk about trees’ key strengths and weaknesses." }, { - "objectID": "posts/hello-world/index.html#contributing", - "href": "posts/hello-world/index.html#contributing", - "title": "Hello World! And Why I’m Inspired to Start a Blog", - "section": "Contributing", - "text": "Contributing\nWorking in the field of data science today is a bit like standing in front of a massive complimentary all-you-can-learn buffet. There is an abundance of free material out on the interwebs for learning pretty much anything in data science from hello world python tutorials to research papers on cutting-edge deep learning techniques. I’ve personally benefited from many a blog post that helped me unpack a new concept or get started using a new tool. 
And let’s not forget the gigantic cyber warehouse full of freely available open source software tools that volunteer developers have straight-up donated to humanity.\nI realize that up to now, I’ve simply been consuming all of this free goodness without giving anything substantive back in return. Well then, it’s time to start evening the score. Which brings us to key hypothesis number three, that through these blog posts, I might be able to create something helpful, thereby being of service to a community that has freely given so much to me." + "objectID": "posts/consider-the-decision-tree/index.html#why-trees-are-awesome", + "href": "posts/consider-the-decision-tree/index.html#why-trees-are-awesome", + "title": "Consider the Decision Tree", + "section": "Why trees are awesome", + "text": "Why trees are awesome\nTrees are awesome because they are easy to use, and trees are easy to use because they are robust, require minimal data preprocessing, and can learn complex relationships without user intervention.\n\nFeature Scaling\nTrees owe their minimal data preprocessing requirements and their robustness to the fact that split finding is controlled by the sort order of the input feature values, rather than the values themselves. This means that trees are invariant to the scaling of input features, which in turn means that we don’t need to fuss around with carefully rescaling all the numeric features before fitting a tree. It also means that trees tend to work well even if features are highly skewed or contain outliers.\n\n\nCategoricals\nSince trees just split data based on numeric feature values, we can easily handle most categorical features by using integer encoding. For example we might encode a size feature with small = 1, medium = 2, and large = 3. This works particularly well with ordered categories, because partitioning is consistent with the category semantics. It can also work well even if the categories have no order, because with enough splits a tree can carve each category into its own partition.\n\n\nMissing Values\nIt’s worth calling out that different implementations of the decision tree handle missing feature values in different ways. Notably, scikit-learn handles them by throwing an error and telling you not to pull such shenanigans.\nValueError: Input contains NaN, infinity or a value too large for dtype('float32').\nOn the other hand, XGBoost supports an elegant way to make use of missing values, which we will discuss more in a later post.\n\n\nInteractions\nFeature interactions can also be learned automatically. An interaction means that the effect of one feature on the target differs depending on the value of another feature. For example, the effect of some drug may depend on whether or not the patient exercises. After a tree splits on exercise, it can naturally learn the correct drug effects for both exercisers and non-exercisers. This intuition extends to higher-order interactions as well, as long as the tree has enough splits to parse the relationships.\n\n\nFeature Selection\nBecause trees choose the best feature and threshold value at each split, they essentially perform automatic feature selection. This is great because even if we throw a lot of irrelevant features at a decision tree, it will simply tend not to use them for splits. 
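The scale-invariance claim above is easy to check directly: fit the same tree on raw and rescaled copies of the features and compare predictions. A small sketch assuming the same diabetes features used earlier in the post (the factor of 100 is arbitrary):

# Illustrative sketch: rescaling features does not change a tree's predictions.
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor

X, y = load_diabetes(as_frame=True, return_X_y=True)
X = X[['bp', 's1']]

tree_raw = DecisionTreeRegressor(max_depth=2, random_state=0).fit(X, y)
tree_scaled = DecisionTreeRegressor(max_depth=2, random_state=0).fit(100 * X, y)

# The split thresholds differ by the factor of 100, but the partitions,
# and therefore the predictions, are the same; this should print True.
print(np.allclose(tree_raw.predict(X), tree_scaled.predict(100 * X)))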
Similarly, if two or more features are highly correlated or even redundant, the tree will simply choose one or the other when making each split; having both in the model will not cause catastrophic instability as it could in a linear model.\n\n\nFeature-Target Relationship\nFinally, it is possible for trees to discover complex nonlinear feature-target relationships without the need for user-specification of the relationships. This is because trees use local piecewise constant approximations without making any parametric assumptions. With enough splits, the tree can approximate arbitrary feature-target relationships." }, { - "objectID": "posts/hello-world/index.html#live-long-and-prosper-blog", - "href": "posts/hello-world/index.html#live-long-and-prosper-blog", - "title": "Hello World! And Why I’m Inspired to Start a Blog", - "section": "Live Long and Prosper, Blog", - "text": "Live Long and Prosper, Blog\nPhew, there it is, the original source of inspiration for this blogging experiment, and three reasons I think it might be a good idea. The astute reader will have noticed that these three assertions have been formulated as hypotheses which are to be tested in the laboratory of experience. And thus, we also have our first glimpse of the scientific method, an underrated concept that is going to help us put the science back in data science.\nWith that, blog, I christen thee, Random Realizations." + "objectID": "posts/consider-the-decision-tree/index.html#why-trees-are-not-so-awesome", + "href": "posts/consider-the-decision-tree/index.html#why-trees-are-not-so-awesome", + "title": "Consider the Decision Tree", + "section": "Why trees are not so awesome", + "text": "Why trees are not so awesome\nThe main weakness of the decision tree is that, on its own, it tends to have poor predictive performance compared to other algorithms. The main reasons for this are the tendency to overfit and prediction quantization issues.\n\nOverfitting\nIf we grow a decision tree until each leaf has exactly one instance in it, we will have simply memorized the training data, and our model will not generalize well. Basically the only defense against overfitting is to reduce the number of leaf nodes in the tree, either by using hyperparameters to stop splitting earlier or by removing certain leaf nodes after growing a deep tree. The problem here is that some of the benefits of trees, like ability to approximate arbitrary target patterns and ability to learn interaction effects, depend on having enough splits for the task. We can sometimes find ourselves in a situation where we cannot learn these complex relationships without overfitting the tree.\n\nQuantization\nBecause regression trees use piecewise constant functions to approximate the target, prediction accuracy can deteriorate near split boundaries. For example, if the target is increasing with the feature, a tree might tend to underpredict the target on the left side of split boundaries and overpredict on the right side of split boundaries.\n\n\n\n\n\n\n\nExtrapolation\nBecause they are trained by partitioning the feature space in a training dataset, trees cannot intelligently extrapolate beyond the data on which they are trained. For example if we query a tree for predictions beyond the greatest feature value encountered in training, it will just return the prediction corresponding to the largest in-sample feature values.\n\n\n\n\n\n\n\nThe Dark Side of Convenience\nFinally, there is always a price to pay for convenience. 
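The extrapolation limitation described above can be seen with a contrived example (the data below is invented for illustration): beyond the largest feature value seen in training, every query returns the right-most leaf's prediction.

# Illustrative sketch: constant predictions outside the training range.
import numpy as np
from sklearn.tree import DecisionTreeRegressor

x_train = np.linspace(0, 10, 200).reshape(-1, 1)  # toy feature on [0, 10]
y_train = 3.0 * x_train.ravel()                   # target keeps increasing

tree = DecisionTreeRegressor(max_depth=4).fit(x_train, y_train)

print(tree.predict([[5.0]]))                             # sensible inside the range
print(tree.predict([[12.0]]), tree.predict([[100.0]]))   # identical beyond it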
While trees can work well even with a messy dataset containing outliers, redundant features, and thoughtlessly encoded categoricals, we will rarely achieve the best performance under these conditions. Taking the time to deal with outliers, removing redundant information, purposefully choosing appropriate categorical encodings, and building an understanding of the data will often lead to much better results." + }, + { + "objectID": "posts/consider-the-decision-tree/index.html#how-ensembling-makes-trees-shine", + "href": "posts/consider-the-decision-tree/index.html#how-ensembling-makes-trees-shine", + "title": "Consider the Decision Tree", + "section": "How ensembling makes trees shine", + "text": "How ensembling makes trees shine\nWe can go a long way toward addressing the issues of overfitting and prediction quantization by using trees as building blocks in larger algorithms called tree ensembles, the most popular examples being random forest and gradient boosted trees. A tree ensemble is a collection of different individual tree models whose predictions are averaged to generate an overall prediction.\nEnsembling helps address overfitting because even if each individual tree is overfitted, the average of their individual noisy predictions will tend to be more stable. Think of it in terms of the bias variance tradeoff, where bias refers to a model’s failure to capture certain patterns and variance refers to how different a model prediction would be if the model were trained on a different sample of training data. Since the ensemble is averaging over the predictions of all the individual models, training it on a different sample of training data would change the individual models predictions, but their overall average prediction will tend to remain stable. Thus, ensembling helps reduce the effects of overfitting by reducing model variance without increasing bias.\nEnsembling also helps address prediction quantization issues. While each individual tree’s predictions might express large jumps in the regression surface, averaging many different trees’ predictions together effectively generates a surface with more partitions and smaller jumps between them. This provides a smoother approximation of the feature-target relationship." + }, + { + "objectID": "posts/consider-the-decision-tree/index.html#wrapping-up", + "href": "posts/consider-the-decision-tree/index.html#wrapping-up", + "title": "Consider the Decision Tree", + "section": "Wrapping Up", + "text": "Wrapping Up\nWell, there you go, that’s my take on the high-level overview of the decision tree and its main strengths and weaknesses. As we’ve seen, ensembling allows us to keep the conveniences of the decision tree while mitigating its core weakness of relatively weak predictive power. This is why tree ensembles are so popular in practical applications. We glossed over pretty much all details of how trees actually do their magic, but fear not, next time we’re going to get rowdy and build one of these things from scratch." 
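A bare-bones sketch of the averaging idea described in the ensembling entry above: fit many deep trees on bootstrap resamples and average their predictions (a hand-rolled bagging loop for illustration only, not the post's code and not a replacement for RandomForestRegressor):

# Illustrative sketch: averaging bootstrap-trained trees stabilizes predictions.
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor

X, y = load_diabetes(return_X_y=True)
rng = np.random.default_rng(42)

trees = []
for _ in range(100):
    idx = rng.integers(0, len(X), size=len(X))  # bootstrap resample
    trees.append(DecisionTreeRegressor().fit(X[idx], y[idx]))

# Each unpruned tree is badly overfit on its own; the ensemble prediction
# is the average over all trees, which varies much less between resamples.
ensemble_pred = np.mean([tree.predict(X) for tree in trees], axis=0)
print(ensemble_pred[:5])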
}, { "objectID": "posts/blogging-with-quarto-and-jupyter/index.html", @@ -648,7 +697,7 @@ "href": "archive.html", "title": "Archive", "section": "", - "text": "Blogging with Quarto and Jupyter: The Complete Guide\n\n\n\n\n\n\n\n\n\nSep 6, 2023\n\n\n\n\n\n\n\n\nRandom Realizations Resurrected\n\n\n\n\n\n\n\n\n\nAug 2, 2023\n\n\n\n\n\n\n\n\nXGBoost from Scratch\n\n\n\n\n\n\n\n\n\nMay 7, 2022\n\n\n\n\n\n\n\n\nXGBoost Explained\n\n\n\n\n\n\n\n\n\nMar 13, 2022\n\n\n\n\n\n\n\n\nDecision Tree from Scratch\n\n\n\n\n\n\n\n\n\nDec 13, 2021\n\n\n\n\n\n\n\n\nConsider the Decision Tree\n\n\n\n\n\n\n\n\n\nDec 12, 2021\n\n\n\n\n\n\n\n\nHow to Implement a Gradient Boosting Machine that Works with Any Loss Function\n\n\n\n\n\n\n\n\n\nOct 23, 2021\n\n\n\n\n\n\n\n\nHello PySpark!\n\n\n\n\n\n\n\n\n\nJun 22, 2021\n\n\n\n\n\n\n\n\nHow Gradient Boosting Does Gradient Descent\n\n\n\n\n\n\n\n\n\nApr 27, 2021\n\n\n\n\n\n\n\n\nGet Down with Gradient Descent\n\n\n\n\n\n\n\n\n\nJan 22, 2021\n\n\n\n\n\n\n\n\nHow to Build a Gradient Boosting Machine from Scratch\n\n\n\n\n\n\n\n\n\nDec 8, 2020\n\n\n\n\n\n\n\n\nThe 80/20 Pandas Tutorial\n\n\n\n\n\n\n\n\n\nNov 25, 2020\n\n\n\n\n\n\n\n\nHello World! And Why I’m Inspired to Start a Blog\n\n\n\n\n\n\n\n\n\nNov 22, 2020\n\n\n\n\n\n\nNo matching items" + "text": "XGBoost for Regression in Python\n\n\n\n\n\n\n\n\n\nOct 25, 2023\n\n\n\n\n\n\n\n\nBlogging with Quarto and Jupyter: The Complete Guide\n\n\n\n\n\n\n\n\n\nSep 6, 2023\n\n\n\n\n\n\n\n\nRandom Realizations Resurrected\n\n\n\n\n\n\n\n\n\nAug 2, 2023\n\n\n\n\n\n\n\n\nXGBoost from Scratch\n\n\n\n\n\n\n\n\n\nMay 7, 2022\n\n\n\n\n\n\n\n\nXGBoost Explained\n\n\n\n\n\n\n\n\n\nMar 13, 2022\n\n\n\n\n\n\n\n\nDecision Tree from Scratch\n\n\n\n\n\n\n\n\n\nDec 13, 2021\n\n\n\n\n\n\n\n\nConsider the Decision Tree\n\n\n\n\n\n\n\n\n\nDec 12, 2021\n\n\n\n\n\n\n\n\nHow to Implement a Gradient Boosting Machine that Works with Any Loss Function\n\n\n\n\n\n\n\n\n\nOct 23, 2021\n\n\n\n\n\n\n\n\nHello PySpark!\n\n\n\n\n\n\n\n\n\nJun 22, 2021\n\n\n\n\n\n\n\n\nHow Gradient Boosting Does Gradient Descent\n\n\n\n\n\n\n\n\n\nApr 27, 2021\n\n\n\n\n\n\n\n\nGet Down with Gradient Descent\n\n\n\n\n\n\n\n\n\nJan 22, 2021\n\n\n\n\n\n\n\n\nHow to Build a Gradient Boosting Machine from Scratch\n\n\n\n\n\n\n\n\n\nDec 8, 2020\n\n\n\n\n\n\n\n\nThe 80/20 Pandas Tutorial\n\n\n\n\n\n\n\n\n\nNov 25, 2020\n\n\n\n\n\n\n\n\nHello World! 
And Why I’m Inspired to Start a Blog\n\n\n\n\n\n\n\n\n\nNov 22, 2020\n\n\n\n\n\n\nNo matching items" }, { "objectID": "about.html", diff --git a/sitemap.xml b/sitemap.xml index a4f7fa9..d1c9292 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,70 +2,74 @@ https://randomrealizations.com/gradient-boosting-series.html - 2023-09-06T08:42:25.920Z + 2023-09-18T12:36:28.632Z https://randomrealizations.com/ - 2023-09-06T08:42:24.970Z + 2023-09-18T12:36:27.918Z https://randomrealizations.com/posts/xgboost-explained/ - 2023-09-06T08:42:23.748Z + 2023-09-18T12:36:26.685Z https://randomrealizations.com/posts/random-realizations-resurrected/ - 2023-09-06T08:42:22.712Z + 2023-09-18T12:36:25.726Z https://randomrealizations.com/posts/decision-tree-from-scratch/ - 2023-09-06T08:42:21.791Z + 2023-09-18T12:36:24.877Z - https://randomrealizations.com/posts/consider-the-decision-tree/ - 2023-09-06T08:42:20.407Z + https://randomrealizations.com/posts/xgboost-for-regression-in-python/ + 2023-09-18T12:36:23.472Z - https://randomrealizations.com/posts/xgboost-from-scratch/ - 2023-09-06T08:42:19.625Z + https://randomrealizations.com/posts/hello-world/ + 2023-09-18T12:36:22.012Z - https://randomrealizations.com/posts/hello-pyspark/ - 2023-09-06T08:42:17.993Z + https://randomrealizations.com/posts/gradient-boosting-machine-with-any-loss-function/ + 2023-09-18T12:36:21.268Z + + + https://randomrealizations.com/posts/get-down-with-gradient-descent/ + 2023-09-18T12:36:20.524Z https://randomrealizations.com/posts/8020-pandas-tutorial/ - 2023-09-06T08:42:17.339Z + 2023-09-18T12:36:19.440Z - https://randomrealizations.com/posts/get-down-with-gradient-descent/ - 2023-09-06T08:42:18.457Z + https://randomrealizations.com/posts/hello-pyspark/ + 2023-09-18T12:36:20.069Z - https://randomrealizations.com/posts/gradient-boosting-machine-with-any-loss-function/ - 2023-09-06T08:42:19.194Z + https://randomrealizations.com/posts/xgboost-from-scratch/ + 2023-09-18T12:36:21.712Z - https://randomrealizations.com/posts/hello-world/ - 2023-09-06T08:42:19.959Z + https://randomrealizations.com/posts/consider-the-decision-tree/ + 2023-09-18T12:36:22.520Z https://randomrealizations.com/posts/blogging-with-quarto-and-jupyter/ - 2023-09-06T08:42:21.254Z + 2023-09-18T12:36:24.343Z https://randomrealizations.com/posts/how-gradient-boosting-does-gradient-descent/ - 2023-09-06T08:42:22.425Z + 2023-09-18T12:36:25.447Z https://randomrealizations.com/posts/gradient-boosting-machine-from-scratch/ - 2023-09-06T08:42:23.170Z + 2023-09-18T12:36:26.164Z https://randomrealizations.com/archive.html - 2023-09-06T08:42:24.220Z + 2023-09-18T12:36:27.165Z https://randomrealizations.com/about.html - 2023-09-06T08:42:25.195Z + 2023-09-18T12:36:28.132Z