Bug fixes
Fixed bugs in the legacy Linear Regression with a Synthetic Dataset Colab, per #4754:

* Switched API call from tf.keras.optimizers.experimental.RMSprop to tf.keras.optimizers.RMSprop
* Converted dataset features and labels to np.array
* Converted Task #3 learning rate value to a float
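
In code terms, the three fixes look roughly like this (a minimal before/after sketch, not the notebook's full cells; the example values are illustrative):

```python
import numpy as np
import tensorflow as tf

# Fix 1: the experimental optimizer namespace was removed in newer
# Keras releases, so the notebook now uses the stable path.
# Old: optimizer = tf.keras.optimizers.experimental.RMSprop(learning_rate=0.01)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)

# Fix 2: features and labels are now NumPy arrays rather than plain
# Python lists before being passed to model.fit.
# Old: my_feature = ([1.0, 2.0, 3.0, ...])
my_feature = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
my_label = np.array([5.0, 8.8, 9.6, 14.2, 18.8, 19.5])

# Fix 3: the Task 3 learning rate is now an explicit float.
# Old: learning_rate=100
learning_rate = 100.0
```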
sandersk authored Aug 28, 2024
1 parent 6a780b9 commit ed18ccd
Showing 1 changed file with 38 additions and 42 deletions.
80 changes: 38 additions & 42 deletions ml/cc/exercises/linear_regression_with_synthetic_data.ipynb
@@ -42,7 +42,7 @@
 "source": [
 "# Simple Linear Regression with Synthetic Data\n",
 "\n",
-"In this first Colab, you'll explore linear regression with a simple database. "
+"In this first Colab, you'll explore linear regression with a simple database."
 ]
 },
 {
{
@@ -82,9 +82,10 @@
 },
 "outputs": [],
 "source": [
+"import numpy as np\n",
 "import pandas as pd\n",
 "import tensorflow as tf\n",
-"from matplotlib import pyplot as plt\n"
+"from matplotlib import pyplot as plt"
 ]
 },
{
@@ -98,7 +99,7 @@
 "The following code defines two functions:\n",
 "\n",
 " * `build_model(my_learning_rate)`, which builds an empty model.\n",
-" * `train_model(model, feature, label, epochs)`, which trains the model from the examples (feature and label) you pass. \n",
+" * `train_model(model, feature, label, epochs)`, which trains the model from the examples (feature and label) you pass.\n",
 "\n",
 "Since you don't need to understand model building code right now, we've hidden this code cell. You may optionally double-click the headline to explore this code."
@@ -115,33 +116,33 @@
 "#@title Define the functions that build and train a model\n",
 "def build_model(my_learning_rate):\n",
 " \"\"\"Create and compile a simple linear regression model.\"\"\"\n",
-" # Most simple tf.keras models are sequential. \n",
+" # Most simple tf.keras models are sequential.\n",
 " # A sequential model contains one or more layers.\n",
 " model = tf.keras.models.Sequential()\n",
 "\n",
 " # Describe the topography of the model.\n",
 " # The topography of a simple linear regression model\n",
-" # is a single node in a single layer. \n",
-" model.add(tf.keras.layers.Dense(units=1, \n",
+" # is a single node in a single layer.\n",
+" model.add(tf.keras.layers.Dense(units=1,\n",
 " input_shape=(1,)))\n",
 "\n",
-" # Compile the model topography into code that \n",
-" # TensorFlow can efficiently execute. Configure \n",
-" # training to minimize the model's mean squared error. \n",
-" model.compile(optimizer=tf.keras.optimizers.experimental.RMSprop(learning_rate=my_learning_rate),\n",
+" # Compile the model topography into code that\n",
+" # TensorFlow can efficiently execute. Configure\n",
+" # training to minimize the model's mean squared error.\n",
+" model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate),\n",
 " loss=\"mean_squared_error\",\n",
 " metrics=[tf.keras.metrics.RootMeanSquaredError()])\n",
 "\n",
-" return model \n",
+" return model\n",
 "\n",
 "\n",
 "def train_model(model, feature, label, epochs, batch_size):\n",
 " \"\"\"Train the model by feeding it data.\"\"\"\n",
 "\n",
-" # Feed the feature values and the label values to the \n",
-" # model. The model will train for the specified number \n",
+" # Feed the feature values and the label values to the\n",
+" # model. The model will train for the specified number\n",
 " # of epochs, gradually learning how the feature values\n",
-" # relate to the label values. \n",
+" # relate to the label values.\n",
 " history = model.fit(x=feature,\n",
 " y=label,\n",
 " batch_size=batch_size,\n",
@@ -151,15 +152,15 @@
 " trained_weight = model.get_weights()[0][0]\n",
 " trained_bias = model.get_weights()[1]\n",
 "\n",
-" # The list of epochs is stored separately from the \n",
+" # The list of epochs is stored separately from the\n",
 " # rest of history.\n",
 " epochs = history.epoch\n",
-" \n",
+"\n",
 " # Gather the history (a snapshot) of each epoch.\n",
 " hist = pd.DataFrame(history.history)\n",
 "\n",
-" # Specifically gather the model's root mean \n",
-" # squared error at each epoch. \n",
+" # Specifically gather the model's root mean\n",
+" # squared error at each epoch.\n",
 " rmse = hist[\"root_mean_squared_error\"]\n",
 "\n",
 " return trained_weight, trained_bias, epochs, rmse\n",
@@ -248,8 +249,8 @@
 },
 "outputs": [],
 "source": [
-"my_feature = ([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0])\n",
-"my_label = ([5.0, 8.8, 9.6, 14.2, 18.8, 19.5, 21.4, 26.8, 28.9, 32.0, 33.8, 38.2])"
+"my_feature = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0])\n",
+"my_label = np.array([5.0, 8.8, 9.6, 14.2, 18.8, 19.5, 21.4, 26.8, 28.9, 32.0, 33.8, 38.2])"
 ]
 },
 {
@@ -282,7 +283,7 @@
 "my_batch_size=12\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -332,7 +333,7 @@
 "my_batch_size=12\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -351,10 +352,10 @@
 "#@title Double-click to view a possible solution\n",
 "learning_rate=0.01\n",
 "epochs=450\n",
-"my_batch_size=12 \n",
+"my_batch_size=12\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -383,11 +384,11 @@
 "outputs": [],
 "source": [
 "# Increase the learning rate and decrease the number of epochs.\n",
-"learning_rate=100 \n",
-"epochs=500 \n",
+"learning_rate=100.0\n",
+"epochs=500\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -400,7 +401,7 @@
 "id": "c96ITm021NEV"
 },
 "source": [
-"The resulting model is terrible; the red line doesn't align with the blue dots. Furthermore, the loss curve oscillates like a [roller coaster](https://www.wikipedia.org/wiki/Roller_coaster). An oscillating loss curve strongly suggests that the learning rate is too high. "
+"The resulting model is terrible; the red line doesn't align with the blue dots. Furthermore, the loss curve oscillates like a [roller coaster](https://www.wikipedia.org/wiki/Roller_coaster). An oscillating loss curve strongly suggests that the learning rate is too high."
 ]
 },
 {
@@ -411,7 +412,7 @@
 "source": [
 "## Task 4: Find the ideal combination of epochs and learning rate\n",
 "\n",
-"Assign values to the following two hyperparameters to make training converge as efficiently as possible: \n",
+"Assign values to the following two hyperparameters to make training converge as efficiently as possible:\n",
 "\n",
 "* learning_rate\n",
 "* epochs"
@@ -430,7 +431,7 @@
 "epochs= ? # Replace ? with an integer\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -452,7 +453,7 @@
 "epochs=70\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -471,7 +472,7 @@
 "\n",
 "One **epoch** spans sufficient iterations to process every example in the dataset. For example, if the batch size is 12, then each epoch lasts one iteration. However, if the batch size is 6, then each epoch consumes two iterations. \n",
 "\n",
-"It is tempting to simply set the batch size to the number of examples in the dataset (12, in this case). However, the model might actually train faster on smaller batches. Conversely, very small batches might not contain enough information to help the model converge. \n",
+"It is tempting to simply set the batch size to the number of examples in the dataset (12, in this case). However, the model might actually train faster on smaller batches. Conversely, very small batches might not contain enough information to help the model converge.\n",
 "\n",
 "Experiment with `batch_size` in the following code cell. What's the smallest integer you can set for `batch_size` and still have the model converge in a hundred epochs?"
 ]
@@ -489,7 +490,7 @@
 "my_batch_size= ? # Replace ? with an integer.\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -512,7 +513,7 @@
 "my_batch_size=1 # Wow, a batch size of 1 works!\n",
 "\n",
 "my_model = build_model(learning_rate)\n",
-"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature, \n",
+"trained_weight, trained_bias, epochs, rmse = train_model(my_model, my_feature,\n",
 " my_label, epochs,\n",
 " my_batch_size)\n",
 "plot_the_model(trained_weight, trained_bias, my_feature, my_label)\n",
@@ -529,25 +530,20 @@
 "\n",
 "Most machine learning problems require a lot of hyperparameter tuning. Unfortunately, we can't provide concrete tuning rules for every model. Lowering the learning rate can help one model converge efficiently but make another model converge much too slowly. You must experiment to find the best set of hyperparameters for your dataset. That said, here are a few rules of thumb:\n",
 "\n",
-" * Training loss should steadily decrease, steeply at first, and then more slowly until the slope of the curve reaches or approaches zero. \n",
+" * Training loss should steadily decrease, steeply at first, and then more slowly until the slope of the curve reaches or approaches zero.\n",
 " * If the training loss does not converge, train for more epochs.\n",
 " * If the training loss decreases too slowly, increase the learning rate. Note that setting the learning rate too high may also prevent training loss from converging.\n",
 " * If the training loss varies wildly (that is, the training loss jumps around), decrease the learning rate.\n",
 " * Lowering the learning rate while increasing the number of epochs or the batch size is often a good combination.\n",
 " * Setting the batch size to a *very* small batch number can also cause instability. First, try large batch size values. Then, decrease the batch size until you see degradation.\n",
-" * For real-world datasets consisting of a very large number of examples, the entire dataset might not fit into memory. In such cases, you'll need to reduce the batch size to enable a batch to fit into memory. \n",
+" * For real-world datasets consisting of a very large number of examples, the entire dataset might not fit into memory. In such cases, you'll need to reduce the batch size to enable a batch to fit into memory.\n",
 "\n",
 "Remember: the ideal combination of hyperparameters is data dependent, so you must always experiment and verify."
 ]
 }
 ],
 "metadata": {
 "colab": {
-"collapsed_sections": [],
-"last_runtime": {
-"build_target": "//learning/deepmind/public/tools/ml_python:ml_notebook",
-"kind": "private"
-},
 "name": "Linear Regression with Synthetic Data.ipynb",
 "private_outputs": true,
 "provenance": []
@@ -559,4 +555,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
+}
