From bd81931e772cbab870191a8ee2396ddc039ebbe3 Mon Sep 17 00:00:00 2001 From: Adedapo Adeniran <41041115+Brainydaps@users.noreply.github.com> Date: Wed, 3 Jul 2024 02:13:02 +0100 Subject: [PATCH 1/2] Update TrainingModel.cs --- TrainingModel.cs | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/TrainingModel.cs b/TrainingModel.cs index e98594a..b46597a 100644 --- a/TrainingModel.cs +++ b/TrainingModel.cs @@ -4,6 +4,7 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Trainers.LightGbm; using Microsoft.ML.Transforms.Text; namespace TalkingStage @@ -34,17 +35,22 @@ public void TrainAndSaveModel(string trainingDataPath, string modelPath) var preprocessedTrainingDataView = mlContext.Data.LoadFromEnumerable(preprocessedTrainingData); // Define the data preparation and training pipeline - var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", nameof(TalkingStageBot.InputData.Text)) - .Append(mlContext.Transforms.Conversion.MapValueToKey("LabelKey", nameof(TalkingStageBot.InputData.Label))) - .Append(mlContext.Transforms.Concatenate("Features", "Features")) - .AppendCacheCheckpoint(mlContext) - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( - labelColumnName: "LabelKey", - featureColumnName: "Features", - l2Regularization: 0.1f, - l1Regularization: 0.01f, - maximumNumberOfIterations: 1000)) - .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel")); + var pipeline = mlContext.Transforms.Text.FeaturizeText(inputColumnName: @"Text", outputColumnName: @"Text") + .Append(mlContext.Transforms.Concatenate(@"Features", new[] { @"Text" })) + .Append(mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: @"Label", inputColumnName: @"Label", addKeyValueAnnotationsAsText: false)) + .Append(mlContext.MulticlassClassification.Trainers.LightGbm(new LightGbmMulticlassTrainer.Options() { + NumberOfLeaves = 30, + NumberOfIterations = 200, + MinimumExampleCountPerLeaf = 1, + LearningRate = 0.8, + LabelColumnName = @"Label", FeatureColumnName = @"Features", ExampleWeightColumnName = null, + Booster = new GradientBooster.Options() { + SubsampleFraction = 1, + FeatureFraction = 0.9, + L1Regularization = 0.03, + L2Regularization = 0.1 }, + MaximumBinCountPerFeature = 254 })) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: @"PredictedLabel", inputColumnName: @"PredictedLabel")); // Train the model var model = pipeline.Fit(preprocessedTrainingDataView); From 2291b3f4fa6d0edd651ccd0f37d804899cf6ee5a Mon Sep 17 00:00:00 2001 From: Adedapo Adeniran <41041115+Brainydaps@users.noreply.github.com> Date: Wed, 3 Jul 2024 02:18:11 +0100 Subject: [PATCH 2/2] Update README.md --- README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c7f3dfa..e28cbf6 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,17 @@ TalkingStage is a conversational bot that uses machine learning to predict respo ![Screenshot 2024-06-25 172912](https://github.com/Brainydaps/TalkingStage/assets/41041115/2a04c075-23b4-4ae5-a9c1-87972993eff6) ![Screenshot 2024-06-29 at 03 26 57](https://github.com/Brainydaps/TalkingStage/assets/41041115/b3a18c48-0370-4471-8e84-2e24a2abfdd3) -## What's New in v1.2.1 +## What's New in v1.2.3 Release + +- Implemented a more advanced machine learning algorithm (LightGBM) for improved prediction accuracy. +- Fine-tuned model parameters for enhanced performance and reliability. +- Optimized data preprocessing steps to ensure consistent and effective training. +- Updated data loading and preprocessing to handle text data more efficiently. +- Streamlined the training pipeline for faster model training and deployment. +- Enhanced model robustness with increased iterations and improved feature handling. +- Integrated comprehensive error handling and logging for better debugging capabilities. +- Improved overall code readability and maintainability. -- **Initial Interaction Alert**: On the first interaction, the bot now displays an alert with guidance on how to format questions for better responses. This helps users understand the input format for optimal results. -- **Unique Responses**: Updated the response mechanism to ensure that each response is unique and separated by a comma. -- **Expanded Training Data**: The training data has been expanded to better handle a wider variety of questions, resulting in more accurate and relevant answers. ## Project Structure