main.diff

diff --git "a/C:\\dev\\jade\\chatglm_cpp2\\main.cpp" b/main.cpp
index 68a63cc..9979be5 100644
--- "a/C:\\dev\\jade\\chatglm_cpp2\\main.cpp"
+++ b/main.cpp
@@ -207,11 +207,13 @@ int main(int argc, char** argv) {
         }
         
         model->reshape(shapes);
-        ov::preprocess::PrePostProcessor p3(model);
-        p3.input("input_ids").tensor().set_element_type(ov::element::i32);  // cast to the type of tokenyzer's output
-        p3.input("attention_mask").tensor().set_element_type(ov::element::i32);
-        p3.input("position_ids").tensor().set_element_type(ov::element::i32);
-        model = p3.build();
+        {
+            ov::preprocess::PrePostProcessor p3(model);
+            p3.input("input_ids").tensor().set_element_type(ov::element::i32);  // cast to the type of tokenyzer's output
+            p3.input("attention_mask").tensor().set_element_type(ov::element::i32);
+            p3.input("position_ids").tensor().set_element_type(ov::element::i32);
+            model = p3.build();
+        }
         //ov::InferRequest ireq = core.compile_model(model, "CPU", { ov::cache_dir("llm-cache") }).create_infer_request();
         //ov::InferRequest ireq = core.compile_model(model, "CPU").create_infer_request();
         startTime = Time::now();
@@ -222,6 +224,9 @@ int main(int argc, char** argv) {
         ov::InferRequest ireq;
         int32_t out_token;
 
+        auto model_inputs = model->inputs();
+        model = nullptr;
+
         for (std::string input_text : sentences) {
             std::cout << " #### sentence: index " << input_text << std::endl;
 
@@ -239,7 +244,7 @@ int main(int argc, char** argv) {
             duration_ms = get_duration_ms_until_now(startTime);
             std::cout << "Get Tokenizer id  took " << duration_ms << " ms" << std::endl;
 
-            for (const ov::Output<ov::Node>& input : model->inputs()) {
+            for (const ov::Output<ov::Node>& input : model_inputs) {
                 for (const std::string& name : input.get_names()) {
                     if (name.rfind("past_key_values", 0) == 0) {
                         ireq.get_tensor(input).set_shape(input.get_partial_shape().get_min_shape());
@@ -290,7 +295,7 @@ int main(int argc, char** argv) {
             constexpr int32_t SPECIAL_EOS_TOKEN = 2;  // There's no way to extract the value from the tokenizer for now
             while (out_token != SPECIAL_EOS_TOKEN) {
                 startTime = Time::now();
-                for (const ov::Output<ov::Node>& input : model->inputs()) {
+                for (const ov::Output<ov::Node>& input : model_inputs) {
                     for (const std::string& name : input.get_names()) {
                         if (name.rfind("past_key_values", 0) == 0) {
                             ireq.set_tensor(input, ireq.get_tensor("present" + name.substr(15)));