for learning

Dependencies:   mbed FastPWM

Revision:
177:8e9cf31d63f4
Parent:
176:589ea3edcf3c
Child:
178:1074553d2f6f
diff -r 589ea3edcf3c -r 8e9cf31d63f4 main.cpp
--- a/main.cpp	Tue Nov 24 10:16:10 2020 +0000
+++ b/main.cpp	Tue Nov 24 13:04:54 2020 +0000
@@ -1,4 +1,4 @@
-//201124_4
+//201124_5
 #include "mbed.h"
 #include "FastPWM.h"
 #include "INIT_HW.h"
@@ -489,38 +489,35 @@
 
 void update_Critic_Networks(float (*arr)[num_input_RL])
 {
-    float gradient_rate = 0.001f;
+    float gradient_rate = 0.1f;
     float G_hc1[num_input_RL][num_hidden_unit1] = {0.0f};
-    float d_V_d_hc1[batch_size][num_input_RL][num_hidden_unit1] = {0.0f};   ////////////////1
-    float G_bc1[num_hidden_unit1] = {0.0f}; 
-    float d_V_d_bc1[batch_size][num_hidden_unit1] = {0.0f};                 ////////////////2
+    float G_bc1[num_hidden_unit1] = {0.0f};
     for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             for (int n=0; n<batch_size; n++) {
+                float d_V_d_hc1 = 0.0f;
                 for(int k=0; k<num_hidden_unit2; k++) {
                     if (hxh_c_sum_array[n][k] >= 0) {
                         if (hx_c_sum_array[n][index2] > 0) {
-                            //G_hc1[index1][index2] = G_hc1[index1][index2] + arr[n][index1]*hc2_temp[index2][k]*hc3_temp[k];               //////////////////////3
-                            d_V_d_hc1[n][index1][index2] = d_V_d_hc1[n][index1][index2] + arr[n][index1]*hc2_temp[index2][k]*hc3_temp[k];   //////////////////////4
+                            d_V_d_hc1 = d_V_d_hc1 + arr[n][index1]*hc2_temp[index2][k]*hc3_temp[k];
                         }
                     }
                 }
-                G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc1[n][index1][index2]);                    /////////////////////5
+                G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc1);
             }
             G_hc1[index1][index2] = G_hc1[index1][index2] / batch_size;
             //hc1_temp[index1][index2] = hc1_temp[index1][index2] - gradient_rate * G_hc1[index1][index2];
         }
         for (int n=0; n<batch_size; n++) {
+            float d_V_d_bc1 = 0.0f;
             for(int k=0; k<num_hidden_unit2; k++) {
                 if (hxh_c_sum_array[n][k] >= 0) {
                     if (hx_c_sum_array[n][index2] > 0) {
-                        //G_bc1[index2] = G_bc1[index2] + hc2_temp[index2][k]*hc3_temp[k];                                                  //////////////////6
-                        d_V_d_bc1[n][index2] = d_V_d_bc1[n][index2] + hc2_temp[index2][k]*hc3_temp[k];                                      //////////////////7
+                        d_V_d_bc1 = d_V_d_bc1 + hc2_temp[index2][k]*hc3_temp[k];
                     }
-
                 }
             }
-            G_bc1[index2] = G_bc1[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc1[n][index2]);                                                /////////////////////8
+            G_bc1[index2] = G_bc1[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc1);
         }
         G_bc1[index2] = G_bc1[index2] / batch_size;
         //bc1_temp[index2] = bc1_temp[index2] - gradient_rate * G_bc1[index2];
@@ -528,54 +525,50 @@
 
 
     float G_hc2[num_hidden_unit1][num_hidden_unit2] = {0.0f};
-    float d_V_d_hc2[batch_size][num_hidden_unit1][num_hidden_unit2] = {0.0f};   
     float G_bc2[num_hidden_unit2] = {0.0f};
-    float d_V_d_bc2[batch_size][num_hidden_unit2] = {0.0f};                    
     for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
         for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
             for (int n=0; n<batch_size; n++) {
+                float d_V_d_hc2 = 0.0f;
                 if (hxh_c_sum_array[n][index2] >= 0) {
                     if (hx_c_sum_array[n][index1] > 0) {
-                        //G_hc2[index1][index2] = G_hc2[index1][index2] + hx_c_sum_array[n][index1]*hc3_temp[index2];
-                        d_V_d_hc2[n][index1][index2] = hx_c_sum_array[n][index1]*hc3_temp[index2];
+                        d_V_d_hc2 = hx_c_sum_array[n][index1]*hc3_temp[index2];
                     }
                 }
-                G_hc2[index1][index2] = G_hc2[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc2[n][index1][index2]);                    
+                G_hc2[index1][index2] = G_hc2[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc2);
             }
             G_hc2[index1][index2] = G_hc2[index1][index2] / batch_size;
             //hc2_temp[index1][index2] = hc2_temp[index1][index2] - gradient_rate * G_hc2[index1][index2];
         }
         for (int n=0; n<batch_size; n++) {
+            float d_V_d_bc2 = 0.0f;
             if (hxh_c_sum_array[n][index2] >= 0) {
-                //G_bc2[index2] = G_bc2[index2] + hc3_temp[index2];
-                d_V_d_bc2[n][index2] = hc3_temp[index2];
+                d_V_d_bc2 = hc3_temp[index2];
             }
-            G_bc2[index2] = G_bc2[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc2[n][index2]); 
+            G_bc2[index2] = G_bc2[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc2);
         }
         G_bc2[index2] = G_bc2[index2] / batch_size;
         //bc2_temp[index2] = bc2_temp[index2] - gradient_rate * G_bc2[index2];
     }
 
     float G_hc3[num_hidden_unit2]= {0.0f};
-    float d_V_d_hc3[batch_size][num_hidden_unit2] = {0.0f};  
     float G_bc3 = 0.0f;
-    float d_V_d_bc3[batch_size] = {0.0f}; 
     for (int index2 = 0; index2 < 1; index2++) {
         for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
             for (int n=0; n<batch_size; n++) {
+                float d_V_d_hc3 = 0.0f;
                 if (hxh_c_sum_array[n][index1] >= 0) {
-                    //G_hc3[index1] = G_hc3[index1] + hxh_c_sum_array[n][index1];
-                    d_V_d_hc3[n][index1] = d_V_d_hc3[n][index1] + hxh_c_sum_array[n][index1];
+                    d_V_d_hc3 = d_V_d_hc3 + hxh_c_sum_array[n][index1];
                 }
-                G_hc3[index1] = G_hc3[index1] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc3[n][index1]);
+                G_hc3[index1] = G_hc3[index1] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc3);
             }
             G_hc3[index1] = G_hc3[index1] / batch_size;
             //hc3_temp[index1] = hc3_temp[index1] - gradient_rate * G_hc3[index1];
         }
         for (int n=0; n<batch_size; n++) {
-            //G_bc2[index2] = G_bc2[index2] + 1.0f;
-            d_V_d_bc3[n] = 1.0f;
-            G_bc3 = G_bc3 + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc3[n]); 
+            float d_V_d_bc3 = 0.0f;
+            d_V_d_bc3 = 1.0f;
+            G_bc3 = G_bc3 + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc3);
         }
         G_bc3 = G_bc3 / batch_size;
         //bc3_temp = bc3_temp - gradient_rate * G_bc3;
@@ -600,39 +593,36 @@
         }
         bc3_temp = bc3_temp - gradient_rate * G_bc3;
     }
-
 }
 
 ///////////////////////////Softplus//////////////////////////////////
 void update_Actor_Networks(float (*arr)[num_input_RL])
 {
-    float gradient_rate = 0.001f;
+    float gradient_rate = 0.1f;
 
     float G_ha1[num_input_RL][num_hidden_unit1] = {0.0f};
     float G_ba1[num_hidden_unit1] = {0.0f};
-    float d_x_d_ha1[batch_size][num_input_RL][num_hidden_unit1] = {0.0f};
-    float d_x_d_ba1[batch_size][num_hidden_unit1] = {0.0f};
-    float d_y_d_ha1[batch_size][num_input_RL][num_hidden_unit1] = {0.0f};
-    float d_y_d_ba1[batch_size][num_hidden_unit1] = {0.0f};
 
     for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             for (int n=0; n<batch_size; n++) {
+                float d_x_d_ha1 = 0.0f;
+                float d_y_d_ha1 = 0.0f;
                 if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) {
                     G_ha1[index1][index2] = G_ha1[index1][index2];
                 } else {
                     for(int k=0; k<num_hidden_unit2; k++) {
                         if (hxh_a_sum_array[n][k] >= 0) {
                             if (hx_a_sum_array[n][index2] > 0) {
-                                d_x_d_ha1[n][index1][index2] = d_x_d_ha1[n][index1][index2] + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][0];
-                                d_y_d_ha1[n][index1][index2] = d_y_d_ha1[n][index1][index2] + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][1];
+                                d_x_d_ha1 = d_x_d_ha1 + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][0];
+                                d_y_d_ha1 = d_y_d_ha1 + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][1];
                             }
                         }
                     }
                     float d_mean_d_ha1 = 0.0f;
                     float d_dev_d_ha1 = 0.0f;
-                    d_mean_d_ha1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha1[n][index1][index2];
-                    d_dev_d_ha1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha1[n][index1][index2];
+                    d_mean_d_ha1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha1;
+                    d_dev_d_ha1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha1;
 
                     G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
                 }
@@ -642,21 +632,23 @@
         }
 
         for (int n=0; n<batch_size; n++) {
+            float d_x_d_ba1 = 0.0f;
+            float d_y_d_ba1 = 0.0f;
             if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon))  {
                 G_ba1[index2] = G_ba1[index2];
             } else {
                 for(int k=0; k<num_hidden_unit2; k++) {
                     if (hxh_a_sum_array[n][k] >= 0) {
                         if (hx_a_sum_array[n][index2] > 0) {
-                            d_x_d_ba1[n][index2] = d_x_d_ba1[n][index2] + ha2_temp[index2][k]*ha3_temp[k][0];
-                            d_y_d_ba1[n][index2] = d_y_d_ba1[n][index2] + ha2_temp[index2][k]*ha3_temp[k][1];
+                            d_x_d_ba1 = d_x_d_ba1 + ha2_temp[index2][k]*ha3_temp[k][0];
+                            d_y_d_ba1 = d_y_d_ba1 + ha2_temp[index2][k]*ha3_temp[k][1];
                         }
                     }
                 }
                 float d_mean_d_ba1 = 0.0f;
                 float d_dev_d_ba1 = 0.0f;
-                d_mean_d_ba1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba1[n][index2];
-                d_dev_d_ba1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba1[n][index2];
+                d_mean_d_ba1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba1;
+                d_dev_d_ba1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba1;
 
                 G_ba1[index2] = G_ba1[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
             }
@@ -667,28 +659,26 @@
 
     float G_ha2[num_hidden_unit1][num_hidden_unit2] = {0.0f};
     float G_ba2[num_hidden_unit2] = {0.0f};
-    float d_x_d_ha2[batch_size][num_hidden_unit1][num_hidden_unit2] = {0.0f};
-    float d_x_d_ba2[batch_size][num_hidden_unit2] = {0.0f};
-    float d_y_d_ha2[batch_size][num_hidden_unit1][num_hidden_unit2] = {0.0f};
-    float d_y_d_ba2[batch_size][num_hidden_unit2] = {0.0f};
 
     for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
         for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
             for (int n=0; n<batch_size; n++) {
+                float d_x_d_ha2 = 0.0f;
+                float d_y_d_ha2 = 0.0f;
                 if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) {
                     G_ha2[index1][index2] = G_ha2[index1][index2];
                 } else {
                     if (hxh_a_sum_array[n][index2] >= 0) {
                         if (hx_a_sum_array[n][index1] > 0) {
-                            d_x_d_ha2[n][index1][index2] = hx_a_sum_array[n][index1]*ha3_temp[index2][0];
-                            d_y_d_ha2[n][index1][index2] = hx_a_sum_array[n][index1]*ha3_temp[index2][1];
+                            d_x_d_ha2 = hx_a_sum_array[n][index1]*ha3_temp[index2][0];
+                            d_y_d_ha2 = hx_a_sum_array[n][index1]*ha3_temp[index2][1];
                         }
                     }
 
                     float d_mean_d_ha2 = 0.0f;
                     float d_dev_d_ha2 = 0.0f;
-                    d_mean_d_ha2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha2[n][index1][index2];
-                    d_dev_d_ha2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha2[n][index1][index2];
+                    d_mean_d_ha2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha2;
+                    d_dev_d_ha2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha2;
 
                     G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
                 }
@@ -698,18 +688,20 @@
         }
 
         for (int n=0; n<batch_size; n++) {
+            float d_x_d_ba2 = 0.0f;
+            float d_y_d_ba2 = 0.0f;
             if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon))  {
                 G_ba2[index2] = G_ba2[index2];
             } else {
 
                 if (hxh_a_sum_array[n][index2] >= 0) {
-                    d_x_d_ba2[n][index2] = ha3_temp[index2][0];
-                    d_y_d_ba2[n][index2] = ha3_temp[index2][1];
+                    d_x_d_ba2 = ha3_temp[index2][0];
+                    d_y_d_ba2 = ha3_temp[index2][1];
                 }
                 float d_mean_d_ba2= 0.0f;
                 float d_dev_d_ba2= 0.0f;
-                d_mean_d_ba2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba2[n][index2];
-                d_dev_d_ba2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba2[n][index2];
+                d_mean_d_ba2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba2;
+                d_dev_d_ba2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba2;
 
                 G_ba2[index2] = G_ba2[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
             }
@@ -720,27 +712,25 @@
 
     float G_ha3[num_hidden_unit2][2] = {0.0f};
     float G_ba3[2] = {0.0f};
-    float d_x_d_ha3[batch_size][num_hidden_unit2][2] = {0.0f};
-    float d_x_d_ba3[batch_size][2] = {0.0f};
-    float d_y_d_ha3[batch_size][num_hidden_unit2][2] = {0.0f};
-    float d_y_d_ba3[batch_size][2] = {0.0f};
 
     for (int index2 = 0; index2 < 2; index2++) {
         for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
             for (int n=0; n<batch_size; n++) {
+                float d_x_d_ha3 = 0.0f;
+                float d_y_d_ha3 = 0.0f;
                 if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) {
                     G_ha3[index1][index2] = G_ha3[index1][index2];
                 } else {
                     if (hxh_a_sum_array[n][index1] >= 0) {
                         if (hx_a_sum_array[n][index1] > 0) {
-                            d_x_d_ha3[n][index1][index2] = hxh_a_sum_array[n][index1];
-                            d_y_d_ha3[n][index1][index2] = hxh_a_sum_array[n][index1];
+                            d_x_d_ha3 = hxh_a_sum_array[n][index1];
+                            d_y_d_ha3 = hxh_a_sum_array[n][index1];
                         }
                     }
                     float d_mean_d_ha3 = 0.0f;
                     float d_dev_d_ha3 = 0.0f;
-                    d_mean_d_ha3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha3[n][index1][index2];
-                    d_dev_d_ha3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha3[n][index1][index2];
+                    d_mean_d_ha3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha3;
+                    d_dev_d_ha3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha3;
 
                     G_ha3[index1][index2] = G_ha3[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha3*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha3*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
                 }
@@ -750,17 +740,19 @@
         }
 
         for (int n=0; n<batch_size; n++) {
+            float d_x_d_ba3 = 0.0f;
+            float d_y_d_ba3 = 0.0f;
             if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon))  {
                 G_ba3[index2] = G_ba3[index2];
             } else {
 
-                d_x_d_ba3[n][index2] = 1.0f;
-                d_y_d_ba3[n][index2] = 1.0f;
+                d_x_d_ba3 = 1.0f;
+                d_y_d_ba3 = 1.0f;
 
                 float d_mean_d_ba3= 0.0f;
                 float d_dev_d_ba3= 0.0f;
-                d_mean_d_ba3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba3[n][index2];
-                d_dev_d_ba3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba3[n][index2];
+                d_mean_d_ba3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba3;
+                d_dev_d_ba3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba3;
 
                 G_ba3[index2] = G_ba3[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba3*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba3*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
             }
@@ -768,7 +760,7 @@
         G_ba3[index2] = -G_ba3[index2] / batch_size;
         //ba3_temp[index2] = ba3_temp[index2] - gradient_rate * G_ba3[index2];
     }
-    
+
     // Simultaneous Update
     for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
@@ -788,155 +780,9 @@
         }
         ba3_temp[index2] = ba3_temp[index2] - gradient_rate * G_ba3[index2];
     }
-    
+
 }
 
-///////////////////////////ReLU - Bad performance//////////////////////////////////
-//void update_Actor_Networks(float (*arr)[num_input_RL])
-//{
-//    float gradient_rate = 0.001f;   //-0.01f
-//
-//    float G_ha1[num_input_RL][num_hidden_unit] = {0.0f};
-//    float G_ba1[num_hidden_unit] = {0.0f};
-//    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
-//        for (int index1 = 0; index1 < num_input_RL; index1++) {
-//            for (int i=0; i<batch_size; i++) {
-//                if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
-//                    G_ha1[index1][index2] = G_ha1[index1][index2];
-//                } else {
-//
-//                    float hx_sum_total = 0.0f;
-//                    for(int m = 0; m < num_hidden_unit; m++) {
-//                        for(int n = 0; n < num_input_RL; n++) {
-//                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
-//                        }
-//                    }
-//                    hx_sum_total = hx_sum_total + bc1_temp[index2];
-//                    float d_mean_d_ha1 = 0.0f;
-//                    float d_dev_d_ha1 = 0.0f;
-//                    if (hx_sum_total >=0) {
-//                        float hx_sum = 0.0f;
-//                        for(int j=0; j<num_input_RL; j++) {
-//                            hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
-//                        }
-//                        hx_sum = hx_sum + bc1_temp[index2];
-//                        if (hx_sum >= 0) {
-//                            d_mean_d_ha1 = ha2_temp[index2][0]*arr[i][index1];
-//                            d_dev_d_ha1 = ha2_temp[index2][1]*arr[i][index1];
-//                        } else {
-//                            d_mean_d_ha1 = 0.0f;
-//                            d_dev_d_ha1 = 0.0f;
-//                        }
-//                    } else {
-//                        d_mean_d_ha1 = 0.0f;
-//                        d_dev_d_ha1 = 0.0f;
-//                    }
-//                    G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
-//                }
-//            }
-//            G_ha1[index1][index2] = G_ha1[index1][index2] / batch_size;
-//            ha1_temp[index1][index2] = ha1_temp[index1][index2] - gradient_rate * G_ha1[index1][index2];
-//        }
-//        for (int i=0; i<batch_size; i++) {
-//            if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon))  {
-//                G_ba1[index2] = G_ba1[index2];
-//            } else {
-//
-//                float hx_sum_total = 0.0f;
-//                for(int m = 0; m < num_hidden_unit; m++) {
-//                    for(int n = 0; n < num_input_RL; n++) {
-//                        hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
-//                    }
-//                }
-//                hx_sum_total = hx_sum_total + bc1_temp[index2];
-//                float d_mean_d_ba1 = 0.0f;
-//                float d_dev_d_ba1 = 0.0f;
-//                if (hx_sum_total >=0) {
-//
-//                    float hx_sum = 0.0f;
-//                    for(int j=0; j<num_input_RL; j++) {
-//                        hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
-//                    }
-//                    hx_sum = hx_sum + bc1_temp[index2];
-//
-//                    if(hx_sum >=0) {
-//                        d_mean_d_ba1 = ha2_temp[index2][0];
-//                        d_dev_d_ba1 = ha2_temp[index2][1];
-//                    } else {
-//                        d_mean_d_ba1 = 0.0f;
-//                        d_dev_d_ba1 = 0.0f;
-//                    }
-//                } else {
-//                    d_mean_d_ba1 = 0.0f;
-//                    d_dev_d_ba1 = 0.0f;
-//                }
-//                G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
-//            }
-//        }
-//        G_ba1[index2] = G_ba1[index2] / batch_size;
-//        ba1_temp[index2] = ba1_temp[index2] - gradient_rate * G_ba1[index2];
-//    }
-//
-//    float G_ha2[num_hidden_unit][2] = {0.0f};
-//    float G_ba2[2] = {0.0f};
-//    for (int index2 = 0; index2 < 2; index2++) {
-//        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-//            for (int i=0; i<batch_size; i++) {
-//                if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
-//                    G_ha2[index1][index2] = G_ha2[index1][index2];
-//                } else {
-//
-//                    float hx_sum_total = 0.0f;
-//                    for(int m = 0; m < num_hidden_unit; m++) {
-//                        for(int n = 0; n < num_input_RL; n++) {
-//                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
-//                        }
-//                    }
-//                    hx_sum_total = hx_sum_total + bc1_temp[index2];
-//                    float d_mean_d_ha2 = 0.0f;
-//                    float d_dev_d_ha2 = 0.0f;
-//                    if (hx_sum_total >=0) {
-//                        float hx_sum = 0.0f;
-//                        for(int j=0; j<num_input_RL; j++) {
-//                            hx_sum = hx_sum + ha1_temp[j][index1]*arr[i][j];
-//                        }
-//                        hx_sum = hx_sum + bc1_temp[index1];
-//                        if (hx_sum >= 0) {
-//                            d_mean_d_ha2 = hx_sum;
-//                            d_dev_d_ha2 = hx_sum;
-//                        } else {
-//                            d_mean_d_ha2 = 0.0f;
-//                            d_mean_d_ha2 = 0.0f;
-//                        }
-//                    } else {
-//                        d_mean_d_ha2 = 0.0f;
-//                        d_mean_d_ha2 = 0.0f;
-//                    }
-//                    G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
-//                }
-//            }
-//            G_ha2[index1][index2] = G_ha2[index1][index2] / batch_size;
-//            ha2_temp[index1][index2] = ha2_temp[index1][index2] - gradient_rate * G_ha2[index1][index2];
-//        }
-//        for (int i=0; i<batch_size; i++) {
-//            if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon))  {
-//                G_ba2[index2] = G_ba2[index2];
-//            } else {
-//
-//                float d_mean_d_ba2 = 0.0f;
-//                float d_dev_d_ba2 = 0.0f;
-//                d_mean_d_ba2 = 1.0f;
-//                d_dev_d_ba2 = 1.0f;
-//                G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
-//            }
-//        }
-//        G_ba2[index2] = G_ba2[index2] / batch_size;
-//        ba2_temp[index2] = ba2_temp[index2] - gradient_rate * G_ba2[index2];
-//    }
-//}
-
-
-
 float rand_normal(double mean, double stddev)
 {
     //Box muller method
@@ -1143,7 +989,7 @@
         //read_field(i2c_slave_addr1);
         //if(DIR_VALVE_ENC < 0) value = 1023 - value;
 
-//        timer_while ++;
+        //timer_while ++;
 
         ///////////////////////////////////////////////////////Neural Network
 
@@ -1268,7 +1114,7 @@
                             hxh_c_sum_array[n][i] = hxh_c_sum[i];
                         }
                         hxhh_c_sum_array[n] = hxhh_c_sum;
-                        
+
                         pi[n] = exp(-(action_array[n]-mean_array[n])*(action_array[n]-mean_array[n])/(2.0f*deviation_array[n]*deviation_array[n]))/(sqrt(2.0f*PI)*deviation_array[n]);
                         Actor_Network_Old(temp_array);
                         pi_old[n] = exp(-(action_array[n]-mean_old)*(action_array[n]-mean_old)/(2.0f*deviation_old*deviation_old))/(sqrt(2.0f*PI)*deviation_old);
@@ -1305,7 +1151,6 @@
                     update_Critic_Networks(state_array);
                     update_Actor_Networks(state_array);
                 }
-                //virt_pos = 10.0f;
                 Update_Done_Flag = 1;
                 Update_Case = 0;
                 //logging1 = V[0];