HydraulicControlBoard_Rainbow_v1_2_copy1 - 2011

Users » Lightvalve » Code » HydraulicControlBoard_Rainbow_v1_2_copy1
Sungwoo Kim / Mbed 2 deprecated HydraulicControlBoard_Rainbow_v1_2_copy1
2011
Diff: main.cpp

Revision:: 173:68c7914679ec
Parent:: 172:63af34265fe9
Child:: 174:c828479f53f9
diff -r 63af34265fe9 -r 68c7914679ec main.cpp
--- a/main.cpp	Sat Nov 21 07:25:32 2020 +0000
+++ b/main.cpp	Tue Nov 24 05:19:59 2020 +0000
@@ -1,4 +1,4 @@
-//201121_2
+//201124_1
 #include "mbed.h"
 #include "FastPWM.h"
 #include "INIT_HW.h"
@@ -268,28 +268,36 @@
 float input_RL[num_input_RL] = { 0.0f };
 
 //Critic Networks
-float hc1[num_input_RL][num_hidden_unit] = {0.0f};
-float bc1[num_hidden_unit] = {0.0f};
-float hc2[num_hidden_unit] = {0.0f};
-float bc2 = 0.0f;
+float hc1[num_input_RL][num_hidden_unit1] = {0.0f};    // critic layer-1 weights, indexed [input][hidden1]
+float bc1[num_hidden_unit1] = {0.0f};    // critic layer-1 biases
+float hc2[num_hidden_unit1][num_hidden_unit2] = {0.0f};    // critic layer-2 weights, indexed [hidden1][hidden2]
+float bc2[num_hidden_unit2] = {0.0f};    // critic layer-2 biases
+float hc3[num_hidden_unit2] = {0.0f};    // critic output weights, one per hidden2 unit
+float bc3 = 0.0f;    // critic output bias
 
 //Critic Networks Temporary
-float hc1_temp[num_input_RL][num_hidden_unit] = {0.0f};
-float bc1_temp[num_hidden_unit] = {0.0f};
-float hc2_temp[num_hidden_unit] = {0.0f};
-float bc2_temp = 0.0f;
+float hc1_temp[num_input_RL][num_hidden_unit1] = {0.0f};    // read by Critic_Network_Temp, stepped by update_Critic_Networks
+float bc1_temp[num_hidden_unit1] = {0.0f};    // temporary counterpart of bc1
+float hc2_temp[num_hidden_unit1][num_hidden_unit2] = {0.0f};    // temporary counterpart of hc2
+float bc2_temp[num_hidden_unit2] = {0.0f};    // temporary counterpart of bc2
+float hc3_temp[num_hidden_unit2] = {0.0f};    // temporary counterpart of hc3
+float bc3_temp = 0.0f;    // temporary counterpart of bc3
 
 //Actor Networks
-float ha1[num_input_RL][num_hidden_unit] = {0.0f};
-float ba1[num_hidden_unit] = {0.0f};
-float ha2[num_hidden_unit][2] = {0.0f};
-float ba2[2] = {0.0f};
+float ha1[num_input_RL][num_hidden_unit1] = {0.0f};    // actor layer-1 weights, indexed [input][hidden1]
+float ba1[num_hidden_unit1] = {0.0f};    // actor layer-1 biases
+float ha2[num_hidden_unit1][num_hidden_unit2] = {0.0f};    // actor layer-2 weights, indexed [hidden1][hidden2]
+float ba2[num_hidden_unit2] = {0.0f};    // actor layer-2 biases
+float ha3[num_hidden_unit2][2] = {0.0f};    // actor output weights; column 0 feeds the mean, column 1 the deviation
+float ba3[2] = {0.0f};    // actor output biases, same column order as ha3
 
 //Actor Networks Temporary
-float ha1_temp[num_input_RL][num_hidden_unit] = {0.0f};
-float ba1_temp[num_hidden_unit] = {0.0f};
-float ha2_temp[num_hidden_unit][2] = {0.0f};
-float ba2_temp[2] = {0.0f};
+float ha1_temp[num_input_RL][num_hidden_unit1] = {0.0f};    // stepped in place by update_Actor_Networks
+float ba1_temp[num_hidden_unit1] = {0.0f};    // temporary counterpart of ba1
+float ha2_temp[num_hidden_unit1][num_hidden_unit2] = {0.0f};    // temporary counterpart of ha2
+float ba2_temp[num_hidden_unit2] = {0.0f};    // temporary counterpart of ba2
+float ha3_temp[num_hidden_unit2][2] = {0.0f};    // temporary counterpart of ha3
+float ba3_temp[2] = {0.0f};    // temporary counterpart of ba3
 
 float VALVE_POS_RAW_NN = 0.0f;
 float DDV_JOINT_POS_FF(float REF_JOINT_VEL);
@@ -297,38 +305,82 @@
 
 float Critic_Network(float *arr)
 {
-    float output1[num_hidden_unit] = { 0.0f };
+    float output1[num_hidden_unit1] = { 0.0f };    // hidden layer 1 activations
+    float output2[num_hidden_unit2] = { 0.0f };    // hidden layer 2 activations
     float output = 0.0f;
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             output1[index2] = output1[index2] + hc1[index1][index2] * arr[index1];
         }
-        output1[index2] = tanh(output1[index2] + bc1[index2]);
+        //ReLU
+        output1[index2] = output1[index2] + bc1[index2];
+        hx_c_sum[index2] = output1[index2];    // record the pre-activation sum before clamping
+        if (output1[index2] < 0) {
+            output1[index2] = 0;
+        }
+        //tanh
+        //output1[index2] = tanh(output1[index2] + bc1[index2]);
+    }
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            output2[index2] = output2[index2] + hc2[index1][index2] * output1[index1];    // layer 2 consumes layer-1 activations, not the raw input
+        }
+        //ReLU
+        output2[index2] = output2[index2] + bc2[index2];
+        hxh_c_sum[index2] = output2[index2];    // record the pre-activation sum before clamping
+        if (output2[index2] < 0) {
+            output2[index2] = 0;
+        }
+        //tanh
+        //output2[index2] = tanh(output2[index2] + bc2[index2]);
     }
     for (int index2 = 0; index2 < 1; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-            output = output + hc2[index1] * output1[index1];
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            output = output + hc3[index1] * output2[index1];
         }
-        output = output + bc2;
+        output = output + bc3;
+        hxhh_c_sum = output;    // linear output layer: stored value equals the returned value
     }
     return output;
 }
 
 float Critic_Network_Temp(float *arr)
 {
-    float output1[num_hidden_unit] = { 0.0f };
+    float output1[num_hidden_unit1] = { 0.0f };    // hidden layer 1 activations
+    float output2[num_hidden_unit2] = { 0.0f };    // hidden layer 2 activations
     float output = 0.0f;
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             output1[index2] = output1[index2] + hc1_temp[index1][index2] * arr[index1];
         }
-        output1[index2] = tanh(output1[index2] + bc1_temp[index2]);
+        //ReLU
+        output1[index2] = output1[index2] + bc1_temp[index2];
+        hx_c_sum[index2] = output1[index2];    // record the pre-activation sum before clamping
+        if (output1[index2] < 0) {
+            output1[index2] = 0;
+        }
+        //tanh
+        //output1[index2] = tanh(output1[index2] + bc1_temp[index2]);
+    }
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            output2[index2] = output2[index2] + hc2_temp[index1][index2] * output1[index1];    // layer 2 consumes layer-1 activations, not the raw input
+        }
+        //ReLU
+        output2[index2] = output2[index2] + bc2_temp[index2];
+        hxh_c_sum[index2] = output2[index2];    // record the pre-activation sum before clamping
+        if (output2[index2] < 0) {
+            output2[index2] = 0;
+        }
+        //tanh
+        //output2[index2] = tanh(output2[index2] + bc2_temp[index2]);
     }
     for (int index2 = 0; index2 < 1; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-            output = output + hc2_temp[index1] * output1[index1];
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            output = output + hc3_temp[index1] * output2[index1];
         }
-        output = output + bc2_temp;
+        output = output + bc3_temp;
+        hxhh_c_sum = output;    // linear output layer: stored value equals the returned value
     }
     return output;
 }
@@ -336,10 +388,53 @@
 
 void Actor_Network(float *arr)
 {
-    float output1[num_hidden_unit] = {0.0f};
+    float output1[num_hidden_unit1] = {0.0f};    // hidden layer 1 activations
+    float output2[num_hidden_unit2] = {0.0f};    // hidden layer 2 activations
     float output[2] = {0.0f};
 
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
+        for (int index1 = 0; index1 < num_input_RL; index1++) {
+            output1[index2] = output1[index2] + ha1[index1][index2] * arr[index1];
+        }
+        output1[index2] = output1[index2] + ba1[index2];
+        hx_a_sum[index2] = output1[index2];    // record the pre-activation sum before the ReLU clamp
+        if (output1[index2] < 0) {
+            output1[index2] = 0;
+        }
+    }
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            output2[index2] = output2[index2] + ha2[index1][index2] * output1[index1];    // layer 2 consumes layer-1 activations, not the raw input
+        }
+        output2[index2] = output2[index2] + ba2[index2];
+        hxh_a_sum[index2] = output2[index2];    // record the pre-activation sum before the ReLU clamp
+        if (output2[index2] < 0) {
+            output2[index2] = 0;
+        }
+    }
+    for (int index2 = 0; index2 < 2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            output[index2] = output[index2] + ha3[index1][index2] * output2[index1];
+        }
+    }
+    hxhh_a_sum[0] = output[0] + ba3[0];    // output-layer sums before softplus: [0] mean, [1] deviation
+    hxhh_a_sum[1] = output[1] + ba3[1];
+
+    mean_before_SP = output[0] + ba3[0];    //SP = softplus
+    deviation_before_SP = output[1] + ba3[1];
+    //Softplus
+    mean = log(1.0f+exp(mean_before_SP));
+    deviation = log(1.0f+exp(deviation_before_SP));
+}
+
+
+void Actor_Network_Old(float *arr)
+{
+    float output1[num_hidden_unit1] = {0.0f};    // hidden layer 1 activations
+    float output2[num_hidden_unit2] = {0.0f};    // hidden layer 2 activations
+    float output[2] = {0.0f};
+
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             output1[index2] = output1[index2] + ha1[index1][index2] * arr[index1];
         }
@@ -348,62 +443,25 @@
             output1[index2] = 0;
         }
     }
-    for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-            output[index2] = output[index2] + ha2[index1][index2] * output1[index1];
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            output2[index2] = output2[index2] + ha2[index1][index2] * output1[index1];    // layer 2 consumes layer-1 activations, not the raw input
         }
-    }
-    mean_before_SP = output[0] + ba2[0];    //SP = softplus
-    deviation_before_SP = output[1] + ba2[1];
-//    mean = log(1.0f+exp(mean_before_SP));
-//    deviation = log(1.0f+exp(deviation_before_SP));
-    if (mean_before_SP >=0) {
-        mean = mean_before_SP;
-    } else {
-        mean = 0.0f;
-    }
-    if (deviation_before_SP >=0) {
-        deviation = deviation_before_SP;
-    } else {
-        deviation = 0.0f;
-    }
-
-}
-
-
-void Actor_Network_Old(float *arr)
-{
-    float output1[num_hidden_unit] = {0.0f};
-    float output[2] = {0.0f};
-
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
-        for (int index1 = 0; index1 < num_input_RL; index1++) {
-            output1[index2] = output1[index2] + ha1[index1][index2] * arr[index1];
-        }
-        output1[index2] = output1[index2] + ba1[index2];
-        if (output1[index2] < 0) {
-            output1[index2] = 0;
+        output2[index2] = output2[index2] + ba2[index2];
+        if (output2[index2] < 0) {
+            output2[index2] = 0;
         }
     }
     for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-            output[index2] = output[index2] + ha2[index1][index2] * output1[index1];
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            output[index2] = output[index2] + ha3[index1][index2] * output2[index1];
         }
     }
-    mean_old = output[0] + ba2[0];
-    deviation_old = output[1] + ba2[1];
-//    mean_old = log(1.0f+exp(mean_old));
-//    deviation_old = log(1.0f+exp(deviation_old));
-    if (mean_before_SP >=0) {
-        mean_old = mean_before_SP;
-    } else {
-        mean_old = 0.0f;
-    }
-    if (deviation_before_SP >=0) {
-        deviation_old = deviation_before_SP;
-    } else {
-        deviation_old = 0.0f;
-    }
+    mean_old = output[0] + ba3[0];    // output-layer sums; softplus is applied in place below
+    deviation_old = output[1] + ba3[1];
+    //Softplus
+    mean_old = log(1.0f+exp(mean_old));
+    deviation_old = log(1.0f+exp(deviation_old));
 }
 
 float Grad_Normal_Dist_Mean(float mean, float deviation, float action)
@@ -420,221 +478,430 @@
     return grad_dev;
 }
 
+float ReLU(float x)
+{
+    // Rectifier: non-negative inputs pass through unchanged, everything else becomes zero.
+    if (!(x >= 0)) {
+        return 0.0f;    // negated test keeps NaN on the zero branch, as a plain else would
+    }
+    return x;
+}
+
 void update_Critic_Networks(float (*arr)[num_input_RL])
 {
     float gradient_rate = 0.001f;
-
-    float G_hc1[num_input_RL][num_hidden_unit] = {0.0f};
-    float G_bc1[num_hidden_unit] = {0.0f};
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+//    float hx_sum = 0.0f;
+    // Builds batch-averaged gradient terms for the three critic layers from the stored per-sample
+    // pre-activation arrays, then steps every *_temp weight together at the end of the function.
+    ///////////////////////////////////////////////////////////CRITIC
+    float G_hc1[num_input_RL][num_hidden_unit1] = {0.0f};
+    float G_bc1[num_hidden_unit1] = {0.0f};
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
-            for (int i=0; i<batch_size; i++) {
-                float hx_sum = 0.0f;
-                float hx_sum_next = 0.0f;
-                for(int j=0; j<num_input_RL; j++) {
-                    hx_sum = hx_sum + hc1_temp[j][index2]*arr[i][j];
-                    if (i==batch_size-1) hx_sum_next = 0.0f;
-                    else hx_sum_next = hx_sum_next + hc1_temp[j][index2]*arr[i+1][j];
+            for (int n=0; n<batch_size; n++) {
+                for(int k=0; k<num_hidden_unit2; k++) {
+                    if (hxh_c_sum_array[n][k] >= 0) {
+                        if (hx_c_sum_array[n][index2] > 0) {
+                            G_hc1[index1][index2] = G_hc1[index1][index2] + arr[n][index1]*hc2_temp[index2][k]*hc3_temp[k];
+                        }
+                    }
                 }
-                if (i==batch_size-1) G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*advantage[i]*(-hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))*arr[i][index1]);
-                else G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*advantage[i]*(hc2_temp[index2]*(1.0f-tanh(hx_sum_next + bc1_temp[index2])*tanh(hx_sum_next + bc1_temp[index2]))*arr[i+1][index1] - hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))*arr[i][index1]);
             }
             G_hc1[index1][index2] = G_hc1[index1][index2] / batch_size;
+            //hc1_temp[index1][index2] = hc1_temp[index1][index2] - gradient_rate * G_hc1[index1][index2];
+        }
+        for (int n=0; n<batch_size; n++) {
+            for(int k=0; k<num_hidden_unit2; k++) {
+                if (hxh_c_sum_array[n][k] >= 0) {
+                    if (hx_c_sum_array[n][index2] > 0) {
+                        G_bc1[index2] = G_bc1[index2] + hc2_temp[index2][k]*hc3_temp[k];
+                    }
+
+                }
+            }
+        }
+        G_bc1[index2] = G_bc1[index2] / batch_size;
+        //bc1_temp[index2] = bc1_temp[index2] - gradient_rate * G_bc1[index2];
+    }
+
+    float G_hc2[num_hidden_unit1][num_hidden_unit2] = {0.0f};
+    float G_bc2[num_hidden_unit2] = {0.0f};
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            for (int n=0; n<batch_size; n++) {
+                if (hxh_c_sum_array[n][index2] >= 0) {
+                    if (hx_c_sum_array[n][index1] > 0) {
+                        G_hc2[index1][index2] = G_hc2[index1][index2] + hx_c_sum_array[n][index1]*hc3_temp[index2];
+                    }
+                }
+            }
+            G_hc2[index1][index2] = G_hc2[index1][index2] / batch_size;
+            //hc2_temp[index1][index2] = hc2_temp[index1][index2] - gradient_rate * G_hc2[index1][index2];
+        }
+        for (int n=0; n<batch_size; n++) {
+            if (hxh_c_sum_array[n][index2] >= 0) {
+                G_bc2[index2] = G_bc2[index2] + hc3_temp[index2];
+            }
+        }
+        G_bc2[index2] = G_bc2[index2] / batch_size;
+        //bc2_temp[index2] = bc2_temp[index2] - gradient_rate * G_bc2[index2];
+    }
+
+    float G_hc3[num_hidden_unit2]= {0.0f};
+    float G_bc3 = 0.0f;
+    for (int index2 = 0; index2 < 1; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            for (int n=0; n<batch_size; n++) {
+                if (hxh_c_sum_array[n][index1] >= 0) {
+                    G_hc3[index1] = G_hc3[index1] + hxh_c_sum_array[n][index1];
+                }
+            }
+            G_hc3[index1] = G_hc3[index1] / batch_size;
+            //hc3_temp[index1] = hc3_temp[index1] - gradient_rate * G_hc3[index1];
+        }
+        for (int n=0; n<batch_size; n++) {
+            G_bc3 = G_bc3 + 1.0f;    // output-bias term belongs in G_bc3; G_bc2 is already averaged above
+        }
+        G_bc3 = G_bc3 / batch_size;
+        //bc3_temp = bc3_temp - gradient_rate * G_bc3;
+    }
+
+    // Simultaneous Update
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
+        for (int index1 = 0; index1 < num_input_RL; index1++) {
             hc1_temp[index1][index2] = hc1_temp[index1][index2] - gradient_rate * G_hc1[index1][index2];
         }
-        for (int i=0; i<batch_size; i++) {
-            float hx_sum = 0.0f;
-            float hx_sum_next = 0.0f;
-            for(int j=0; j<num_input_RL; j++) {
-                hx_sum = hx_sum + hc1_temp[j][index2]*arr[i][j];
-                if (i==batch_size-1) hx_sum_next = 0.0f;
-                else hx_sum_next = hx_sum_next + hc1_temp[j][index2]*arr[i+1][j];
-            }
-            if (i==batch_size-1) G_bc1[index2] = G_bc1[index2] + 2.0f*advantage[i]*(-hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2])));
-            else  G_bc1[index2] = G_bc1[index2] + 2.0f*advantage[i]*(hc2_temp[index2]*(1.0f-tanh(hx_sum_next + bc1_temp[index2])*tanh(hx_sum_next + bc1_temp[index2])) - hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2])));
-        }
-        G_bc1[index2] = G_bc1[index2] / batch_size;
         bc1_temp[index2] = bc1_temp[index2] - gradient_rate * G_bc1[index2];
     }
-
-    float G_hc2[num_hidden_unit] = {0.0f};
-    float G_bc2 = 0.0f;
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            hc2_temp[index1][index2] = hc2_temp[index1][index2] - gradient_rate * G_hc2[index1][index2];
+        }
+        bc2_temp[index2] = bc2_temp[index2] - gradient_rate * G_bc2[index2];
+    }
     for (int index2 = 0; index2 < 1; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-            for (int i=0; i<batch_size; i++) {
-                float hx_sum = 0.0f;
-                float hx_sum_next = 0.0f;
-                for(int j=0; j<num_input_RL; j++) {
-                    hx_sum = hx_sum + hc1_temp[j][index1]*arr[i][j];
-                    if (i==batch_size-1) hx_sum_next = 0.0f;
-                    else hx_sum_next = hx_sum_next + hc1_temp[j][index1]*arr[i+1][j];
-                }
-                if (i==batch_size-1) G_hc2[index1] = G_hc2[index1] - 2.0f*advantage[i]*tanh(hx_sum + bc1_temp[index1]);
-                else G_hc2[index1] = G_hc2[index1] + 2.0f*advantage[i]*(tanh(hx_sum_next + bc1_temp[index1]) - tanh(hx_sum + bc1_temp[index1]));
-            }
-            G_hc2[index1] = G_hc2[index1] / batch_size;
-            hc2_temp[index1] = hc2_temp[index1] - gradient_rate * G_hc2[index1];
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            hc3_temp[index1] = hc3_temp[index1] - gradient_rate * G_hc3[index1];
         }
-        for (int i=0; i<batch_size; i++) {
-            if (i==batch_size-1) G_bc2 = G_bc2 + 2.0f*advantage[i]*(-1.0f);
-            else  G_bc2 = 0.0f;
-        }
-        G_bc2 = G_bc2/ batch_size;
-        bc2_temp = bc2_temp - gradient_rate * G_bc2;
+        bc3_temp = bc3_temp - gradient_rate * G_bc3;
     }
+
 }
 
+///////////////////////////Softplus//////////////////////////////////
 void update_Actor_Networks(float (*arr)[num_input_RL])
 {
-    float gradient_rate = 0.001f;   //-0.01f
-
-    float G_ha1[num_input_RL][num_hidden_unit] = {0.0f};
-    float G_ba1[num_hidden_unit] = {0.0f};
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    float gradient_rate = 0.001f;
+
+    float G_ha1[num_input_RL][num_hidden_unit1] = {0.0f};
+    float G_ba1[num_hidden_unit1] = {0.0f};
+    float d_x_d_ha1[num_input_RL][num_hidden_unit1] = {0.0f};
+    float d_x_d_ba1[num_hidden_unit1] = {0.0f};
+    float d_y_d_ha1[num_input_RL][num_hidden_unit1] = {0.0f};
+    float d_y_d_ba1[num_hidden_unit1] = {0.0f};
+
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
-            for (int i=0; i<batch_size; i++) {
-                if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
+            for (int n=0; n<batch_size; n++) {
+                if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) {
                     G_ha1[index1][index2] = G_ha1[index1][index2];
                 } else {
-
-                    float hx_sum_total = 0.0f;
-                    for(int m = 0; m < num_hidden_unit; m++) {
-                        for(int n = 0; n < num_input_RL; n++) {
-                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+                    for(int k=0; k<num_hidden_unit2; k++) {
+                        if (hxh_a_sum_array[n][k] >= 0) {
+                            if (hx_a_sum_array[n][index2] > 0) {
+                                d_x_d_ha1[index1][index2] = d_x_d_ha1[index1][index2] + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][0];
+                                d_y_d_ha1[index1][index2] = d_y_d_ha1[index1][index2] + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][1];
+                            }
                         }
                     }
-                    hx_sum_total = hx_sum_total + bc1_temp[index2];
                     float d_mean_d_ha1 = 0.0f;
                     float d_dev_d_ha1 = 0.0f;
-                    if (hx_sum_total >=0) {
-                        float hx_sum = 0.0f;
-                        for(int j=0; j<num_input_RL; j++) {
-                            hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
-                        }
-                        hx_sum = hx_sum + bc1_temp[index2];
-                        if (hx_sum >= 0) {
-//                            d_mean_d_ha1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0]*arr[i][index1];
-//                            d_dev_d_ha1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1]*arr[i][index1];
-                            d_mean_d_ha1 = ha2_temp[index2][0]*arr[i][index1];
-                            d_dev_d_ha1 = ha2_temp[index2][1]*arr[i][index1];
-                        } else {
-                            d_mean_d_ha1 = 0.0f;
-                            d_dev_d_ha1 = 0.0f;
-                        }
-                    } else {
-                        d_mean_d_ha1 = 0.0f;
-                        d_dev_d_ha1 = 0.0f;
-                    }
-                    G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+                    d_mean_d_ha1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha1[index1][index2];
+                    d_dev_d_ha1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha1[index1][index2];
+
+                    G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
                 }
             }
             G_ha1[index1][index2] = G_ha1[index1][index2] / batch_size;
             ha1_temp[index1][index2] = ha1_temp[index1][index2] - gradient_rate * G_ha1[index1][index2];
         }
-        for (int i=0; i<batch_size; i++) {
-            if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon))  {
+
+        for (int n=0; n<batch_size; n++) {
+            if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon))  {
                 G_ba1[index2] = G_ba1[index2];
             } else {
-
-                float hx_sum_total = 0.0f;
-                for(int m = 0; m < num_hidden_unit; m++) {
-                    for(int n = 0; n < num_input_RL; n++) {
-                        hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+                for(int k=0; k<num_hidden_unit2; k++) {
+                    if (hxh_a_sum_array[n][k] >= 0) {
+                        if (hx_a_sum_array[n][index2] > 0) {
+                            d_x_d_ba1[index2] = d_x_d_ba1[index2] + ha2_temp[index2][k]*ha3_temp[k][0];
+                            d_y_d_ba1[index2] = d_y_d_ba1[index2] + ha2_temp[index2][k]*ha3_temp[k][1];
+                        }
                     }
                 }
-                hx_sum_total = hx_sum_total + bc1_temp[index2];
                 float d_mean_d_ba1 = 0.0f;
                 float d_dev_d_ba1 = 0.0f;
-                if (hx_sum_total >=0) {
-
-                    float hx_sum = 0.0f;
-                    for(int j=0; j<num_input_RL; j++) {
-                        hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
-                    }
-                    hx_sum = hx_sum + bc1_temp[index2];
-
-                    if(hx_sum >=0) {
-//                        d_mean_d_ba1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0];
-//                        d_dev_d_ba1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1];
-                        d_mean_d_ba1 = ha2_temp[index2][0];
-                        d_dev_d_ba1 = ha2_temp[index2][1];
-                    } else {
-                        d_mean_d_ba1 = 0.0f;
-                        d_dev_d_ba1 = 0.0f;
-                    }
-                } else {
-                    d_mean_d_ba1 = 0.0f;
-                    d_dev_d_ba1 = 0.0f;
-                }
-                G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+                d_mean_d_ba1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba1[index2];
+                d_dev_d_ba1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba1[index2];
+
+                G_ba1[index2] = G_ba1[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
             }
         }
         G_ba1[index2] = G_ba1[index2] / batch_size;
         ba1_temp[index2] = ba1_temp[index2] - gradient_rate * G_ba1[index2];
     }
 
-    float G_ha2[num_hidden_unit][2] = {0.0f};
-    float G_ba2[2] = {0.0f};
-    for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-            for (int i=0; i<batch_size; i++) {
-                if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
+    float G_ha2[num_hidden_unit1][num_hidden_unit2] = {0.0f};
+    float G_ba2[num_hidden_unit2] = {0.0f};
+    float d_x_d_ha2[num_hidden_unit1][num_hidden_unit2] = {0.0f};
+    float d_x_d_ba2[num_hidden_unit2] = {0.0f};
+    float d_y_d_ha2[num_hidden_unit1][num_hidden_unit2] = {0.0f};
+    float d_y_d_ba2[num_hidden_unit2] = {0.0f};
+
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            for (int n=0; n<batch_size; n++) {
+                if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) {
                     G_ha2[index1][index2] = G_ha2[index1][index2];
                 } else {
 
-                    float hx_sum_total = 0.0f;
-                    for(int m = 0; m < num_hidden_unit; m++) {
-                        for(int n = 0; n < num_input_RL; n++) {
-                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+                    if (hxh_a_sum_array[n][index2] >= 0) {
+                        if (hx_a_sum_array[n][index1] > 0) {
+                            d_x_d_ha2[index1][index2] = d_x_d_ha2[index1][index2] + hx_a_sum_array[n][index1]*ha3_temp[index2][0];
+                            d_y_d_ha2[index1][index2] = d_y_d_ha2[index1][index2] + hx_a_sum_array[n][index1]*ha3_temp[index2][1];
                         }
                     }
-                    hx_sum_total = hx_sum_total + bc1_temp[index2];
+
                     float d_mean_d_ha2 = 0.0f;
                     float d_dev_d_ha2 = 0.0f;
-                    if (hx_sum_total >=0) {
-                        float hx_sum = 0.0f;
-                        for(int j=0; j<num_input_RL; j++) {
-                            hx_sum = hx_sum + ha1_temp[j][index1]*arr[i][j];
-                        }
-                        hx_sum = hx_sum + bc1_temp[index1];
-                        if (hx_sum >= 0) {
-//                            d_mean_d_ha2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*hx_sum;
-//                            d_dev_d_ha2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*hx_sum;
-                            d_mean_d_ha2 = hx_sum;
-                            d_dev_d_ha2 = hx_sum;
-                        } else {
-                            d_mean_d_ha2 = 0.0f;
-                            d_mean_d_ha2 = 0.0f;
-                        }
-                    } else {
-                        d_mean_d_ha2 = 0.0f;
-                        d_mean_d_ha2 = 0.0f;
-                    }
-                    G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+                    d_mean_d_ha2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha2[index1][index2];
+                    d_dev_d_ha2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha2[index1][index2];
+
+                    G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
                 }
             }
             G_ha2[index1][index2] = G_ha2[index1][index2] / batch_size;
             ha2_temp[index1][index2] = ha2_temp[index1][index2] - gradient_rate * G_ha2[index1][index2];
         }
-        for (int i=0; i<batch_size; i++) {
-            if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon))  {
+
+        for (int n=0; n<batch_size; n++) {
+            if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon))  {
                 G_ba2[index2] = G_ba2[index2];
             } else {
 
-                float d_mean_d_ba2 = 0.0f;
-                float d_dev_d_ba2 = 0.0f;
-//                d_mean_d_ba2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]));
-//                d_dev_d_ba2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]));
-                d_mean_d_ba2 = 1.0f;
-                d_dev_d_ba2 = 1.0f;
-                G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+                if (hxh_a_sum_array[n][index2] >= 0) {
+                    d_x_d_ba2[index2] = d_x_d_ba2[index2] + ha3_temp[index2][0];
+                    d_y_d_ba2[index2] = d_y_d_ba2[index2] + ha3_temp[index2][1];
+                }
+                float d_mean_d_ba2= 0.0f;
+                float d_dev_d_ba2= 0.0f;
+                d_mean_d_ba2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba2[index2];
+                d_dev_d_ba2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba2[index2];
+
+                G_ba2[index2] = G_ba2[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
             }
         }
         G_ba2[index2] = G_ba2[index2] / batch_size;
         ba2_temp[index2] = ba2_temp[index2] - gradient_rate * G_ba2[index2];
     }
+
+    float G_ha3[num_hidden_unit2][2] = {0.0f};
+    float G_ba3[2] = {0.0f};
+    float d_x_d_ha3[num_hidden_unit2][2] = {0.0f};
+    float d_x_d_ba3[2] = {0.0f};
+    float d_y_d_ha3[num_hidden_unit2][2] = {0.0f};
+    float d_y_d_ba3[2] = {0.0f};
+
+    for (int index2 = 0; index2 < 2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            for (int n=0; n<batch_size; n++) {
+                if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) {
+                    G_ha3[index1][index2] = G_ha3[index1][index2];
+                } else {
+
+                    if (hxh_a_sum_array[n][index1] >= 0) {
+                        if (hx_a_sum_array[n][index1] > 0) {
+                            d_x_d_ha3[index1][index2] = d_x_d_ha3[index1][index2] + hxh_a_sum_array[n][index1];
+                            d_y_d_ha3[index1][index2] = d_y_d_ha3[index1][index2] + hxh_a_sum_array[n][index1];
+
+                        }
+                    }
+                    float d_mean_d_ha3 = 0.0f;
+                    float d_dev_d_ha3 = 0.0f;
+                    d_mean_d_ha3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha3[index1][index2];
+                    d_dev_d_ha3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha3[index1][index2];
+
+                    G_ha3[index1][index2] = G_ha3[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha3*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha3*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
+                }
+            }
+            G_ha3[index1][index2] = G_ha3[index1][index2] / batch_size;
+            ha3_temp[index1][index2] = ha3_temp[index1][index2] - gradient_rate * G_ha3[index1][index2];
+        }
+
+        for (int n=0; n<batch_size; n++) {
+            if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon))  {
+                G_ba3[index2] = G_ba3[index2];
+            } else {
+
+                d_x_d_ba3[index2] = d_x_d_ba3[index2] + 1.0f;
+                d_y_d_ba3[index2] = d_y_d_ba3[index2] + 1.0f;
+
+                float d_mean_d_ba3= 0.0f;
+                float d_dev_d_ba3= 0.0f;
+                d_mean_d_ba3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba3[index2];
+                d_dev_d_ba3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba3[index2];
+
+                G_ba3[index2] = G_ba3[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba3*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba3*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n]));
+            }
+        }
+        G_ba3[index2] = G_ba3[index2] / batch_size;
+        ba3_temp[index2] = ba3_temp[index2] - gradient_rate * G_ba3[index2];
+    }
 }
 
+///////////////////////////Previous single-hidden-layer ReLU version of update_Actor_Networks, disabled (bad performance)//////////////////////////////////
+//void update_Actor_Networks(float (*arr)[num_input_RL])
+//{
+//    float gradient_rate = 0.001f;   //-0.01f
+//
+//    float G_ha1[num_input_RL][num_hidden_unit] = {0.0f};
+//    float G_ba1[num_hidden_unit] = {0.0f};
+//    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+//        for (int index1 = 0; index1 < num_input_RL; index1++) {
+//            for (int i=0; i<batch_size; i++) {
+//                if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
+//                    G_ha1[index1][index2] = G_ha1[index1][index2];
+//                } else {
+//
+//                    float hx_sum_total = 0.0f;
+//                    for(int m = 0; m < num_hidden_unit; m++) {
+//                        for(int n = 0; n < num_input_RL; n++) {
+//                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+//                        }
+//                    }
+//                    hx_sum_total = hx_sum_total + bc1_temp[index2];
+//                    float d_mean_d_ha1 = 0.0f;
+//                    float d_dev_d_ha1 = 0.0f;
+//                    if (hx_sum_total >=0) {
+//                        float hx_sum = 0.0f;
+//                        for(int j=0; j<num_input_RL; j++) {
+//                            hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
+//                        }
+//                        hx_sum = hx_sum + bc1_temp[index2];
+//                        if (hx_sum >= 0) {
+//                            d_mean_d_ha1 = ha2_temp[index2][0]*arr[i][index1];
+//                            d_dev_d_ha1 = ha2_temp[index2][1]*arr[i][index1];
+//                        } else {
+//                            d_mean_d_ha1 = 0.0f;
+//                            d_dev_d_ha1 = 0.0f;
+//                        }
+//                    } else {
+//                        d_mean_d_ha1 = 0.0f;
+//                        d_dev_d_ha1 = 0.0f;
+//                    }
+//                    G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+//                }
+//            }
+//            G_ha1[index1][index2] = G_ha1[index1][index2] / batch_size;
+//            ha1_temp[index1][index2] = ha1_temp[index1][index2] - gradient_rate * G_ha1[index1][index2];
+//        }
+//        for (int i=0; i<batch_size; i++) {
+//            if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon))  {
+//                G_ba1[index2] = G_ba1[index2];
+//            } else {
+//
+//                float hx_sum_total = 0.0f;
+//                for(int m = 0; m < num_hidden_unit; m++) {
+//                    for(int n = 0; n < num_input_RL; n++) {
+//                        hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+//                    }
+//                }
+//                hx_sum_total = hx_sum_total + bc1_temp[index2];
+//                float d_mean_d_ba1 = 0.0f;
+//                float d_dev_d_ba1 = 0.0f;
+//                if (hx_sum_total >=0) {
+//
+//                    float hx_sum = 0.0f;
+//                    for(int j=0; j<num_input_RL; j++) {
+//                        hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
+//                    }
+//                    hx_sum = hx_sum + bc1_temp[index2];
+//
+//                    if(hx_sum >=0) {
+//                        d_mean_d_ba1 = ha2_temp[index2][0];
+//                        d_dev_d_ba1 = ha2_temp[index2][1];
+//                    } else {
+//                        d_mean_d_ba1 = 0.0f;
+//                        d_dev_d_ba1 = 0.0f;
+//                    }
+//                } else {
+//                    d_mean_d_ba1 = 0.0f;
+//                    d_dev_d_ba1 = 0.0f;
+//                }
+//                G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+//            }
+//        }
+//        G_ba1[index2] = G_ba1[index2] / batch_size;
+//        ba1_temp[index2] = ba1_temp[index2] - gradient_rate * G_ba1[index2];
+//    }
+//
+//    float G_ha2[num_hidden_unit][2] = {0.0f};
+//    float G_ba2[2] = {0.0f};
+//    for (int index2 = 0; index2 < 2; index2++) {
+//        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
+//            for (int i=0; i<batch_size; i++) {
+//                if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
+//                    G_ha2[index1][index2] = G_ha2[index1][index2];
+//                } else {
+//
+//                    float hx_sum_total = 0.0f;
+//                    for(int m = 0; m < num_hidden_unit; m++) {
+//                        for(int n = 0; n < num_input_RL; n++) {
+//                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+//                        }
+//                    }
+//                    hx_sum_total = hx_sum_total + bc1_temp[index2];
+//                    float d_mean_d_ha2 = 0.0f;
+//                    float d_dev_d_ha2 = 0.0f;
+//                    if (hx_sum_total >=0) {
+//                        float hx_sum = 0.0f;
+//                        for(int j=0; j<num_input_RL; j++) {
+//                            hx_sum = hx_sum + ha1_temp[j][index1]*arr[i][j];
+//                        }
+//                        hx_sum = hx_sum + bc1_temp[index1];
+//                        if (hx_sum >= 0) {
+//                            d_mean_d_ha2 = hx_sum;
+//                            d_dev_d_ha2 = hx_sum;
+//                        } else {
+//                            d_mean_d_ha2 = 0.0f;
+//                            d_mean_d_ha2 = 0.0f;
+//                        }
+//                    } else {
+//                        d_mean_d_ha2 = 0.0f;
+//                        d_mean_d_ha2 = 0.0f;
+//                    }
+//                    G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+//                }
+//            }
+//            G_ha2[index1][index2] = G_ha2[index1][index2] / batch_size;
+//            ha2_temp[index1][index2] = ha2_temp[index1][index2] - gradient_rate * G_ha2[index1][index2];
+//        }
+//        for (int i=0; i<batch_size; i++) {
+//            if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon))  {
+//                G_ba2[index2] = G_ba2[index2];
+//            } else {
+//
+//                float d_mean_d_ba2 = 0.0f;
+//                float d_dev_d_ba2 = 0.0f;
+//                d_mean_d_ba2 = 1.0f;
+//                d_dev_d_ba2 = 1.0f;
+//                G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
+//            }
+//        }
+//        G_ba2[index2] = G_ba2[index2] / batch_size;
+//        ba2_temp[index2] = ba2_temp[index2] - gradient_rate * G_ba2[index2];
+//    }
+//}
+
 
 
 float rand_normal(double mean, double stddev)
@@ -664,31 +931,44 @@
     }
 }
 
+
 // Copies the critic's temporary ("_temp") parameters into the critic arrays.
 //   hc1_temp / bc1_temp -> hc1 / bc1   (num_input_RL x num_hidden_unit1)
 //   hc2_temp / bc2_temp -> hc2 / bc2   (num_hidden_unit1 x num_hidden_unit2)
 //   hc3_temp            -> hc3         (num_hidden_unit2 entries)
 //   bc3_temp            -> bc3         (single value)
 // Takes no arguments and returns nothing; it only writes the arrays above.
 // NOTE(review): the initialisation code further down in this diff assigns
 // random values to bc2, hc3 and bc3 directly (not to their _temp copies) and
 // then calls this function, which replaces them with bc2_temp / hc3_temp /
 // bc3_temp -- confirm that this is intended.
 void Overwirte_Critic_Networks()
 {
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             hc1[index1][index2] = hc1_temp[index1][index2];
         }
         bc1[index2] = bc1_temp[index2];
-        hc2[index2] = hc2_temp[index2];
     }
-    bc2 = bc2_temp;
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            hc2[index1][index2] = hc2_temp[index1][index2];
+        }
+        bc2[index2] = bc2_temp[index2];
+        hc3[index2] = hc3_temp[index2];  // hc3 is indexed by the same index2 range as bc2
+    }
+    bc3 = bc3_temp;  // scalar, copied once after the loops
 }
 // Copies the actor's temporary ("_temp") parameters into the actor arrays.
 //   ha1_temp / ba1_temp -> ha1 / ba1   (num_input_RL x num_hidden_unit1)
 //   ha2_temp / ba2_temp -> ha2 / ba2   (num_hidden_unit1 x num_hidden_unit2)
 //   ha3_temp / ba3_temp -> ha3 / ba3   (num_hidden_unit2 x 2)
 // Takes no arguments and returns nothing; it only writes the arrays above.
 // NOTE(review): the initialisation code further down in this diff now fills
 // ha1..ba3 directly instead of the _temp copies; if this function runs after
 // that initialisation it replaces those values with the _temp contents --
 // verify the call order.
 void Overwirte_Actor_Networks()
 {
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             ha1[index1][index2] = ha1_temp[index1][index2];
         }
         ba1[index2] = ba1_temp[index2];
     }
-    for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
             ha2[index1][index2] = ha2_temp[index1][index2];
         }
         ba2[index2] = ba2_temp[index2];
     }
+    for (int index2 = 0; index2 < 2; index2++) {  // two columns; the gradient code earlier in this diff pairs column 0 with the mean and column 1 with the deviation
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            ha3[index1][index2] = ha3_temp[index1][index2];
+        }
+        ba3[index2] = ba3_temp[index2];
+    }
 }
 
 
@@ -775,25 +1055,38 @@
             ID_index_array[i] =  (i+1) * 0.5f;
     }
 
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             hc1_temp[index1][index2] = (float) (rand()%100) * 0.01f ;
         }
         bc1_temp[index2] = (float) (rand()%100) * 0.01f;
-        hc2_temp[index2] = (float) (rand()%100) * 0.01f;
+    }
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            hc2_temp[index1][index2] = (float) (rand()%100) * 0.01f;
+        }
+        bc2[index2] = (float) (rand()%100) * 0.01f;
+        hc3[index2] = (float) (rand()%100) * 0.01f;
     }
-    bc2_temp = (float) (rand()%100) * 0.01f;
-    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
+    bc3 = (float) (rand()%100) * 0.01f;
+
+    for (int index2 = 0; index2 < num_hidden_unit1; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
-            ha1_temp[index1][index2] = (float) (rand()%100) * 0.01f;
+            ha1[index1][index2] = (float) (rand()%100) * 0.01f;
         }
-        ba1_temp[index2] = (float) (rand()%100) * 0.01f;
+        ba1[index2] = (float) (rand()%100) * 0.01f;
+    }
+    for (int index2 = 0; index2 < num_hidden_unit2; index2++) {
+        for (int index1 = 0; index1 < num_hidden_unit1; index1++) {
+            ha2[index1][index2] = (float) (rand()%100) * 0.01f;
+        }
+        ba2[index2] = (float) (rand()%100) * 0.01f;
     }
     for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
-            ha2_temp[index1][index2] = (float) (rand()%100) * 0.01f;
+        for (int index1 = 0; index1 < num_hidden_unit2; index1++) {
+            ha3[index1][index2] = (float) (rand()%100) * 0.01f;
         }
-        ba2_temp[index2] = (float) (rand()%100) * 0.01f;
+        ba3[index2] = (float) (rand()%100) * 0.01f;
     }
 
     Overwirte_Critic_Networks();
@@ -935,10 +1228,19 @@
                         //float temp_array[3] = {state_array[i][0], state_array[i][1], state_array[i][2]};
                         float temp_array[2] = {state_array[i][0], state_array[i][1]};
                         V[i] = Critic_Network_Temp(temp_array);
+                        for (int i=0; i<num_hidden_unit1; i++) {
+                            hx_c_sum_array[RL_timer][i] = hx_c_sum[i];
+                        }
+                        for (int i=0; i<num_hidden_unit2; i++) {
+                            hxh_c_sum_array[RL_timer][i] = hxh_c_sum[i];
+                        }
+                        hxhh_c_sum_array[RL_timer] = hxhh_c_sum;
                         pi[i] = exp(-(action_array[i]-mean_array[i])*(action_array[i]-mean_array[i])/(2.0f*deviation_array[i]*deviation_array[i]))/(sqrt(2.0f*PI)*deviation_array[i]);
                         Actor_Network_Old(temp_array);
                         pi_old[i] = exp(-(action_array[i]-mean_old)*(action_array[i]-mean_old)/(2.0f*deviation_old*deviation_old))/(sqrt(2.0f*PI)*deviation_old);
-                        r[i] = exp(-0.00005f * state_array[i][1] * 70.0f * state_array[i][1] * 70.0f);
+                        r[i] = exp(-0.25f * state_array[i][1] * state_array[i][1]);
+                        if(i == batch_size-1) return_G[i] = 0.0f;
+                        else return_G[i] = gamma * return_G[i+1] + r[i];
                         if(i == batch_size-1) td_target[i] = r[i];
                         else td_target[i] = r[i] + gamma * V[i+1];
                         delta[i] = td_target[i] - V[i];
@@ -2375,6 +2677,14 @@
                     //float temp_array[3] = {train_set_x[RL_timer], train_set_error[RL_timer], train_set_count[RL_timer]};
                     float temp_array[2] = {train_set_x[RL_timer], train_set_error[RL_timer]};
                     Actor_Network(temp_array);
+                    for (int i=0; i<num_hidden_unit1; i++) {
+                        hx_a_sum_array[RL_timer][i] = hx_a_sum[i];
+                    }
+                    for (int i=0; i<num_hidden_unit2; i++) {
+                        hxh_a_sum_array[RL_timer][i] = hxh_a_sum[i];
+                    }
+                    hxhh_a_sum_array[RL_timer][0] = hxhh_a_sum[0];
+                    hxhh_a_sum_array[RL_timer][1] = hxhh_a_sum[1];
                     mean_array[RL_timer] = mean;
                     deviation_array[RL_timer] = deviation;
                     mean_before_SP_array[RL_timer] = mean_before_SP;
Repository toolbox

Export to desktop IDE
Build repository
Repository details

Type:	Program
Mbed OS support:	Mbed 2 deprecated
Created:	01 Aug 2022
Imports:	1
Forks:	0
Commits:	258
Dependents:	0
Dependencies:	2
Followers:	1
Diff: main.cpp

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning