Original Version of STM Board

Dependencies:   FastPWM mbed

Revision:
171:bfc1fd2629d8
Parent:
170:42c938a40313
Child:
172:63af34265fe9
diff -r 42c938a40313 -r bfc1fd2629d8 main.cpp
--- a/main.cpp	Wed Nov 18 12:03:39 2020 +0000
+++ b/main.cpp	Sat Nov 21 07:15:47 2020 +0000
@@ -1,4 +1,4 @@
-//201118_1
+//201121_1
 #include "mbed.h"
 #include "FastPWM.h"
 #include "INIT_HW.h"
@@ -112,7 +112,7 @@
 
     MODE_VALVE_OPEN_LOOP,                               //3
     MODE_JOINT_ADAPTIVE_BACKSTEPPING,                   //4
-    MODE_VALVE_POSITION_TORQUE_CONTROL_LEARNING,        //5
+    MODE_RL,                                            //5
 
     MODE_JOINT_POSITION_PRES_CONTROL_PWM,               //6
     MODE_JOINT_POSITION_PRES_CONTROL_VALVE_POSITION,    //7
@@ -124,7 +124,7 @@
     MODE_CURRENT_CONTROL,                               //11
     MODE_JOINT_POSITION_TORQUE_CONTROL_CURRENT,         //12
     MODE_JOINT_POSITION_PRES_CONTROL_CURRENT,           //13
-    MODE_RL,                                            //14
+    MODE_VALVE_POSITION_TORQUE_CONTROL_LEARNING,                                            //14
 
     //utility
     MODE_TORQUE_SENSOR_NULLING = 20,                    //20
@@ -268,27 +268,27 @@
 float input_RL[num_input_RL] = { 0.0f };
 
 //Critic Networks
-float hc1[num_input_RL][10] = {0.0f};
-float bc1[10] = {0.0f};
-float hc2[10] = {0.0f};
+float hc1[num_input_RL][num_hidden_unit] = {0.0f};
+float bc1[num_hidden_unit] = {0.0f};
+float hc2[num_hidden_unit] = {0.0f};
 float bc2 = 0.0f;
 
 //Critic Networks Temporary
-float hc1_temp[num_input_RL][10] = {0.0f};
-float bc1_temp[10] = {0.0f};
-float hc2_temp[10] = {0.0f};
+float hc1_temp[num_input_RL][num_hidden_unit] = {0.0f};
+float bc1_temp[num_hidden_unit] = {0.0f};
+float hc2_temp[num_hidden_unit] = {0.0f};
 float bc2_temp = 0.0f;
 
 //Actor Networks
-float ha1[num_input_RL][10] = {0.0f};
-float ba1[10] = {0.0f};
-float ha2[10][2] = {0.0f};
+float ha1[num_input_RL][num_hidden_unit] = {0.0f};
+float ba1[num_hidden_unit] = {0.0f};
+float ha2[num_hidden_unit][2] = {0.0f};
 float ba2[2] = {0.0f};
 
 //Actor Networks Temporary
-float ha1_temp[num_input_RL][10] = {0.0f};
-float ba1_temp[10] = {0.0f};
-float ha2_temp[10][2] = {0.0f};
+float ha1_temp[num_input_RL][num_hidden_unit] = {0.0f};
+float ba1_temp[num_hidden_unit] = {0.0f};
+float ha2_temp[num_hidden_unit][2] = {0.0f};
 float ba2_temp[2] = {0.0f};
 
 float VALVE_POS_RAW_NN = 0.0f;
@@ -297,16 +297,16 @@
 
 float Critic_Network(float *arr)
 {
-    float output1[10] = { 0.0f };
+    float output1[num_hidden_unit] = { 0.0f };
     float output = 0.0f;
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             output1[index2] = output1[index2] + hc1[index1][index2] * arr[index1];
         }
         output1[index2] = tanh(output1[index2] + bc1[index2]);
     }
     for (int index2 = 0; index2 < 1; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             output = output + hc2[index1] * output1[index1];
         }
         output = output + bc2;
@@ -316,16 +316,16 @@
 
 float Critic_Network_Temp(float *arr)
 {
-    float output1[10] = { 0.0f };
+    float output1[num_hidden_unit] = { 0.0f };
     float output = 0.0f;
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             output1[index2] = output1[index2] + hc1_temp[index1][index2] * arr[index1];
         }
         output1[index2] = tanh(output1[index2] + bc1_temp[index2]);
     }
     for (int index2 = 0; index2 < 1; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             output = output + hc2_temp[index1] * output1[index1];
         }
         output = output + bc2_temp;
@@ -336,10 +336,10 @@
 
 void Actor_Network(float *arr)
 {
-    float output1[10] = {0.0f};
+    float output1[num_hidden_unit] = {0.0f};
     float output[2] = {0.0f};
 
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             output1[index2] = output1[index2] + ha1[index1][index2] * arr[index1];
         }
@@ -349,49 +349,34 @@
         }
     }
     for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             output[index2] = output[index2] + ha2[index1][index2] * output1[index1];
         }
     }
-    mean_before_SP = output[0] + ba2[0];
+    mean_before_SP = output[0] + ba2[0];    //SP = softplus
     deviation_before_SP = output[1] + ba2[1];
-    mean = log(1.0f+exp(mean_before_SP));
-    deviation = log(1.0f+exp(deviation_before_SP));
-}
-
-
-//void Actor_Network_Temp(float *arr)
-//{
-//    float output1[10] = {0.0f};
-//    float output[2] = {0.0f};
-//
-//    for (int index2 = 0; index2 < 10; index2++) {
-//        for (int index1 = 0; index1 < num_input_RL; index1++) {
-//            output1[index2] = output1[index2] + ha1_temp[index1][index2] * arr[index1];
-//        }
-//        output1[index2] = output1[index2] + ba1_temp[index2];
-//        if (output1[index2] < 0) {
-//            output1[index2] = 0;
-//        }
-//    }
-//    for (int index2 = 0; index2 < 2; index2++) {
-//        for (int index1 = 0; index1 < 10; index1++) {
-//            output[index2] = output[index2] + ha2_temp[index1][index2] * output1[index1];
-//        }
-//    }
-//    mean_before_SP = output[0] + ba2_temp[0];
-//    deviation_before_SP = output[1] + ba2_temp[1];
 //    mean = log(1.0f+exp(mean_before_SP));
 //    deviation = log(1.0f+exp(deviation_before_SP));
-//}
+    if (mean_before_SP >=0) {
+        mean = mean_before_SP;
+    } else {
+        mean = 0.0f;
+    }
+    if (deviation_before_SP >=0) {
+        deviation = deviation_before_SP;
+    } else {
+        deviation = 0.0f;
+    }
+
+}
 
 
 void Actor_Network_Old(float *arr)
 {
-    float output1[10] = {0.0f};
+    float output1[num_hidden_unit] = {0.0f};
     float output[2] = {0.0f};
 
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             output1[index2] = output1[index2] + ha1[index1][index2] * arr[index1];
         }
@@ -401,14 +386,24 @@
         }
     }
     for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             output[index2] = output[index2] + ha2[index1][index2] * output1[index1];
         }
     }
     mean_old = output[0] + ba2[0];
     deviation_old = output[1] + ba2[1];
-    mean_old = log(1.0f+exp(mean_old));
-    deviation_old = log(1.0f+exp(deviation_old));
+//    mean_old = log(1.0f+exp(mean_old));
+//    deviation_old = log(1.0f+exp(deviation_old));
+    // Rectify the old-policy outputs (ReLU in place of the former softplus).
+    // mean/deviation are the current policy's outputs and are left untouched.
+    if (mean_old < 0.0f) {
+        mean_old = 0.0f;
+    }
+    // NOTE(review): deviation_old == 0 makes the pi_old expression divide by zero;
+    // confirm whether a small positive floor is wanted.
+    if (deviation_old < 0.0f) {
+        deviation_old = 0.0f;
+    }
 }
 
 float Grad_Normal_Dist_Mean(float mean, float deviation, float action)
@@ -429,9 +424,9 @@
 {
     float gradient_rate = 0.01f;
 
-    float G_hc1[num_input_RL][10] = {0.0f};
-    float G_bc1[10] = {0.0f};
-    for (int index2 = 0; index2 < 10; index2++) {
+    float G_hc1[num_input_RL][num_hidden_unit] = {0.0f};
+    float G_bc1[num_hidden_unit] = {0.0f};
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             for (int i=0; i<batch_size; i++) {
                 float hx_sum = 0.0f;
@@ -462,10 +457,10 @@
         bc1_temp[index2] = bc1_temp[index2] - gradient_rate * G_bc1[index2];
     }
 
-    float G_hc2[10] = {0.0f};
+    float G_hc2[num_hidden_unit] = {0.0f};
     float G_bc2 = 0.0f;
     for (int index2 = 0; index2 < 1; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             for (int i=0; i<batch_size; i++) {
                 float hx_sum = 0.0f;
                 float hx_sum_next = 0.0f;
@@ -491,26 +486,41 @@
 
 void update_Actor_Networks(float (*arr)[num_input_RL])
 {
-    float gradient_rate = -0.01f;
+    float gradient_rate = 0.01f;   //-0.01f
 
-    float G_ha1[num_input_RL][10] = {0.0f};
-    float G_ba1[10] = {0.0f};
-    for (int index2 = 0; index2 < 10; index2++) {
+    float G_ha1[num_input_RL][num_hidden_unit] = {0.0f};
+    float G_ba1[num_hidden_unit] = {0.0f};
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             for (int i=0; i<batch_size; i++) {
                 if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
                     G_ha1[index1][index2] = G_ha1[index1][index2];
                 } else {
-                    float hx_sum = 0.0f;
-                    for(int j=0; j<num_input_RL; j++) {
-                        hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
+
+                    float hx_sum_total = 0.0f;
+                    for(int m = 0; m < num_hidden_unit; m++) {
+                        for(int n = 0; n < num_input_RL; n++) {
+                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+                        }
                     }
-                    hx_sum = hx_sum + bc1_temp[index2];
+                    hx_sum_total = hx_sum_total + bc1_temp[index2];
                     float d_mean_d_ha1 = 0.0f;
                     float d_dev_d_ha1 = 0.0f;
-                    if (hx_sum >= 0) {
-                        d_mean_d_ha1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0]*arr[i][index1];
-                        d_dev_d_ha1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1]*arr[i][index1];
+                    if (hx_sum_total >=0) {
+                        float hx_sum = 0.0f;
+                        for(int j=0; j<num_input_RL; j++) {
+                            hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
+                        }
+                        hx_sum = hx_sum + bc1_temp[index2];
+                        if (hx_sum >= 0) {
+//                            d_mean_d_ha1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0]*arr[i][index1];
+//                            d_dev_d_ha1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1]*arr[i][index1];
+                            d_mean_d_ha1 = ha2_temp[index2][0]*arr[i][index1];
+                            d_dev_d_ha1 = ha2_temp[index2][1]*arr[i][index1];
+                        } else {
+                            d_mean_d_ha1 = 0.0f;
+                            d_dev_d_ha1 = 0.0f;
+                        }
                     } else {
                         d_mean_d_ha1 = 0.0f;
                         d_dev_d_ha1 = 0.0f;
@@ -525,16 +535,33 @@
             if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon))  {
                 G_ba1[index2] = G_ba1[index2];
             } else {
-                float hx_sum = 0.0f;
-                for(int j=0; j<num_input_RL; j++) {
-                    hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
+
+                float hx_sum_total = 0.0f;
+                for(int m = 0; m < num_hidden_unit; m++) {
+                    for(int n = 0; n < num_input_RL; n++) {
+                        hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+                    }
                 }
-                hx_sum = hx_sum + bc1_temp[index2];
+                hx_sum_total = hx_sum_total + bc1_temp[index2];
                 float d_mean_d_ba1 = 0.0f;
                 float d_dev_d_ba1 = 0.0f;
-                if(hx_sum >=0) {
-                    d_mean_d_ba1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0];
-                    d_dev_d_ba1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1];
+                if (hx_sum_total >=0) {
+
+                    float hx_sum = 0.0f;
+                    for(int j=0; j<num_input_RL; j++) {
+                        hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j];
+                    }
+                    hx_sum = hx_sum + bc1_temp[index2];
+
+                    if(hx_sum >=0) {
+//                        d_mean_d_ba1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0];
+//                        d_dev_d_ba1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1];
+                        d_mean_d_ba1 = ha2_temp[index2][0];
+                        d_dev_d_ba1 = ha2_temp[index2][1];
+                    } else {
+                        d_mean_d_ba1 = 0.0f;
+                        d_dev_d_ba1 = 0.0f;
+                    }
                 } else {
                     d_mean_d_ba1 = 0.0f;
                     d_dev_d_ba1 = 0.0f;
@@ -546,27 +573,42 @@
         ba1_temp[index2] = ba1_temp[index2] - gradient_rate * G_ba1[index2];
     }
 
-    float G_ha2[10][2] = {0.0f};
+    float G_ha2[num_hidden_unit][2] = {0.0f};
     float G_ba2[2] = {0.0f};
     for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             for (int i=0; i<batch_size; i++) {
                 if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) {
                     G_ha2[index1][index2] = G_ha2[index1][index2];
                 } else {
-                    float hx_sum = 0.0f;
-                    for(int j=0; j<num_input_RL; j++) {
-                        hx_sum = hx_sum + ha1_temp[j][index1]*arr[i][j];
+
+                    float hx_sum_total = 0.0f;
+                    for(int m = 0; m < num_hidden_unit; m++) {
+                        for(int n = 0; n < num_input_RL; n++) {
+                            hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n];
+                        }
                     }
-                    hx_sum = hx_sum + bc1_temp[index1];
+                    hx_sum_total = hx_sum_total + bc1_temp[index2];
                     float d_mean_d_ha2 = 0.0f;
                     float d_dev_d_ha2 = 0.0f;
-                    if (hx_sum >= 0) {
-                        d_mean_d_ha2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*hx_sum;
-                        d_dev_d_ha2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*hx_sum;
+                    if (hx_sum_total >=0) {
+                        float hx_sum = 0.0f;
+                        for(int j=0; j<num_input_RL; j++) {
+                            hx_sum = hx_sum + ha1_temp[j][index1]*arr[i][j];
+                        }
+                        hx_sum = hx_sum + bc1_temp[index1];
+                        if (hx_sum >= 0) {
+//                            d_mean_d_ha2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*hx_sum;
+//                            d_dev_d_ha2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*hx_sum;
+                            d_mean_d_ha2 = hx_sum;
+                            d_dev_d_ha2 = hx_sum;
+                        } else {
+                            d_mean_d_ha2 = 0.0f;
+                            d_dev_d_ha2 = 0.0f;
+                        }
                     } else {
                         d_mean_d_ha2 = 0.0f;
-                        d_dev_d_ha2 = 0.0f;
+                        d_dev_d_ha2 = 0.0f;
                     }
                     G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
                 }
@@ -581,8 +623,10 @@
 
                 float d_mean_d_ba2 = 0.0f;
                 float d_dev_d_ba2 = 0.0f;
-                d_mean_d_ba2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]));
-                d_dev_d_ba2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]));
+//                d_mean_d_ba2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]));
+//                d_dev_d_ba2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]));
+                d_mean_d_ba2 = 1.0f;
+                d_dev_d_ba2 = 1.0f;
                 G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]));
             }
         }
@@ -622,7 +666,7 @@
 
 void Overwirte_Critic_Networks()
 {
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             hc1[index1][index2] = hc1_temp[index1][index2];
         }
@@ -633,14 +677,14 @@
 }
 void Overwirte_Actor_Networks()
 {
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             ha1[index1][index2] = ha1_temp[index1][index2];
         }
         ba1[index2] = ba1_temp[index2];
     }
     for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             ha2[index1][index2] = ha2_temp[index1][index2];
         }
         ba2[index2] = ba2_temp[index2];
@@ -731,7 +775,7 @@
             ID_index_array[i] =  (i+1) * 0.5f;
     }
 
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             hc1_temp[index1][index2] = (float) (rand()%100) * 0.01f ;
         }
@@ -739,19 +783,19 @@
         hc2_temp[index2] = (float) (rand()%100) * 0.01f;
     }
     bc2_temp = (float) (rand()%100) * 0.01f;
-    for (int index2 = 0; index2 < 10; index2++) {
+    for (int index2 = 0; index2 < num_hidden_unit; index2++) {
         for (int index1 = 0; index1 < num_input_RL; index1++) {
             ha1_temp[index1][index2] = (float) (rand()%100) * 0.01f;
         }
         ba1_temp[index2] = (float) (rand()%100) * 0.01f;
     }
     for (int index2 = 0; index2 < 2; index2++) {
-        for (int index1 = 0; index1 < 10; index1++) {
+        for (int index1 = 0; index1 < num_hidden_unit; index1++) {
             ha2_temp[index1][index2] = (float) (rand()%100) * 0.01f;
         }
         ba2_temp[index2] = (float) (rand()%100) * 0.01f;
     }
-    
+
     Overwirte_Critic_Networks();
     Overwirte_Actor_Networks();
 
@@ -759,20 +803,21 @@
     ***     Program is operating!
     *************************************/
     while(1) {
-//        if(timer_while==1000 && OPERATING_MODE==5) {
-        //if(timer_while==1000) {
-        //i2c
 
-        read_field(i2c_slave_addr1);
-        if(DIR_VALVE_ENC < 0) value = 1023 - value;
-        //            if(LED==1) {
-        //                LED=0;
-        //            } else
-        //                LED = 1;
-        timer_while = 0;
-        //}
+//        if(timer_while==27491) {
+//            timer_while = 0;
+//            pc.printf("ref : %f     virt_pos : %f  mean : %f    deviation : %f       Last_pos_of_batch : %f      reward_sum : %f\n", pos.sen/(float)(ENC_PULSE_PER_POSITION), logging3, logging2, logging4, logging1, logging5);
+//            //pc.printf("%f\n", virt_pos);
+//            //pc.printf("%f\n", pos.sen/(float)(ENC_PULSE_PER_POSITION));
+//            //pc.printf("ref : %f     virt_pos : %f\n", pos.sen/(float)(ENC_PULSE_PER_POSITION), virt_pos);
+//        }
 
-        timer_while ++;
+
+        //i2c
+        //read_field(i2c_slave_addr1);
+        //if(DIR_VALVE_ENC < 0) value = 1023 - value;
+
+//        timer_while ++;
 
         ///////////////////////////////////////////////////////Neural Network
 
@@ -874,8 +919,8 @@
                 LED = 1;
 
         }
-        
-        
+
+
         /////////////////////////////////////////////////////////////////////RL
         switch (Update_Case) {
             case 0: {
@@ -886,7 +931,6 @@
                 for (int epoch = 0; epoch < num_epoch; epoch++) {
                     float loss_sum = 0.0f;
                     for (int i=batch_size-1; i>=0; i--) {
-                        V[batch_size] = 0.0f;
                         //Calculate Estimated V
                         //float temp_array[3] = {state_array[i][0], state_array[i][1], state_array[i][2]};
                         float temp_array[2] = {state_array[i][0], state_array[i][1]};
@@ -894,7 +938,7 @@
                         pi[i] = exp(-(action_array[i]-mean_array[i])*(action_array[i]-mean_array[i])/(2.0f*deviation_array[i]*deviation_array[i]))/(sqrt(2.0f*PI)*deviation_array[i]);
                         Actor_Network_Old(temp_array);
                         pi_old[i] = exp(-(action_array[i]-mean_old)*(action_array[i]-mean_old)/(2.0f*deviation_old*deviation_old))/(sqrt(2.0f*PI)*deviation_old);
-                        r[i] = exp(-0.01f * state_array[i][1] * 70.0f * state_array[i][1] * 70.0f);
+                        r[i] = exp(-0.00005f * state_array[i][1] * 70.0f * state_array[i][1] * 70.0f);
                         if(i == batch_size-1) td_target[i] = r[i];
                         else td_target[i] = r[i] + gamma * V[i+1];
                         delta[i] = td_target[i] - V[i];
@@ -917,8 +961,8 @@
                         reward_sum = reward_sum + r[i];
                     }
                     logging5 = reward_sum;
-                    
-                    
+
+
                     //loss_batch = loss_sum / (float) batch_size;
                     loss_batch = loss_sum;
                     //Update Networks
@@ -929,7 +973,7 @@
                 Update_Done_Flag = 1;
                 Update_Case = 0;
                 //logging1 = V[0];
-                
+
                 break;
             }
             case 2: {
@@ -2311,7 +2355,7 @@
                 gamma_hat = gamma_hat + gamma_hat_dot / (float) TMR_FREQ_5k;
                 break;
             }
-            
+
             case MODE_RL: {
                 //t.reset();
                 //t.start();
@@ -2344,7 +2388,7 @@
                     } else if(virt_pos < -70.0f) {
                         virt_pos = -70.0f;
                     }
-                    
+
                     RL_timer++;
 
 
@@ -2563,9 +2607,9 @@
             }
 
 
-            if (flag_data_request[2] == LOW) {
+            if (flag_data_request[2] == HIGH) {
                 double t_value = 0.0f;
-                if(valve_pos.ref>=(float) VALVE_CENTER) {
+                if(value>=(float) VALVE_CENTER) {
                     t_value = 10000.0f*((double)value - (double)VALVE_CENTER)/((double)VALVE_MAX_POS - (double)VALVE_CENTER);
                 } else {
                     t_value = -10000.0f*((double)value - (double)VALVE_CENTER)/((double)VALVE_MIN_POS - (double)VALVE_CENTER);
@@ -2596,7 +2640,7 @@
 //            }
             if (flag_data_request[4] == HIGH) {
                 //valve position
-                CAN_TX_VALVE_POSITION((int16_t) (input_NN[1] * 100.0f), (int16_t) (input_NN[2]* 100.0f), (int16_t) (input_NN[3]* 100.0f), (int16_t) (input_NN[4]* 100.0f)); //1600
+                CAN_TX_VALVE_POSITION((int16_t) (pos.sen/(float)(ENC_PULSE_PER_POSITION)), (int16_t) virt_pos, (int16_t) 0, (int16_t) 0); //1600  (cast the scaled position, not the raw pos.sen)
             }
 
             // Others : Reference position, Reference FT, PWM, Current  (ID:1300)