rainbow

Dependencies:   mbed FastPWM

Revision:
170:42c938a40313
Parent:
169:645207e160ca
Child:
171:bfc1fd2629d8
--- a/main.cpp	Mon Nov 16 02:23:23 2020 +0000
+++ b/main.cpp	Wed Nov 18 12:03:39 2020 +0000
@@ -1,4 +1,4 @@
-//201116_1
+//201118_1
 #include "mbed.h"
 #include "FastPWM.h"
 #include "INIT_HW.h"
@@ -9,6 +9,8 @@
 #include "function_utilities.h"
 #include "stm32f4xx_flash.h"
 #include "FlashWriter.h"
+#include <string>
+#include <iostream>
 
 using namespace std;
 Timer t;
@@ -35,8 +37,8 @@
 unsigned int value; // 10bit output of reading sensor AS5510
 
 // SPI ///////////////////////////////////////////
-//SPI eeprom(PB_15, PB_14, PB_13); // EEPROM //(SPI_MOSI, SPI_MISO, SPI_SCK);
-//DigitalOut eeprom_cs(PB_12);
+SPI eeprom(PB_15, PB_14, PB_13); // EEPROM //(SPI_MOSI, SPI_MISO, SPI_SCK);
+DigitalOut eeprom_cs(PB_12);
 //FlashWriter writer(6);//2부터 7까지 되는듯 아마 sector
 SPI enc(PC_12,PC_11,PC_10);
 DigitalOut enc_cs(PD_2);
@@ -122,6 +124,7 @@
     MODE_CURRENT_CONTROL,                               //11
     MODE_JOINT_POSITION_TORQUE_CONTROL_CURRENT,         //12
     MODE_JOINT_POSITION_PRES_CONTROL_CURRENT,           //13
+    MODE_RL,                                            //14
 
     //utility
     MODE_TORQUE_SENSOR_NULLING = 20,                    //20
@@ -261,10 +264,390 @@
 
 const float bout[1] = { -0.10839174687862396f };
 
+/////////////////////////////////////////////////////////////////////////////////////////////RL
+float input_RL[num_input_RL] = { 0.0f };    // buffer with one slot per network input
+
+//Critic Networks: active weights evaluated by Critic_Network(); refreshed from the *_temp set by Overwirte_Critic_Networks()
+float hc1[num_input_RL][10] = {0.0f};   // input -> hidden weights, indexed [input][hidden unit]
+float bc1[10] = {0.0f};                 // hidden-unit biases (added before tanh)
+float hc2[10] = {0.0f};                 // hidden -> output weights
+float bc2 = 0.0f;                       // output bias
+
+//Critic Networks Temporary: working copy evaluated by Critic_Network_Temp() and modified by update_Critic_Networks()
+float hc1_temp[num_input_RL][10] = {0.0f};
+float bc1_temp[10] = {0.0f};
+float hc2_temp[10] = {0.0f};
+float bc2_temp = 0.0f;
+
+//Actor Networks: active weights evaluated by Actor_Network() and Actor_Network_Old(); refreshed by Overwirte_Actor_Networks()
+float ha1[num_input_RL][10] = {0.0f};   // input -> hidden weights, indexed [input][hidden unit]
+float ba1[10] = {0.0f};                 // hidden-unit biases (added before the ReLU clamp)
+float ha2[10][2] = {0.0f};              // hidden -> output weights; column 0 feeds the mean, column 1 the deviation
+float ba2[2] = {0.0f};                  // output biases, same column order
+
+//Actor Networks Temporary: working copy modified by update_Actor_Networks()
+float ha1_temp[num_input_RL][10] = {0.0f};
+float ba1_temp[10] = {0.0f};
+float ha2_temp[10][2] = {0.0f};
+float ba2_temp[2] = {0.0f};
 
 float VALVE_POS_RAW_NN = 0.0f;
 float DDV_JOINT_POS_FF(float REF_JOINT_VEL);
 
+
+// Critic forward pass with the active weights hc1/bc1/hc2/bc2: a tanh hidden layer of 10 units
+// followed by one linear output. arr must hold num_input_RL values; returns the scalar output.
+float Critic_Network(float *arr)
+{
+    float hidden[10] = { 0.0f };
+    for (int h = 0; h < 10; h++) {
+        float acc = 0.0f;
+        for (int in = 0; in < num_input_RL; in++) {
+            acc += hc1[in][h] * arr[in];
+        }
+        hidden[h] = tanh(acc + bc1[h]);
+    }
+    float value = 0.0f;
+    for (int h = 0; h < 10; h++) {
+        value += hc2[h] * hidden[h];
+    }
+    return value + bc2;
+}
+
+// Critic forward pass evaluated with the temporary weights hc1_temp/bc1_temp/hc2_temp/bc2_temp:
+// tanh hidden layer of 10 units, one linear output. arr must hold num_input_RL values.
+float Critic_Network_Temp(float *arr)
+{
+    float hidden[10] = { 0.0f };
+    for (int h = 0; h < 10; h++) {
+        float acc = 0.0f;
+        for (int in = 0; in < num_input_RL; in++) {
+            acc += hc1_temp[in][h] * arr[in];
+        }
+        hidden[h] = tanh(acc + bc1_temp[h]);
+    }
+    float value = 0.0f;
+    for (int h = 0; h < 10; h++) {
+        value += hc2_temp[h] * hidden[h];
+    }
+    return value + bc2_temp;
+}
+
+
+// Actor forward pass with the active weights ha1/ba1/ha2/ba2 (arr: num_input_RL inputs): ReLU hidden layer of
+// 10 units, two linear outputs, softplus. Writes globals mean_before_SP, deviation_before_SP, mean, deviation.
+void Actor_Network(float *arr)
+{
+    float hidden[10] = {0.0f};
+    float out[2] = {0.0f};
+    for (int h = 0; h < 10; h++) {
+        for (int in = 0; in < num_input_RL; in++) {
+            hidden[h] += ha1[in][h] * arr[in];
+        }
+        hidden[h] += ba1[h];
+        if (hidden[h] < 0) hidden[h] = 0;   // ReLU
+    }
+    for (int o = 0; o < 2; o++) {
+        for (int h = 0; h < 10; h++) {
+            out[o] += ha2[h][o] * hidden[h];
+        }
+    }
+    mean_before_SP = out[0] + ba2[0];
+    deviation_before_SP = out[1] + ba2[1];
+    // softplus(x) = log(1+exp(x)). Above 20 it equals x at float precision, and exp(x) would overflow for large x.
+    mean = (mean_before_SP > 20.0f) ? mean_before_SP : log(1.0f+exp(mean_before_SP));
+    deviation = (deviation_before_SP > 20.0f) ? deviation_before_SP : log(1.0f+exp(deviation_before_SP));
+}
+
+
+//void Actor_Network_Temp(float *arr)
+//{
+//    float output1[10] = {0.0f};
+//    float output[2] = {0.0f};
+//
+//    for (int index2 = 0; index2 < 10; index2++) {
+//        for (int index1 = 0; index1 < num_input_RL; index1++) {
+//            output1[index2] = output1[index2] + ha1_temp[index1][index2] * arr[index1];
+//        }
+//        output1[index2] = output1[index2] + ba1_temp[index2];
+//        if (output1[index2] < 0) {
+//            output1[index2] = 0;
+//        }
+//    }
+//    for (int index2 = 0; index2 < 2; index2++) {
+//        for (int index1 = 0; index1 < 10; index1++) {
+//            output[index2] = output[index2] + ha2_temp[index1][index2] * output1[index1];
+//        }
+//    }
+//    mean_before_SP = output[0] + ba2_temp[0];
+//    deviation_before_SP = output[1] + ba2_temp[1];
+//    mean = log(1.0f+exp(mean_before_SP));
+//    deviation = log(1.0f+exp(deviation_before_SP));
+//}
+
+
+// Same forward pass as Actor_Network(), reading the same active weights ha1/ba1/ha2/ba2, but the
+// softplus outputs are stored in the globals mean_old and deviation_old. arr: num_input_RL inputs.
+void Actor_Network_Old(float *arr)
+{
+    float hidden[10] = {0.0f};
+    float out[2] = {0.0f};
+    for (int h = 0; h < 10; h++) {
+        for (int in = 0; in < num_input_RL; in++) {
+            hidden[h] += ha1[in][h] * arr[in];
+        }
+        hidden[h] += ba1[h];
+        if (hidden[h] < 0) hidden[h] = 0;   // ReLU
+    }
+    for (int o = 0; o < 2; o++) {
+        for (int h = 0; h < 10; h++) {
+            out[o] += ha2[h][o] * hidden[h];
+        }
+    }
+    mean_old = out[0] + ba2[0];
+    deviation_old = out[1] + ba2[1];
+    // softplus(x) = log(1+exp(x)). Above 20 it equals x at float precision, so skip exp() there to avoid overflow.
+    if (mean_old <= 20.0f) mean_old = log(1.0f+exp(mean_old));
+    if (deviation_old <= 20.0f) deviation_old = log(1.0f+exp(deviation_old));
+}
+
+float Grad_Normal_Dist_Mean(float mean, float deviation, float action)
+{
+    // Derivative of the normal pdf at `action` with respect to its mean: (a-m)*exp(-(a-m)^2/(2 s^2)) / (sqrt(2 pi) s^3)
+    const float diff = action - mean;
+    return diff*exp(-diff*diff/(2.0f*deviation*deviation))/(sqrt(2.0f*PI)*deviation*deviation*deviation);
+}
+
+float Grad_Normal_Dist_Deviation(float mean, float deviation, float action)
+{
+    // Derivative of the normal pdf at `action` with respect to its deviation s: exp(-(a-m)^2/(2 s^2)) * (-1/(sqrt(2 pi) s^2) + (a-m)^2/(sqrt(2 pi) s^4))
+    const float diff = action - mean;
+    return exp(-diff*diff/(2.0f*deviation*deviation))*(-1.0f/(sqrt(2.0f*PI)*deviation*deviation) + diff*diff/(sqrt(2.0f*PI)*deviation*deviation*deviation*deviation));
+}
+
+void update_Critic_Networks(float (*arr)[num_input_RL])   // one gradient step on the temporary critic weights (hc1_temp, bc1_temp, hc2_temp, bc2_temp)
+{   // arr[i] is the input vector of sample i (i < batch_size); arr[i+1] is read as the sample that follows it
+    float gradient_rate = 0.01f;   // step size multiplied into every averaged gradient
+    // Hidden layer. The global advantage[i] (not computed here) weights each sample; (1 - tanh^2) is the derivative of tanh.
+    float G_hc1[num_input_RL][10] = {0.0f};
+    float G_bc1[10] = {0.0f};
+    for (int index2 = 0; index2 < 10; index2++) {
+        for (int index1 = 0; index1 < num_input_RL; index1++) {
+            for (int i=0; i<batch_size; i++) {
+                float hx_sum = 0.0f;
+                float hx_sum_next = 0.0f;
+                for(int j=0; j<num_input_RL; j++) {   // weighted input sums of hidden unit index2 for sample i and for sample i+1
+                    hx_sum = hx_sum + hc1_temp[j][index2]*arr[i][j];
+                    if (i==batch_size-1) hx_sum_next = 0.0f;   // the last sample has no successor, so arr[i+1] is never read for it
+                    else hx_sum_next = hx_sum_next + hc1_temp[j][index2]*arr[i+1][j];
+                }   // below: last sample adds only the current-sample term; every other sample adds (successor term - current term)
+                if (i==batch_size-1) G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*advantage[i]*(-hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))*arr[i][index1]);
+                else G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*advantage[i]*(hc2_temp[index2]*(1.0f-tanh(hx_sum_next + bc1_temp[index2])*tanh(hx_sum_next + bc1_temp[index2]))*arr[i+1][index1] - hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))*arr[i][index1]);
+            }
+            G_hc1[index1][index2] = G_hc1[index1][index2] / batch_size;   // average over the batch
+            hc1_temp[index1][index2] = hc1_temp[index1][index2] - gradient_rate * G_hc1[index1][index2];   // applied immediately: sums recomputed for later weights already use this new value
+        }
+        for (int i=0; i<batch_size; i++) {   // same accumulation for the bias of hidden unit index2 (no input factor)
+            float hx_sum = 0.0f;
+            float hx_sum_next = 0.0f;
+            for(int j=0; j<num_input_RL; j++) {
+                hx_sum = hx_sum + hc1_temp[j][index2]*arr[i][j];
+                if (i==batch_size-1) hx_sum_next = 0.0f;
+                else hx_sum_next = hx_sum_next + hc1_temp[j][index2]*arr[i+1][j];
+            }
+            if (i==batch_size-1) G_bc1[index2] = G_bc1[index2] + 2.0f*advantage[i]*(-hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2])));
+            else  G_bc1[index2] = G_bc1[index2] + 2.0f*advantage[i]*(hc2_temp[index2]*(1.0f-tanh(hx_sum_next + bc1_temp[index2])*tanh(hx_sum_next + bc1_temp[index2])) - hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2])));
+        }
+        G_bc1[index2] = G_bc1[index2] / batch_size;
+        bc1_temp[index2] = bc1_temp[index2] - gradient_rate * G_bc1[index2];
+    }
+    // Output layer: hc2_temp[index1] weights hidden unit index1, bc2_temp is the scalar output bias.
+    float G_hc2[10] = {0.0f};
+    float G_bc2 = 0.0f;
+    for (int index2 = 0; index2 < 1; index2++) {   // runs exactly once
+        for (int index1 = 0; index1 < 10; index1++) {
+            for (int i=0; i<batch_size; i++) {
+                float hx_sum = 0.0f;
+                float hx_sum_next = 0.0f;
+                for(int j=0; j<num_input_RL; j++) {
+                    hx_sum = hx_sum + hc1_temp[j][index1]*arr[i][j];
+                    if (i==batch_size-1) hx_sum_next = 0.0f;
+                    else hx_sum_next = hx_sum_next + hc1_temp[j][index1]*arr[i+1][j];
+                }
+                if (i==batch_size-1) G_hc2[index1] = G_hc2[index1] - 2.0f*advantage[i]*tanh(hx_sum + bc1_temp[index1]);
+                else G_hc2[index1] = G_hc2[index1] + 2.0f*advantage[i]*(tanh(hx_sum_next + bc1_temp[index1]) - tanh(hx_sum + bc1_temp[index1]));   // hidden activation of the successor minus that of the current sample
+            }
+            G_hc2[index1] = G_hc2[index1] / batch_size;
+            hc2_temp[index1] = hc2_temp[index1] - gradient_rate * G_hc2[index1];
+        }
+        for (int i=0; i<batch_size; i++) {
+            if (i==batch_size-1) G_bc2 = G_bc2 + 2.0f*advantage[i]*(-1.0f);
+            else  G_bc2 = 0.0f;   // every non-last sample resets the sum, so only the last sample's term reaches G_bc2
+        }
+        G_bc2 = G_bc2/ batch_size;
+        bc2_temp = bc2_temp - gradient_rate * G_bc2;
+    }
+}
+
+// True when the clipped surrogate leaves sample i without gradient: the ratio is already outside
+// the clip range on the side the advantage pushes towards.
+static bool Actor_Sample_Clipped(int i)
+{
+    return (advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon);
+}
+
+// Pre-activation of hidden unit `unit` of the temporary actor (ha1_temp, ba1_temp) for one input vector.
+static float Actor_Hidden_PreAct(const float *state, int unit)
+{
+    float sum = 0.0f;
+    for (int j = 0; j < num_input_RL; j++) {
+        sum = sum + ha1_temp[j][unit]*state[j];
+    }
+    return sum + ba1_temp[unit];
+}
+
+// Derivative of sample i's surrogate term (ratio * advantage) with respect to the pre-softplus actor
+// output `out` (0 = mean, 1 = deviation): advantage/pi_old * d(pdf)/d(output) * softplus'(pre-activation).
+static float Actor_Output_Grad(int i, int out)
+{
+    float pre = (out == 0) ? mean_before_SP_array[i] : deviation_before_SP_array[i];
+    float d_pdf = (out == 0) ? Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])
+                             : Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i]);
+    // softplus'(x) = sigmoid(x), written as 1/(1+exp(-x)) so a large x yields 1 instead of inf/inf
+    return advantage[i]/pi_old[i]*d_pdf/(1.0f+exp(-pre));
+}
+
+// One gradient-ascent step of the clipped surrogate on the temporary actor weights (ha1_temp, ba1_temp,
+// ha2_temp, ba2_temp). arr[i] is the input vector of sample i (i < batch_size). Reads the per-sample
+// globals advantage, ratio, pi_old, mean_array, deviation_array, mean_before_SP_array,
+// deviation_before_SP_array and action_array. Every weight is updated as soon as its averaged gradient
+// is known, so gradients computed later in the same call already see the updated weights.
+// Points that are easy to get wrong:
+//  - the hidden pre-activation uses the actor bias ba1_temp, matching the forward pass in Actor_Network();
+//  - column k of ha2_temp and ba2_temp[k] only influence output k (0 = mean, 1 = deviation), so they only
+//    receive that output's gradient;
+//  - the output-bias gradient is accumulated in G_ba2, the array that is then applied to ba2_temp.
+void update_Actor_Networks(float (*arr)[num_input_RL])
+{
+    float gradient_rate = -0.01f;   // negative: "w - gradient_rate*G" below steps along +G
+
+    // Hidden layer: weights ha1_temp[index1][index2], biases ba1_temp[index2]
+    float G_ha1[num_input_RL][10] = {0.0f};
+    float G_ba1[10] = {0.0f};
+    for (int index2 = 0; index2 < 10; index2++) {
+        for (int index1 = 0; index1 < num_input_RL; index1++) {
+            for (int i=0; i<batch_size; i++) {
+                if (Actor_Sample_Clipped(i)) {
+                    continue;
+                }
+                if (!(Actor_Hidden_PreAct(arr[i], index2) >= 0)) {
+                    continue;   // ReLU inactive: this unit passes no gradient
+                }
+                float back = Actor_Output_Grad(i,0)*ha2_temp[index2][0] + Actor_Output_Grad(i,1)*ha2_temp[index2][1];
+                G_ha1[index1][index2] = G_ha1[index1][index2] + back*arr[i][index1];
+            }
+            G_ha1[index1][index2] = G_ha1[index1][index2] / batch_size;
+            ha1_temp[index1][index2] = ha1_temp[index1][index2] - gradient_rate * G_ha1[index1][index2];
+        }
+        for (int i=0; i<batch_size; i++) {
+            if (Actor_Sample_Clipped(i)) {
+                continue;
+            }
+            if (!(Actor_Hidden_PreAct(arr[i], index2) >= 0)) {
+                continue;
+            }
+            G_ba1[index2] = G_ba1[index2] + Actor_Output_Grad(i,0)*ha2_temp[index2][0] + Actor_Output_Grad(i,1)*ha2_temp[index2][1];
+        }
+        G_ba1[index2] = G_ba1[index2] / batch_size;
+        ba1_temp[index2] = ba1_temp[index2] - gradient_rate * G_ba1[index2];
+    }
+
+    // Output layer: weights ha2_temp[index1][index2], biases ba2_temp[index2]
+    float G_ha2[10][2] = {0.0f};
+    float G_ba2[2] = {0.0f};
+    for (int index2 = 0; index2 < 2; index2++) {
+        for (int index1 = 0; index1 < 10; index1++) {
+            for (int i=0; i<batch_size; i++) {
+                if (Actor_Sample_Clipped(i)) {
+                    continue;
+                }
+                float hidden = Actor_Hidden_PreAct(arr[i], index1);
+                if (!(hidden >= 0)) {
+                    continue;   // ReLU output is 0, so this weight had no effect on the output
+                }
+                G_ha2[index1][index2] = G_ha2[index1][index2] + Actor_Output_Grad(i,index2)*hidden;
+            }
+            G_ha2[index1][index2] = G_ha2[index1][index2] / batch_size;
+            ha2_temp[index1][index2] = ha2_temp[index1][index2] - gradient_rate * G_ha2[index1][index2];
+        }
+        for (int i=0; i<batch_size; i++) {
+            if (Actor_Sample_Clipped(i)) {
+                continue;
+            }
+            G_ba2[index2] = G_ba2[index2] + Actor_Output_Grad(i,index2);
+        }
+        G_ba2[index2] = G_ba2[index2] / batch_size;
+        ba2_temp[index2] = ba2_temp[index2] - gradient_rate * G_ba2[index2];
+    }
+}
+
+
+
+// Draws one sample from a normal distribution with the given mean and stddev, using the polar form of
+// the Box-Muller transform. Each accepted point yields two standard-normal deviates; the second is
+// kept in a function-local static and returned, scaled by that call's arguments, on the next call.
+float rand_normal(double mean, double stddev)
+{
+    static double spare = 0.0f;
+    static int have_spare = 0;
+
+    if (have_spare) {
+        have_spare = 0;
+        return spare*stddev + mean;
+    }
+
+    double u, v, s;
+    do {
+        // uniform point in [-1, 1] x [-1, 1]; retry while it is outside the unit circle or exactly at the origin
+        u = 2.0f*rand()/RAND_MAX - 1;
+        v = 2.0f*rand()/RAND_MAX - 1;
+        s = u*u + v*v;
+    } while (s == 0.0f || s > 1.0f);
+
+    const double scale = sqrt(-2.0f*log(s)/s);
+    spare = v*scale;
+    have_spare = 1;
+    return u*scale*stddev + mean;
+}
+
+// Copies the temporary critic parameters (hc1_temp, bc1_temp, hc2_temp, bc2_temp) into the active
+// set (hc1, bc1, hc2, bc2) that Critic_Network() evaluates.
+void Overwirte_Critic_Networks()
+{
+    for (int unit = 0; unit < 10; unit++) {
+        bc1[unit] = bc1_temp[unit];
+        hc2[unit] = hc2_temp[unit];
+        for (int in = 0; in < num_input_RL; in++) hc1[in][unit] = hc1_temp[in][unit];
+    }
+    bc2 = bc2_temp;
+}
+// Copies the temporary actor parameters (ha1_temp, ba1_temp, ha2_temp, ba2_temp) into the active
+// set (ha1, ba1, ha2, ba2) that Actor_Network() and Actor_Network_Old() evaluate.
+void Overwirte_Actor_Networks()
+{
+    for (int unit = 0; unit < 10; unit++) {
+        for (int in = 0; in < num_input_RL; in++) {
+            ha1[in][unit] = ha1_temp[in][unit];
+        }
+        ba1[unit] = ba1_temp[unit];
+        ha2[unit][0] = ha2_temp[unit][0];
+        ha2[unit][1] = ha2_temp[unit][1];
+    }
+    ba2[0] = ba2_temp[0];
+    ba2[1] = ba2_temp[1];
+}
+
+
 int main()
 {
 
@@ -285,8 +668,8 @@
     make_delay();
 
 //    // spi init
-    //eeprom.format(8,3);
-    //eeprom.frequency(5000000); //5M
+    eeprom.format(8,3);
+    eeprom.frequency(5000000); //5M
     enc.format(8,0);
     enc.frequency(5000000); //5M
     make_delay();
@@ -348,6 +731,29 @@
             ID_index_array[i] =  (i+1) * 0.5f;
     }
 
+    for (int index2 = 0; index2 < 10; index2++) {
+        for (int index1 = 0; index1 < num_input_RL; index1++) {
+            hc1_temp[index1][index2] = (float) (rand()%100) * 0.01f ;
+        }
+        bc1_temp[index2] = (float) (rand()%100) * 0.01f;
+        hc2_temp[index2] = (float) (rand()%100) * 0.01f;
+    }
+    bc2_temp = (float) (rand()%100) * 0.01f;
+    for (int index2 = 0; index2 < 10; index2++) {
+        for (int index1 = 0; index1 < num_input_RL; index1++) {
+            ha1_temp[index1][index2] = (float) (rand()%100) * 0.01f;
+        }
+        ba1_temp[index2] = (float) (rand()%100) * 0.01f;
+    }
+    for (int index2 = 0; index2 < 2; index2++) {
+        for (int index1 = 0; index1 < 10; index1++) {
+            ha2_temp[index1][index2] = (float) (rand()%100) * 0.01f;
+        }
+        ba2_temp[index2] = (float) (rand()%100) * 0.01f;
+    }
+    
+    Overwirte_Critic_Networks();
+    Overwirte_Actor_Networks();
 
     /************************************
     ***     Program is operating!
@@ -368,7 +774,7 @@
 
         timer_while ++;
 
-        //LED = 0;
+        ///////////////////////////////////////////////////////Neural Network
 
         if(NN_Control_Flag == 0) {
             LED = 0;
@@ -461,29 +867,6 @@
                 valve_pos.ref = -output*0.0001f*((double)VALVE_MIN_POS - (double) VALVE_CENTER) + (double) VALVE_CENTER;
             }
 
-//            // torque feedback
-//            torq.err = f_past[0] - torq.sen; //[N]
-////            torq.err_sum += torq.err/(float) TMR_FREQ_5k; //[N]
-//            torq.err_sum += torq.err/(float) 1500.0f; //[N]
-//
-//
-//            valve_pos.ref = ((float) P_GAIN_JOINT_TORQUE * torq.err + (float) I_GAIN_JOINT_TORQUE * torq.err_sum) * 0.01f + DDV_JOINT_POS_FF(vel.sen) + valve_pos.ref * 0.01f;
-//
-//            if(I_GAIN_JOINT_TORQUE != 0) {
-//                double Ka = 1.0f / (double) I_GAIN_JOINT_TORQUE * 100.0f;
-//                if(valve_pos.ref>VALVE_MAX_POS) {
-//                    double valve_pos_rem = valve_pos.ref - VALVE_MAX_POS;
-//                    valve_pos_rem = valve_pos_rem * Ka;
-//                    valve_pos.ref = VALVE_MAX_POS;
-//                    torq.err_sum = torq.err_sum - valve_pos_rem/(float) 1500.0f;
-//                } else if(valve_pos.ref < VALVE_MIN_POS) {
-//                    double valve_pos_rem = valve_pos.ref - VALVE_MIN_POS;
-//                    valve_pos_rem = valve_pos_rem * Ka;
-//                    valve_pos.ref = VALVE_MIN_POS;
-//                    torq.err_sum = torq.err_sum - valve_pos_rem/(float) 1500.0f;
-//                }
-//            }
-
 
             if(LED==1) {
                 LED=0;
@@ -491,10 +874,75 @@
                 LED = 1;
 
         }
+        
+        
+        /////////////////////////////////////////////////////////////////////RL
+        switch (Update_Case) {
+            case 0: {
+                break;
+            }
+            case 1: {
+                //Network Update(just update and hold network)
+                for (int epoch = 0; epoch < num_epoch; epoch++) {
+                    float loss_sum = 0.0f;
+                    for (int i=batch_size-1; i>=0; i--) {
+                        V[batch_size] = 0.0f;
+                        //Calculate Estimated V
+                        //float temp_array[3] = {state_array[i][0], state_array[i][1], state_array[i][2]};
+                        float temp_array[2] = {state_array[i][0], state_array[i][1]};
+                        V[i] = Critic_Network_Temp(temp_array);
+                        pi[i] = exp(-(action_array[i]-mean_array[i])*(action_array[i]-mean_array[i])/(2.0f*deviation_array[i]*deviation_array[i]))/(sqrt(2.0f*PI)*deviation_array[i]);
+                        Actor_Network_Old(temp_array);
+                        pi_old[i] = exp(-(action_array[i]-mean_old)*(action_array[i]-mean_old)/(2.0f*deviation_old*deviation_old))/(sqrt(2.0f*PI)*deviation_old);
+                        r[i] = exp(-0.01f * state_array[i][1] * 70.0f * state_array[i][1] * 70.0f);
+                        if(i == batch_size-1) td_target[i] = r[i];
+                        else td_target[i] = r[i] + gamma * V[i+1];
+                        delta[i] = td_target[i] - V[i];
+                        if(i == batch_size-1) advantage[i] = 0.0f;
+                        else advantage[i] = gamma * lmbda * advantage[i+1] + delta[i];
+                        ratio[i] = pi[i]/pi_old[i];
+                        surr1[i] = ratio[i] * advantage[i];
+                        if (ratio[i] > 1.0f + epsilon) {
+                            surr2[i] = (1.0f + epsilon)*advantage[i];
+                        } else if( ratio[i] < 1.0f - epsilon) {
+                            surr2[i] = (1.0f - epsilon)*advantage[i];
+                        } else {
+                            surr2[i] = ratio[i]*advantage[i];
+                        }
+                        loss[i] = -min(surr1[i], surr2[i]);
+                        loss_sum = loss_sum + loss[i];
+                    }
+                    reward_sum = 0.0f;
+                    for (int i=0; i<batch_size; i++) {
+                        reward_sum = reward_sum + r[i];
+                    }
+                    logging5 = reward_sum;
+                    
+                    
+                    //loss_batch = loss_sum / (float) batch_size;
+                    loss_batch = loss_sum;
+                    //Update Networks
+                    update_Critic_Networks(state_array);
+                    update_Actor_Networks(state_array);
+                }
+                //virt_pos = 10.0f;
+                Update_Done_Flag = 1;
+                Update_Case = 0;
+                //logging1 = V[0];
+                
+                break;
+            }
+            case 2: {
+                //Network apply to next Network
+                Overwirte_Critic_Networks();
+                Overwirte_Actor_Networks();
+                virt_pos = 10.0f;
+                Update_Done_Flag = 1;
+                Update_Case = 0;
+                break;
+            }
 
-        //LED = 1;
-
-
+        }
     }
 }
 
@@ -781,7 +1229,8 @@
                     CUR_TORQUE_sum = 0;
                     CUR_TORQUE_mean = 0;
 
-                    ROM_RESET_DATA();
+//                    ROM_RESET_DATA();
+                    spi_eeprom_write(RID_TORQUE_SENSOR_VREF, (int16_t) (TORQUE_VREF * 1000.0f));
 
                     dac_1 = TORQUE_VREF / 3.3f;
 
@@ -1113,7 +1562,9 @@
                     CUR_PRES_A_mean = 0;
                     CUR_PRES_B_mean = 0;
 
-                    ROM_RESET_DATA();
+//                    ROM_RESET_DATA();
+                    spi_eeprom_write(RID_PRES_A_SENSOR_VREF, (int16_t) (PRES_A_VREF * 1000.0f));
+                    spi_eeprom_write(RID_PRES_B_SENSOR_VREF, (int16_t) (PRES_B_VREF * 1000.0f));
 
                     dac_1 = PRES_A_VREF / 3.3f;
                     dac_2 = PRES_B_VREF / 3.3f;
@@ -1205,7 +1656,12 @@
                             VALVE_POS_AVG_OLD = VALVE_MIN_POS;
                         }
                     }
-                    ROM_RESET_DATA();
+//                    ROM_RESET_DATA();
+                    spi_eeprom_write(RID_VALVE_MAX_POS, (int16_t) VALVE_MAX_POS);
+                    spi_eeprom_write(RID_VALVE_MIN_POS, (int16_t) VALVE_MIN_POS);
+                    for(int i=0; i<25; i++) {
+                        spi_eeprom_write(RID_VALVE_POS_VS_PWM_0 + i, (int16_t) VALVE_POS_VS_PWM[i]);
+                    }
                     ID_index = 0;
                     CONTROL_UTILITY_MODE = MODE_NO_ACT;
                 }
@@ -1350,7 +1806,10 @@
                                 VALVE_DEADZONE_MINUS = (float) FIRST_DZ;
                                 VALVE_DEADZONE_PLUS = (float) SECOND_DZ;
 
-                                ROM_RESET_DATA();
+//                                ROM_RESET_DATA();
+                                spi_eeprom_write(RID_VALVE_CNETER, (int16_t) VALVE_CENTER);
+                                spi_eeprom_write(RID_VALVE_MAX_POS, (int16_t) VALVE_MAX_POS);
+                                spi_eeprom_write(RID_VALVE_MIN_POS, (int16_t) VALVE_MIN_POS);
 
                                 CONTROL_UTILITY_MODE = MODE_NO_ACT;
                                 DZ_index = 1;
@@ -1432,7 +1891,10 @@
                                 VALVE_DEADZONE_MINUS = (float) FIRST_DZ;
                                 VALVE_DEADZONE_PLUS = (float) SECOND_DZ;
 
-                                ROM_RESET_DATA();
+//                                ROM_RESET_DATA();
+                                spi_eeprom_write(RID_VALVE_CNETER, (int16_t) VALVE_CENTER);
+                                spi_eeprom_write(RID_VALVE_MAX_POS, (int16_t) VALVE_MAX_POS);
+                                spi_eeprom_write(RID_VALVE_MIN_POS, (int16_t) VALVE_MIN_POS);
 
                                 CONTROL_UTILITY_MODE = MODE_NO_ACT;
                                 DZ_index = 1;
@@ -1509,7 +1971,11 @@
                     if(max_check == 1 && min_check == 1) {
 
                         VALVE_POS_NUM = ID_index;
-                        ROM_RESET_DATA();
+//                        ROM_RESET_DATA();
+                        for(int i=0; i<100; i++) {
+                            spi_eeprom_write(RID_VALVE_POS_VS_FLOWRATE_0 + i, (int16_t) (JOINT_VEL[i] & 0xFFFF));
+                            spi_eeprom_write(RID_VALVE_POS_VS_FLOWRATE_0_1 + i, (int16_t) ((JOINT_VEL[i] >> 16) & 0xFFFF));
+                        }
                         ID_index = 0;
                         first_check = 0;
                         VALVE_FR_timer = 0;
@@ -1560,7 +2026,7 @@
                     }
                     CONTROL_MODE = MODE_NO_ACT;
                     CONTROL_UTILITY_MODE = MODE_SEND_OVER;
-                    
+
                 }
                 break;
             }
@@ -1581,14 +2047,12 @@
                 CONTROL_MODE = MODE_NO_ACT;
                 break;
             }
-            
+
             case MODE_STEP_TEST: {
                 float valve_pos_ref = 0.0f;
-                if (cnt_step_test < (int) (1.0f * (float) TMR_FREQ_5k))
-                {
+                if (cnt_step_test < (int) (1.0f * (float) TMR_FREQ_5k)) {
                     valve_pos_ref = 0.0f;
-                }else
-                {
+                } else {
                     valve_pos_ref = 10000.0f;
                 }
                 if(valve_pos_ref >= 0) {
@@ -1605,8 +2069,7 @@
 
                 CONTROL_MODE = MODE_VALVE_POSITION_CONTROL;
                 cnt_step_test++;
-                if (cnt_step_test > (int) (2.0f * (float) TMR_FREQ_5k))
-                {
+                if (cnt_step_test > (int) (2.0f * (float) TMR_FREQ_5k)) {
                     buffer_data_size = cnt_step_test;
                     cnt_step_test = 0;
                     cnt_send_buffer = 0;
@@ -1619,7 +2082,7 @@
 //                    CONTROL_MODE = MODE_NO_ACT;
 //                    CAN_TX_PWM((int16_t) (1)); //1300
 //                }
-                             
+
                 break;
             }
 
@@ -1783,10 +2246,10 @@
 
 //                float Va = (1256.6f + Amm * 39.5f) * 0.000000001f; // 4mm pipe * 100mm + (25mm Cylinder 18mm Rod) * x,      unit : m^3
 //                float Vb = (1256.6f + Amm  * 39.5f) * 0.000000001f; // 4mm pipe * 100mm + (25mm Cylinder 18mm Rod) * (79.0mm-x),      unit : m^3
-                V = 1.0f / (1.0f/Va + 1.0f/Vb); //initial 0.0000053f
+                V_adapt = 1.0f / (1.0f/Va + 1.0f/Vb); //initial 0.0000053f
 
 
-                float f3 = -Amm*Amm*beta*0.000001f*0.000001f/V * vel.sen/(float)(ENC_PULSE_PER_POSITION)*0.001f; // unit : N/s    //xdot=10mm/s일때 -137076
+                float f3 = -Amm*Amm*beta*0.000001f*0.000001f/V_adapt * vel.sen/(float)(ENC_PULSE_PER_POSITION)*0.001f; // unit : N/s    //xdot=10mm/s일때 -137076
 
                 float g3_prime = 0.0f;
                 if (torq.sen > Amm*(Ps-Pt)*0.000001f) {
@@ -1848,6 +2311,93 @@
                 gamma_hat = gamma_hat + gamma_hat_dot / (float) TMR_FREQ_5k;
                 break;
             }
+            
+            case MODE_RL: {
+                //t.reset();
+                //t.start();
+
+//                if(LED == 0) LED = 1;
+//                else LED = 0;
+
+                if (Update_Done_Flag == 1) {
+                    //Gather Data on each loop
+//                  pos.err = (pos.ref - pos.sen)/(float)(ENC_PULSE_PER_POSITION); //[mm]
+//                  train_set_x[RL_timer] = pos.sen/(float)(ENC_PULSE_PER_POSITION)/35.0f - 1.0f;   //-1.0~1.0
+//                  train_set_error[RL_timer] = pos.err/70.0f;      //-1.0~1.0
+                    pos.err = pos.sen/(float)(ENC_PULSE_PER_POSITION)  - virt_pos; //[mm]
+                    train_set_x[RL_timer] = virt_pos/70.0f;   //-1.0~1.0
+                    train_set_error[RL_timer] = pos.err/70.0f;      //-1.0~1.0
+                    //train_set_count[RL_timer] = (float) RL_timer / (batch_size *num_batch);  //-1.0~1.0
+                    //float temp_array[3] = {train_set_x[RL_timer], train_set_error[RL_timer], train_set_count[RL_timer]};
+                    float temp_array[2] = {train_set_x[RL_timer], train_set_error[RL_timer]};
+                    Actor_Network(temp_array);
+                    mean_array[RL_timer] = mean;
+                    deviation_array[RL_timer] = deviation;
+                    mean_before_SP_array[RL_timer] = mean_before_SP;
+                    deviation_before_SP_array[RL_timer] = deviation_before_SP;
+                    action_array[RL_timer] = rand_normal(mean_array[RL_timer], deviation_array[RL_timer]);
+
+
+                    virt_pos = virt_pos + (action_array[RL_timer] - 3.0f) * 1000.0f * 0.0002f;
+                    if (virt_pos > 70.0f ) {
+                        virt_pos = 70.0f;
+                    } else if(virt_pos < -70.0f) {
+                        virt_pos = -70.0f;
+                    }
+                    
+                    RL_timer++;
+
+
+                    if (RL_timer >= batch_size) {
+                        RL_timer = 0;
+                        batch++;
+                        for(int i=0; i<batch_size; i++) {
+                            state_array[i][0] = train_set_x[i];
+                            state_array[i][1] = train_set_error[i];
+                            //state_array[i][2] = train_set_count[i];
+                        }
+                        Update_Case = 1;
+                        Update_Done_Flag = 0;
+                        logging1 = virt_pos;
+                        //virt_pos = 10.0f;
+
+                        if(batch >= num_batch) {
+                            batch = 0;
+                            RL_timer = 0;
+                            Update_Case = 2;
+                            Update_Done_Flag = 0;
+                            virt_pos = 10.0f;
+                        }
+                    }
+                }
+
+                else {
+
+                    pos.err = pos.sen/(float)(ENC_PULSE_PER_POSITION) - virt_pos; //[mm]
+                    float temp_array[3] = {0.0f};
+                    temp_array[0] = virt_pos/70.0f;   //-1.0~1.0
+                    temp_array[1] = pos.err/70.0f;      //-1.0~1.0
+                    //temp_array[2] = (float) RL_timer / (batch_size *num_batch);  //-1.0~1.0
+                    Actor_Network(temp_array);
+                    action = rand_normal(mean, deviation);
+                    //logging1 = action;
+                    logging2 = mean;
+                    logging4 = deviation;
+                    virt_pos = virt_pos + (action-3.0f) * 1000.0f * 0.0002f;
+                    if (virt_pos > 70.0f) {
+                        virt_pos = 70.0f;
+                    } else if(virt_pos < -70.0f) {
+                        virt_pos = -70.0f;
+                    }
+
+                    logging3 = virt_pos;
+                }
+
+                //t.stop();
+                //logging1 = t.read()*1000.0f;    //msec
+
+                break;
+            }
 
             default:
                 break;