Sungwoo Kim
/
HydraulicControlBoard_PostLIGHT_Original
Original Version of STM Board
Diff: main.cpp
- Revision:
- 170:42c938a40313
- Parent:
- 169:645207e160ca
- Child:
- 171:bfc1fd2629d8
diff -r 645207e160ca -r 42c938a40313 main.cpp --- a/main.cpp Mon Nov 16 02:23:23 2020 +0000 +++ b/main.cpp Wed Nov 18 12:03:39 2020 +0000 @@ -1,4 +1,4 @@ -//201116_1 +//201118_1 #include "mbed.h" #include "FastPWM.h" #include "INIT_HW.h" @@ -9,6 +9,8 @@ #include "function_utilities.h" #include "stm32f4xx_flash.h" #include "FlashWriter.h" +#include <string> +#include <iostream> using namespace std; Timer t; @@ -35,8 +37,8 @@ unsigned int value; // 10bit output of reading sensor AS5510 // SPI /////////////////////////////////////////// -//SPI eeprom(PB_15, PB_14, PB_13); // EEPROM //(SPI_MOSI, SPI_MISO, SPI_SCK); -//DigitalOut eeprom_cs(PB_12); +SPI eeprom(PB_15, PB_14, PB_13); // EEPROM //(SPI_MOSI, SPI_MISO, SPI_SCK); +DigitalOut eeprom_cs(PB_12); //FlashWriter writer(6);//2부터 7까지 되는듯 아마 sector SPI enc(PC_12,PC_11,PC_10); DigitalOut enc_cs(PD_2); @@ -122,6 +124,7 @@ MODE_CURRENT_CONTROL, //11 MODE_JOINT_POSITION_TORQUE_CONTROL_CURRENT, //12 MODE_JOINT_POSITION_PRES_CONTROL_CURRENT, //13 + MODE_RL, //14 //utility MODE_TORQUE_SENSOR_NULLING = 20, //20 @@ -261,10 +264,390 @@ const float bout[1] = { -0.10839174687862396f }; +/////////////////////////////////////////////////////////////////////////////////////////////RL +float input_RL[num_input_RL] = { 0.0f }; + +//Critic Networks +float hc1[num_input_RL][10] = {0.0f}; +float bc1[10] = {0.0f}; +float hc2[10] = {0.0f}; +float bc2 = 0.0f; + +//Critic Networks Temporary +float hc1_temp[num_input_RL][10] = {0.0f}; +float bc1_temp[10] = {0.0f}; +float hc2_temp[10] = {0.0f}; +float bc2_temp = 0.0f; + +//Actor Networks +float ha1[num_input_RL][10] = {0.0f}; +float ba1[10] = {0.0f}; +float ha2[10][2] = {0.0f}; +float ba2[2] = {0.0f}; + +//Actor Networks Temporary +float ha1_temp[num_input_RL][10] = {0.0f}; +float ba1_temp[10] = {0.0f}; +float ha2_temp[10][2] = {0.0f}; +float ba2_temp[2] = {0.0f}; float VALVE_POS_RAW_NN = 0.0f; float DDV_JOINT_POS_FF(float REF_JOINT_VEL); + +float Critic_Network(float *arr) +{ + float output1[10] = { 0.0f }; + float output = 0.0f; + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + output1[index2] = output1[index2] + hc1[index1][index2] * arr[index1]; + } + output1[index2] = tanh(output1[index2] + bc1[index2]); + } + for (int index2 = 0; index2 < 1; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + output = output + hc2[index1] * output1[index1]; + } + output = output + bc2; + } + return output; +} + +float Critic_Network_Temp(float *arr) +{ + float output1[10] = { 0.0f }; + float output = 0.0f; + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + output1[index2] = output1[index2] + hc1_temp[index1][index2] * arr[index1]; + } + output1[index2] = tanh(output1[index2] + bc1_temp[index2]); + } + for (int index2 = 0; index2 < 1; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + output = output + hc2_temp[index1] * output1[index1]; + } + output = output + bc2_temp; + } + return output; +} + + +void Actor_Network(float *arr) +{ + float output1[10] = {0.0f}; + float output[2] = {0.0f}; + + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + output1[index2] = output1[index2] + ha1[index1][index2] * arr[index1]; + } + output1[index2] = output1[index2] + ba1[index2]; + if (output1[index2] < 0) { + output1[index2] = 0; + } + } + for (int index2 = 0; index2 < 2; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + output[index2] = output[index2] + ha2[index1][index2] * output1[index1]; + } + } + mean_before_SP = output[0] + ba2[0]; + deviation_before_SP = output[1] + ba2[1]; + mean = log(1.0f+exp(mean_before_SP)); + deviation = log(1.0f+exp(deviation_before_SP)); +} + + +//void Actor_Network_Temp(float *arr) +//{ +// float output1[10] = {0.0f}; +// float output[2] = {0.0f}; +// +// for (int index2 = 0; index2 < 10; index2++) { +// for (int index1 = 0; index1 < num_input_RL; index1++) { +// output1[index2] = output1[index2] + ha1_temp[index1][index2] * arr[index1]; +// } +// output1[index2] = output1[index2] + ba1_temp[index2]; +// if (output1[index2] < 0) { +// output1[index2] = 0; +// } +// } +// for (int index2 = 0; index2 < 2; index2++) { +// for (int index1 = 0; index1 < 10; index1++) { +// output[index2] = output[index2] + ha2_temp[index1][index2] * output1[index1]; +// } +// } +// mean_before_SP = output[0] + ba2_temp[0]; +// deviation_before_SP = output[1] + ba2_temp[1]; +// mean = log(1.0f+exp(mean_before_SP)); +// deviation = log(1.0f+exp(deviation_before_SP)); +//} + + +void Actor_Network_Old(float *arr) +{ + float output1[10] = {0.0f}; + float output[2] = {0.0f}; + + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + output1[index2] = output1[index2] + ha1[index1][index2] * arr[index1]; + } + output1[index2] = output1[index2] + ba1[index2]; + if (output1[index2] < 0) { + output1[index2] = 0; + } + } + for (int index2 = 0; index2 < 2; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + output[index2] = output[index2] + ha2[index1][index2] * output1[index1]; + } + } + mean_old = output[0] + ba2[0]; + deviation_old = output[1] + ba2[1]; + mean_old = log(1.0f+exp(mean_old)); + deviation_old = log(1.0f+exp(deviation_old)); +} + +float Grad_Normal_Dist_Mean(float mean, float deviation, float action) +{ + float grad_mean = 0.0f; + grad_mean = (action-mean)*exp(-(action-mean)*(action-mean)/(2.0f*deviation*deviation))/(sqrt(2.0f*PI)*deviation*deviation*deviation); + return grad_mean; +} + +float Grad_Normal_Dist_Deviation(float mean, float deviation, float action) +{ + float grad_dev = 0.0f; + grad_dev = exp(-(action-mean)*(action-mean)/(2.0f*deviation*deviation))*(-1.0f/(sqrt(2.0f*PI)*deviation*deviation) + (action-mean)*(action-mean)/(sqrt(2.0f*PI)*deviation*deviation*deviation*deviation)); + return grad_dev; +} + +void update_Critic_Networks(float (*arr)[num_input_RL]) +{ + float gradient_rate = 0.01f; + + float G_hc1[num_input_RL][10] = {0.0f}; + float G_bc1[10] = {0.0f}; + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + for (int i=0; i<batch_size; i++) { + float hx_sum = 0.0f; + float hx_sum_next = 0.0f; + for(int j=0; j<num_input_RL; j++) { + hx_sum = hx_sum + hc1_temp[j][index2]*arr[i][j]; + if (i==batch_size-1) hx_sum_next = 0.0f; + else hx_sum_next = hx_sum_next + hc1_temp[j][index2]*arr[i+1][j]; + } + if (i==batch_size-1) G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*advantage[i]*(-hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))*arr[i][index1]); + else G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*advantage[i]*(hc2_temp[index2]*(1.0f-tanh(hx_sum_next + bc1_temp[index2])*tanh(hx_sum_next + bc1_temp[index2]))*arr[i+1][index1] - hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))*arr[i][index1]); + } + G_hc1[index1][index2] = G_hc1[index1][index2] / batch_size; + hc1_temp[index1][index2] = hc1_temp[index1][index2] - gradient_rate * G_hc1[index1][index2]; + } + for (int i=0; i<batch_size; i++) { + float hx_sum = 0.0f; + float hx_sum_next = 0.0f; + for(int j=0; j<num_input_RL; j++) { + hx_sum = hx_sum + hc1_temp[j][index2]*arr[i][j]; + if (i==batch_size-1) hx_sum_next = 0.0f; + else hx_sum_next = hx_sum_next + hc1_temp[j][index2]*arr[i+1][j]; + } + if (i==batch_size-1) G_bc1[index2] = G_bc1[index2] + 2.0f*advantage[i]*(-hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))); + else G_bc1[index2] = G_bc1[index2] + 2.0f*advantage[i]*(hc2_temp[index2]*(1.0f-tanh(hx_sum_next + bc1_temp[index2])*tanh(hx_sum_next + bc1_temp[index2])) - hc2_temp[index2]*(1.0f-tanh(hx_sum + bc1_temp[index2])*tanh(hx_sum + bc1_temp[index2]))); + } + G_bc1[index2] = G_bc1[index2] / batch_size; + bc1_temp[index2] = bc1_temp[index2] - gradient_rate * G_bc1[index2]; + } + + float G_hc2[10] = {0.0f}; + float G_bc2 = 0.0f; + for (int index2 = 0; index2 < 1; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + for (int i=0; i<batch_size; i++) { + float hx_sum = 0.0f; + float hx_sum_next = 0.0f; + for(int j=0; j<num_input_RL; j++) { + hx_sum = hx_sum + hc1_temp[j][index1]*arr[i][j]; + if (i==batch_size-1) hx_sum_next = 0.0f; + else hx_sum_next = hx_sum_next + hc1_temp[j][index1]*arr[i+1][j]; + } + if (i==batch_size-1) G_hc2[index1] = G_hc2[index1] - 2.0f*advantage[i]*tanh(hx_sum + bc1_temp[index1]); + else G_hc2[index1] = G_hc2[index1] + 2.0f*advantage[i]*(tanh(hx_sum_next + bc1_temp[index1]) - tanh(hx_sum + bc1_temp[index1])); + } + G_hc2[index1] = G_hc2[index1] / batch_size; + hc2_temp[index1] = hc2_temp[index1] - gradient_rate * G_hc2[index1]; + } + for (int i=0; i<batch_size; i++) { + if (i==batch_size-1) G_bc2 = G_bc2 + 2.0f*advantage[i]*(-1.0f); + else G_bc2 = 0.0f; + } + G_bc2 = G_bc2/ batch_size; + bc2_temp = bc2_temp - gradient_rate * G_bc2; + } +} + +void update_Actor_Networks(float (*arr)[num_input_RL]) +{ + float gradient_rate = -0.01f; + + float G_ha1[num_input_RL][10] = {0.0f}; + float G_ba1[10] = {0.0f}; + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + for (int i=0; i<batch_size; i++) { + if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { + G_ha1[index1][index2] = G_ha1[index1][index2]; + } else { + float hx_sum = 0.0f; + for(int j=0; j<num_input_RL; j++) { + hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j]; + } + hx_sum = hx_sum + bc1_temp[index2]; + float d_mean_d_ha1 = 0.0f; + float d_dev_d_ha1 = 0.0f; + if (hx_sum >= 0) { + d_mean_d_ha1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0]*arr[i][index1]; + d_dev_d_ha1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1]*arr[i][index1]; + } else { + d_mean_d_ha1 = 0.0f; + d_dev_d_ha1 = 0.0f; + } + G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); + } + } + G_ha1[index1][index2] = G_ha1[index1][index2] / batch_size; + ha1_temp[index1][index2] = ha1_temp[index1][index2] - gradient_rate * G_ha1[index1][index2]; + } + for (int i=0; i<batch_size; i++) { + if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { + G_ba1[index2] = G_ba1[index2]; + } else { + float hx_sum = 0.0f; + for(int j=0; j<num_input_RL; j++) { + hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j]; + } + hx_sum = hx_sum + bc1_temp[index2]; + float d_mean_d_ba1 = 0.0f; + float d_dev_d_ba1 = 0.0f; + if(hx_sum >=0) { + d_mean_d_ba1 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*ha2_temp[index2][0]; + d_dev_d_ba1 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*ha2_temp[index2][1]; + } else { + d_mean_d_ba1 = 0.0f; + d_dev_d_ba1 = 0.0f; + } + G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); + } + } + G_ba1[index2] = G_ba1[index2] / batch_size; + ba1_temp[index2] = ba1_temp[index2] - gradient_rate * G_ba1[index2]; + } + + float G_ha2[10][2] = {0.0f}; + float G_ba2[2] = {0.0f}; + for (int index2 = 0; index2 < 2; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + for (int i=0; i<batch_size; i++) { + if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { + G_ha2[index1][index2] = G_ha2[index1][index2]; + } else { + float hx_sum = 0.0f; + for(int j=0; j<num_input_RL; j++) { + hx_sum = hx_sum + ha1_temp[j][index1]*arr[i][j]; + } + hx_sum = hx_sum + bc1_temp[index1]; + float d_mean_d_ha2 = 0.0f; + float d_dev_d_ha2 = 0.0f; + if (hx_sum >= 0) { + d_mean_d_ha2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i]))*hx_sum; + d_dev_d_ha2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i]))*hx_sum; + } else { + d_mean_d_ha2 = 0.0f; + d_dev_d_ha2 = 0.0f; + } + G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); + } + } + G_ha2[index1][index2] = G_ha2[index1][index2] / batch_size; + ha2_temp[index1][index2] = ha2_temp[index1][index2] - gradient_rate * G_ha2[index1][index2]; + } + for (int i=0; i<batch_size; i++) { + if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { + G_ba2[index2] = G_ba2[index2]; + } else { + + float d_mean_d_ba2 = 0.0f; + float d_dev_d_ba2 = 0.0f; + d_mean_d_ba2 = exp(mean_before_SP_array[i])/(1.0f+exp(mean_before_SP_array[i])); + d_dev_d_ba2 = exp(deviation_before_SP_array[i])/(1.0f+exp(deviation_before_SP_array[i])); + G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); + } + } + G_ba2[index2] = G_ba2[index2] / batch_size; + ba2_temp[index2] = ba2_temp[index2] - gradient_rate * G_ba2[index2]; + } +} + + + +float rand_normal(double mean, double stddev) +{ + //Box muller method + static double n2 = 0.0f; + static int n2_cached = 0; + if (!n2_cached) { + double x, y, r; + do { + x = 2.0f*rand()/RAND_MAX - 1; + y = 2.0f*rand()/RAND_MAX - 1; + + r = x*x + y*y; + } while (r == 0.0f || r > 1.0f); + { + double d = sqrt(-2.0f*log(r)/r); + double n1 = x*d; + n2 = y*d; + double result = n1*stddev + mean; + n2_cached = 1; + return result; + } + } else { + n2_cached = 0; + return n2*stddev + mean; + } +} + +void Overwirte_Critic_Networks() +{ + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + hc1[index1][index2] = hc1_temp[index1][index2]; + } + bc1[index2] = bc1_temp[index2]; + hc2[index2] = hc2_temp[index2]; + } + bc2 = bc2_temp; +} +void Overwirte_Actor_Networks() +{ + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + ha1[index1][index2] = ha1_temp[index1][index2]; + } + ba1[index2] = ba1_temp[index2]; + } + for (int index2 = 0; index2 < 2; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + ha2[index1][index2] = ha2_temp[index1][index2]; + } + ba2[index2] = ba2_temp[index2]; + } +} + + int main() { @@ -285,8 +668,8 @@ make_delay(); // // spi init - //eeprom.format(8,3); - //eeprom.frequency(5000000); //5M + eeprom.format(8,3); + eeprom.frequency(5000000); //5M enc.format(8,0); enc.frequency(5000000); //5M make_delay(); @@ -348,6 +731,29 @@ ID_index_array[i] = (i+1) * 0.5f; } + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + hc1_temp[index1][index2] = (float) (rand()%100) * 0.01f ; + } + bc1_temp[index2] = (float) (rand()%100) * 0.01f; + hc2_temp[index2] = (float) (rand()%100) * 0.01f; + } + bc2_temp = (float) (rand()%100) * 0.01f; + for (int index2 = 0; index2 < 10; index2++) { + for (int index1 = 0; index1 < num_input_RL; index1++) { + ha1_temp[index1][index2] = (float) (rand()%100) * 0.01f; + } + ba1_temp[index2] = (float) (rand()%100) * 0.01f; + } + for (int index2 = 0; index2 < 2; index2++) { + for (int index1 = 0; index1 < 10; index1++) { + ha2_temp[index1][index2] = (float) (rand()%100) * 0.01f; + } + ba2_temp[index2] = (float) (rand()%100) * 0.01f; + } + + Overwirte_Critic_Networks(); + Overwirte_Actor_Networks(); /************************************ *** Program is operating! @@ -368,7 +774,7 @@ timer_while ++; - //LED = 0; + ///////////////////////////////////////////////////////Neural Network if(NN_Control_Flag == 0) { LED = 0; @@ -461,29 +867,6 @@ valve_pos.ref = -output*0.0001f*((double)VALVE_MIN_POS - (double) VALVE_CENTER) + (double) VALVE_CENTER; } -// // torque feedback -// torq.err = f_past[0] - torq.sen; //[N] -//// torq.err_sum += torq.err/(float) TMR_FREQ_5k; //[N] -// torq.err_sum += torq.err/(float) 1500.0f; //[N] -// -// -// valve_pos.ref = ((float) P_GAIN_JOINT_TORQUE * torq.err + (float) I_GAIN_JOINT_TORQUE * torq.err_sum) * 0.01f + DDV_JOINT_POS_FF(vel.sen) + valve_pos.ref * 0.01f; -// -// if(I_GAIN_JOINT_TORQUE != 0) { -// double Ka = 1.0f / (double) I_GAIN_JOINT_TORQUE * 100.0f; -// if(valve_pos.ref>VALVE_MAX_POS) { -// double valve_pos_rem = valve_pos.ref - VALVE_MAX_POS; -// valve_pos_rem = valve_pos_rem * Ka; -// valve_pos.ref = VALVE_MAX_POS; -// torq.err_sum = torq.err_sum - valve_pos_rem/(float) 1500.0f; -// } else if(valve_pos.ref < VALVE_MIN_POS) { -// double valve_pos_rem = valve_pos.ref - VALVE_MIN_POS; -// valve_pos_rem = valve_pos_rem * Ka; -// valve_pos.ref = VALVE_MIN_POS; -// torq.err_sum = torq.err_sum - valve_pos_rem/(float) 1500.0f; -// } -// } - if(LED==1) { LED=0; @@ -491,10 +874,75 @@ LED = 1; } + + + /////////////////////////////////////////////////////////////////////RL + switch (Update_Case) { + case 0: { + break; + } + case 1: { + //Network Update(just update and hold network) + for (int epoch = 0; epoch < num_epoch; epoch++) { + float loss_sum = 0.0f; + for (int i=batch_size-1; i>=0; i--) { + V[batch_size] = 0.0f; + //Calculate Estimated V + //float temp_array[3] = {state_array[i][0], state_array[i][1], state_array[i][2]}; + float temp_array[2] = {state_array[i][0], state_array[i][1]}; + V[i] = Critic_Network_Temp(temp_array); + pi[i] = exp(-(action_array[i]-mean_array[i])*(action_array[i]-mean_array[i])/(2.0f*deviation_array[i]*deviation_array[i]))/(sqrt(2.0f*PI)*deviation_array[i]); + Actor_Network_Old(temp_array); + pi_old[i] = exp(-(action_array[i]-mean_old)*(action_array[i]-mean_old)/(2.0f*deviation_old*deviation_old))/(sqrt(2.0f*PI)*deviation_old); + r[i] = exp(-0.01f * state_array[i][1] * 70.0f * state_array[i][1] * 70.0f); + if(i == batch_size-1) td_target[i] = r[i]; + else td_target[i] = r[i] + gamma * V[i+1]; + delta[i] = td_target[i] - V[i]; + if(i == batch_size-1) advantage[i] = 0.0f; + else advantage[i] = gamma * lmbda * advantage[i+1] + delta[i]; + ratio[i] = pi[i]/pi_old[i]; + surr1[i] = ratio[i] * advantage[i]; + if (ratio[i] > 1.0f + epsilon) { + surr2[i] = (1.0f + epsilon)*advantage[i]; + } else if( ratio[i] < 1.0f - epsilon) { + surr2[i] = (1.0f - epsilon)*advantage[i]; + } else { + surr2[i] = ratio[i]*advantage[i]; + } + loss[i] = -min(surr1[i], surr2[i]); + loss_sum = loss_sum + loss[i]; + } + reward_sum = 0.0f; + for (int i=0; i<batch_size; i++) { + reward_sum = reward_sum + r[i]; + } + logging5 = reward_sum; + + + //loss_batch = loss_sum / (float) batch_size; + loss_batch = loss_sum; + //Update Networks + update_Critic_Networks(state_array); + update_Actor_Networks(state_array); + } + //virt_pos = 10.0f; + Update_Done_Flag = 1; + Update_Case = 0; + //logging1 = V[0]; + + break; + } + case 2: { + //Network apply to next Network + Overwirte_Critic_Networks(); + Overwirte_Actor_Networks(); + virt_pos = 10.0f; + Update_Done_Flag = 1; + Update_Case = 0; + break; + } - //LED = 1; - - + } } } @@ -781,7 +1229,8 @@ CUR_TORQUE_sum = 0; CUR_TORQUE_mean = 0; - ROM_RESET_DATA(); +// ROM_RESET_DATA(); + spi_eeprom_write(RID_TORQUE_SENSOR_VREF, (int16_t) (TORQUE_VREF * 1000.0f)); dac_1 = TORQUE_VREF / 3.3f; @@ -1113,7 +1562,9 @@ CUR_PRES_A_mean = 0; CUR_PRES_B_mean = 0; - ROM_RESET_DATA(); +// ROM_RESET_DATA(); + spi_eeprom_write(RID_PRES_A_SENSOR_VREF, (int16_t) (PRES_A_VREF * 1000.0f)); + spi_eeprom_write(RID_PRES_B_SENSOR_VREF, (int16_t) (PRES_B_VREF * 1000.0f)); dac_1 = PRES_A_VREF / 3.3f; dac_2 = PRES_B_VREF / 3.3f; @@ -1205,7 +1656,12 @@ VALVE_POS_AVG_OLD = VALVE_MIN_POS; } } - ROM_RESET_DATA(); +// ROM_RESET_DATA(); + spi_eeprom_write(RID_VALVE_MAX_POS, (int16_t) VALVE_MAX_POS); + spi_eeprom_write(RID_VALVE_MIN_POS, (int16_t) VALVE_MIN_POS); + for(int i=0; i<25; i++) { + spi_eeprom_write(RID_VALVE_POS_VS_PWM_0 + i, (int16_t) VALVE_POS_VS_PWM[i]); + } ID_index = 0; CONTROL_UTILITY_MODE = MODE_NO_ACT; } @@ -1350,7 +1806,10 @@ VALVE_DEADZONE_MINUS = (float) FIRST_DZ; VALVE_DEADZONE_PLUS = (float) SECOND_DZ; - ROM_RESET_DATA(); +// ROM_RESET_DATA(); + spi_eeprom_write(RID_VALVE_CNETER, (int16_t) VALVE_CENTER); + spi_eeprom_write(RID_VALVE_MAX_POS, (int16_t) VALVE_MAX_POS); + spi_eeprom_write(RID_VALVE_MIN_POS, (int16_t) VALVE_MIN_POS); CONTROL_UTILITY_MODE = MODE_NO_ACT; DZ_index = 1; @@ -1432,7 +1891,10 @@ VALVE_DEADZONE_MINUS = (float) FIRST_DZ; VALVE_DEADZONE_PLUS = (float) SECOND_DZ; - ROM_RESET_DATA(); +// ROM_RESET_DATA(); + spi_eeprom_write(RID_VALVE_CNETER, (int16_t) VALVE_CENTER); + spi_eeprom_write(RID_VALVE_MAX_POS, (int16_t) VALVE_MAX_POS); + spi_eeprom_write(RID_VALVE_MIN_POS, (int16_t) VALVE_MIN_POS); CONTROL_UTILITY_MODE = MODE_NO_ACT; DZ_index = 1; @@ -1509,7 +1971,11 @@ if(max_check == 1 && min_check == 1) { VALVE_POS_NUM = ID_index; - ROM_RESET_DATA(); +// ROM_RESET_DATA(); + for(int i=0; i<100; i++) { + spi_eeprom_write(RID_VALVE_POS_VS_FLOWRATE_0 + i, (int16_t) (JOINT_VEL[i] & 0xFFFF)); + spi_eeprom_write(RID_VALVE_POS_VS_FLOWRATE_0_1 + i, (int16_t) ((JOINT_VEL[i] >> 16) & 0xFFFF)); + } ID_index = 0; first_check = 0; VALVE_FR_timer = 0; @@ -1560,7 +2026,7 @@ } CONTROL_MODE = MODE_NO_ACT; CONTROL_UTILITY_MODE = MODE_SEND_OVER; - + } break; } @@ -1581,14 +2047,12 @@ CONTROL_MODE = MODE_NO_ACT; break; } - + case MODE_STEP_TEST: { float valve_pos_ref = 0.0f; - if (cnt_step_test < (int) (1.0f * (float) TMR_FREQ_5k)) - { + if (cnt_step_test < (int) (1.0f * (float) TMR_FREQ_5k)) { valve_pos_ref = 0.0f; - }else - { + } else { valve_pos_ref = 10000.0f; } if(valve_pos_ref >= 0) { @@ -1605,8 +2069,7 @@ CONTROL_MODE = MODE_VALVE_POSITION_CONTROL; cnt_step_test++; - if (cnt_step_test > (int) (2.0f * (float) TMR_FREQ_5k)) - { + if (cnt_step_test > (int) (2.0f * (float) TMR_FREQ_5k)) { buffer_data_size = cnt_step_test; cnt_step_test = 0; cnt_send_buffer = 0; @@ -1619,7 +2082,7 @@ // CONTROL_MODE = MODE_NO_ACT; // CAN_TX_PWM((int16_t) (1)); //1300 // } - + break; } @@ -1783,10 +2246,10 @@ // float Va = (1256.6f + Amm * 39.5f) * 0.000000001f; // 4mm pipe * 100mm + (25mm Cylinder 18mm Rod) * x, unit : m^3 // float Vb = (1256.6f + Amm * 39.5f) * 0.000000001f; // 4mm pipe * 100mm + (25mm Cylinder 18mm Rod) * (79.0mm-x), unit : m^3 - V = 1.0f / (1.0f/Va + 1.0f/Vb); //initial 0.0000053f + V_adapt = 1.0f / (1.0f/Va + 1.0f/Vb); //initial 0.0000053f - float f3 = -Amm*Amm*beta*0.000001f*0.000001f/V * vel.sen/(float)(ENC_PULSE_PER_POSITION)*0.001f; // unit : N/s //xdot=10mm/s일때 -137076 + float f3 = -Amm*Amm*beta*0.000001f*0.000001f/V_adapt * vel.sen/(float)(ENC_PULSE_PER_POSITION)*0.001f; // unit : N/s //xdot=10mm/s일때 -137076 float g3_prime = 0.0f; if (torq.sen > Amm*(Ps-Pt)*0.000001f) { @@ -1848,6 +2311,93 @@ gamma_hat = gamma_hat + gamma_hat_dot / (float) TMR_FREQ_5k; break; } + + case MODE_RL: { + //t.reset(); + //t.start(); + +// if(LED == 0) LED = 1; +// else LED = 0; + + if (Update_Done_Flag == 1) { + //Gather Data on each loop +// pos.err = (pos.ref - pos.sen)/(float)(ENC_PULSE_PER_POSITION); //[mm] +// train_set_x[RL_timer] = pos.sen/(float)(ENC_PULSE_PER_POSITION)/35.0f - 1.0f; //-1.0~1.0 +// train_set_error[RL_timer] = pos.err/70.0f; //-1.0~1.0 + pos.err = pos.sen/(float)(ENC_PULSE_PER_POSITION) - virt_pos; //[mm] + train_set_x[RL_timer] = virt_pos/70.0f; //-1.0~1.0 + train_set_error[RL_timer] = pos.err/70.0f; //-1.0~1.0 + //train_set_count[RL_timer] = (float) RL_timer / (batch_size *num_batch); //-1.0~1.0 + //float temp_array[3] = {train_set_x[RL_timer], train_set_error[RL_timer], train_set_count[RL_timer]}; + float temp_array[2] = {train_set_x[RL_timer], train_set_error[RL_timer]}; + Actor_Network(temp_array); + mean_array[RL_timer] = mean; + deviation_array[RL_timer] = deviation; + mean_before_SP_array[RL_timer] = mean_before_SP; + deviation_before_SP_array[RL_timer] = deviation_before_SP; + action_array[RL_timer] = rand_normal(mean_array[RL_timer], deviation_array[RL_timer]); + + + virt_pos = virt_pos + (action_array[RL_timer] - 3.0f) * 1000.0f * 0.0002f; + if (virt_pos > 70.0f ) { + virt_pos = 70.0f; + } else if(virt_pos < -70.0f) { + virt_pos = -70.0f; + } + + RL_timer++; + + + if (RL_timer >= batch_size) { + RL_timer = 0; + batch++; + for(int i=0; i<batch_size; i++) { + state_array[i][0] = train_set_x[i]; + state_array[i][1] = train_set_error[i]; + //state_array[i][2] = train_set_count[i]; + } + Update_Case = 1; + Update_Done_Flag = 0; + logging1 = virt_pos; + //virt_pos = 10.0f; + + if(batch >= num_batch) { + batch = 0; + RL_timer = 0; + Update_Case = 2; + Update_Done_Flag = 0; + virt_pos = 10.0f; + } + } + } + + else { + + pos.err = pos.sen/(float)(ENC_PULSE_PER_POSITION) - virt_pos; //[mm] + float temp_array[3] = {0.0f}; + temp_array[0] = virt_pos/70.0f; //-1.0~1.0 + temp_array[1] = pos.err/70.0f; //-1.0~1.0 + //temp_array[2] = (float) RL_timer / (batch_size *num_batch); //-1.0~1.0 + Actor_Network(temp_array); + action = rand_normal(mean, deviation); + //logging1 = action; + logging2 = mean; + logging4 = deviation; + virt_pos = virt_pos + (action-3.0f) * 1000.0f * 0.0002f; + if (virt_pos > 70.0f) { + virt_pos = 70.0f; + } else if(virt_pos < -70.0f) { + virt_pos = -70.0f; + } + + logging3 = virt_pos; + } + + //t.stop(); + //logging1 = t.read()*1000.0f; //msec + + break; + } default: break;