Sungwoo Kim
/
HydraulicControlBoard_Learning
for learning
Diff: main.cpp
- Revision:
- 177:8e9cf31d63f4
- Parent:
- 176:589ea3edcf3c
- Child:
- 178:1074553d2f6f
diff -r 589ea3edcf3c -r 8e9cf31d63f4 main.cpp --- a/main.cpp Tue Nov 24 10:16:10 2020 +0000 +++ b/main.cpp Tue Nov 24 13:04:54 2020 +0000 @@ -1,4 +1,4 @@ -//201124_4 +//201124_5 #include "mbed.h" #include "FastPWM.h" #include "INIT_HW.h" @@ -489,38 +489,35 @@ void update_Critic_Networks(float (*arr)[num_input_RL]) { - float gradient_rate = 0.001f; + float gradient_rate = 0.1f; float G_hc1[num_input_RL][num_hidden_unit1] = {0.0f}; - float d_V_d_hc1[batch_size][num_input_RL][num_hidden_unit1] = {0.0f}; ////////////////1 - float G_bc1[num_hidden_unit1] = {0.0f}; - float d_V_d_bc1[batch_size][num_hidden_unit1] = {0.0f}; ////////////////2 + float G_bc1[num_hidden_unit1] = {0.0f}; for (int index2 = 0; index2 < num_hidden_unit1; index2++) { for (int index1 = 0; index1 < num_input_RL; index1++) { for (int n=0; n<batch_size; n++) { + float d_V_d_hc1 = 0.0f; for(int k=0; k<num_hidden_unit2; k++) { if (hxh_c_sum_array[n][k] >= 0) { if (hx_c_sum_array[n][index2] > 0) { - //G_hc1[index1][index2] = G_hc1[index1][index2] + arr[n][index1]*hc2_temp[index2][k]*hc3_temp[k]; //////////////////////3 - d_V_d_hc1[n][index1][index2] = d_V_d_hc1[n][index1][index2] + arr[n][index1]*hc2_temp[index2][k]*hc3_temp[k]; //////////////////////4 + d_V_d_hc1 = d_V_d_hc1 + arr[n][index1]*hc2_temp[index2][k]*hc3_temp[k]; } } } - G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc1[n][index1][index2]); /////////////////////5 + G_hc1[index1][index2] = G_hc1[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc1); } G_hc1[index1][index2] = G_hc1[index1][index2] / batch_size; //hc1_temp[index1][index2] = hc1_temp[index1][index2] - gradient_rate * G_hc1[index1][index2]; } for (int n=0; n<batch_size; n++) { + float d_V_d_bc1 = 0.0f; for(int k=0; k<num_hidden_unit2; k++) { if (hxh_c_sum_array[n][k] >= 0) { if (hx_c_sum_array[n][index2] > 0) { - //G_bc1[index2] = G_bc1[index2] + hc2_temp[index2][k]*hc3_temp[k]; //////////////////6 - d_V_d_bc1[n][index2] = d_V_d_bc1[n][index2] + hc2_temp[index2][k]*hc3_temp[k]; //////////////////7 + d_V_d_bc1 = d_V_d_bc1 + hc2_temp[index2][k]*hc3_temp[k]; } - } } - G_bc1[index2] = G_bc1[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc1[n][index2]); /////////////////////8 + G_bc1[index2] = G_bc1[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc1); } G_bc1[index2] = G_bc1[index2] / batch_size; //bc1_temp[index2] = bc1_temp[index2] - gradient_rate * G_bc1[index2]; @@ -528,54 +525,50 @@ float G_hc2[num_hidden_unit1][num_hidden_unit2] = {0.0f}; - float d_V_d_hc2[batch_size][num_hidden_unit1][num_hidden_unit2] = {0.0f}; float G_bc2[num_hidden_unit2] = {0.0f}; - float d_V_d_bc2[batch_size][num_hidden_unit2] = {0.0f}; for (int index2 = 0; index2 < num_hidden_unit2; index2++) { for (int index1 = 0; index1 < num_hidden_unit1; index1++) { for (int n=0; n<batch_size; n++) { + float d_V_d_hc2 = 0.0f; if (hxh_c_sum_array[n][index2] >= 0) { if (hx_c_sum_array[n][index1] > 0) { - //G_hc2[index1][index2] = G_hc2[index1][index2] + hx_c_sum_array[n][index1]*hc3_temp[index2]; - d_V_d_hc2[n][index1][index2] = hx_c_sum_array[n][index1]*hc3_temp[index2]; + d_V_d_hc2 = hx_c_sum_array[n][index1]*hc3_temp[index2]; } } - G_hc2[index1][index2] = G_hc2[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc2[n][index1][index2]); + G_hc2[index1][index2] = G_hc2[index1][index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc2); } G_hc2[index1][index2] = G_hc2[index1][index2] / batch_size; //hc2_temp[index1][index2] = hc2_temp[index1][index2] - gradient_rate * G_hc2[index1][index2]; } for (int n=0; n<batch_size; n++) { + float d_V_d_bc2 = 0.0f; if (hxh_c_sum_array[n][index2] >= 0) { - //G_bc2[index2] = G_bc2[index2] + hc3_temp[index2]; - d_V_d_bc2[n][index2] = hc3_temp[index2]; + d_V_d_bc2 = hc3_temp[index2]; } - G_bc2[index2] = G_bc2[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc2[n][index2]); + G_bc2[index2] = G_bc2[index2] + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc2); } G_bc2[index2] = G_bc2[index2] / batch_size; //bc2_temp[index2] = bc2_temp[index2] - gradient_rate * G_bc2[index2]; } float G_hc3[num_hidden_unit2]= {0.0f}; - float d_V_d_hc3[batch_size][num_hidden_unit2] = {0.0f}; float G_bc3 = 0.0f; - float d_V_d_bc3[batch_size] = {0.0f}; for (int index2 = 0; index2 < 1; index2++) { for (int index1 = 0; index1 < num_hidden_unit2; index1++) { for (int n=0; n<batch_size; n++) { + float d_V_d_hc3 = 0.0f; if (hxh_c_sum_array[n][index1] >= 0) { - //G_hc3[index1] = G_hc3[index1] + hxh_c_sum_array[n][index1]; - d_V_d_hc3[n][index1] = d_V_d_hc3[n][index1] + hxh_c_sum_array[n][index1]; + d_V_d_hc3 = d_V_d_hc3 + hxh_c_sum_array[n][index1]; } - G_hc3[index1] = G_hc3[index1] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc3[n][index1]); + G_hc3[index1] = G_hc3[index1] + 2.0f*(return_G[n]-V[n])*(-d_V_d_hc3); } G_hc3[index1] = G_hc3[index1] / batch_size; //hc3_temp[index1] = hc3_temp[index1] - gradient_rate * G_hc3[index1]; } for (int n=0; n<batch_size; n++) { - //G_bc2[index2] = G_bc2[index2] + 1.0f; - d_V_d_bc3[n] = 1.0f; - G_bc3 = G_bc3 + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc3[n]); + float d_V_d_bc3 = 0.0f; + d_V_d_bc3 = 1.0f; + G_bc3 = G_bc3 + 2.0f*(return_G[n]-V[n])*(-d_V_d_bc3); } G_bc3 = G_bc3 / batch_size; //bc3_temp = bc3_temp - gradient_rate * G_bc3; @@ -600,39 +593,36 @@ } bc3_temp = bc3_temp - gradient_rate * G_bc3; } - } ///////////////////////////Softplus////////////////////////////////// void update_Actor_Networks(float (*arr)[num_input_RL]) { - float gradient_rate = 0.001f; + float gradient_rate = 0.1f; float G_ha1[num_input_RL][num_hidden_unit1] = {0.0f}; float G_ba1[num_hidden_unit1] = {0.0f}; - float d_x_d_ha1[batch_size][num_input_RL][num_hidden_unit1] = {0.0f}; - float d_x_d_ba1[batch_size][num_hidden_unit1] = {0.0f}; - float d_y_d_ha1[batch_size][num_input_RL][num_hidden_unit1] = {0.0f}; - float d_y_d_ba1[batch_size][num_hidden_unit1] = {0.0f}; for (int index2 = 0; index2 < num_hidden_unit1; index2++) { for (int index1 = 0; index1 < num_input_RL; index1++) { for (int n=0; n<batch_size; n++) { + float d_x_d_ha1 = 0.0f; + float d_y_d_ha1 = 0.0f; if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) { G_ha1[index1][index2] = G_ha1[index1][index2]; } else { for(int k=0; k<num_hidden_unit2; k++) { if (hxh_a_sum_array[n][k] >= 0) { if (hx_a_sum_array[n][index2] > 0) { - d_x_d_ha1[n][index1][index2] = d_x_d_ha1[n][index1][index2] + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][0]; - d_y_d_ha1[n][index1][index2] = d_y_d_ha1[n][index1][index2] + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][1]; + d_x_d_ha1 = d_x_d_ha1 + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][0]; + d_y_d_ha1 = d_y_d_ha1 + arr[n][index1]*ha2_temp[index2][k]*ha3_temp[k][1]; } } } float d_mean_d_ha1 = 0.0f; float d_dev_d_ha1 = 0.0f; - d_mean_d_ha1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha1[n][index1][index2]; - d_dev_d_ha1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha1[n][index1][index2]; + d_mean_d_ha1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha1; + d_dev_d_ha1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha1; G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n])); } @@ -642,21 +632,23 @@ } for (int n=0; n<batch_size; n++) { + float d_x_d_ba1 = 0.0f; + float d_y_d_ba1 = 0.0f; if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) { G_ba1[index2] = G_ba1[index2]; } else { for(int k=0; k<num_hidden_unit2; k++) { if (hxh_a_sum_array[n][k] >= 0) { if (hx_a_sum_array[n][index2] > 0) { - d_x_d_ba1[n][index2] = d_x_d_ba1[n][index2] + ha2_temp[index2][k]*ha3_temp[k][0]; - d_y_d_ba1[n][index2] = d_y_d_ba1[n][index2] + ha2_temp[index2][k]*ha3_temp[k][1]; + d_x_d_ba1 = d_x_d_ba1 + ha2_temp[index2][k]*ha3_temp[k][0]; + d_y_d_ba1 = d_y_d_ba1 + ha2_temp[index2][k]*ha3_temp[k][1]; } } } float d_mean_d_ba1 = 0.0f; float d_dev_d_ba1 = 0.0f; - d_mean_d_ba1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba1[n][index2]; - d_dev_d_ba1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba1[n][index2]; + d_mean_d_ba1 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba1; + d_dev_d_ba1 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba1; G_ba1[index2] = G_ba1[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n])); } @@ -667,28 +659,26 @@ float G_ha2[num_hidden_unit1][num_hidden_unit2] = {0.0f}; float G_ba2[num_hidden_unit2] = {0.0f}; - float d_x_d_ha2[batch_size][num_hidden_unit1][num_hidden_unit2] = {0.0f}; - float d_x_d_ba2[batch_size][num_hidden_unit2] = {0.0f}; - float d_y_d_ha2[batch_size][num_hidden_unit1][num_hidden_unit2] = {0.0f}; - float d_y_d_ba2[batch_size][num_hidden_unit2] = {0.0f}; for (int index2 = 0; index2 < num_hidden_unit2; index2++) { for (int index1 = 0; index1 < num_hidden_unit1; index1++) { for (int n=0; n<batch_size; n++) { + float d_x_d_ha2 = 0.0f; + float d_y_d_ha2 = 0.0f; if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) { G_ha2[index1][index2] = G_ha2[index1][index2]; } else { if (hxh_a_sum_array[n][index2] >= 0) { if (hx_a_sum_array[n][index1] > 0) { - d_x_d_ha2[n][index1][index2] = hx_a_sum_array[n][index1]*ha3_temp[index2][0]; - d_y_d_ha2[n][index1][index2] = hx_a_sum_array[n][index1]*ha3_temp[index2][1]; + d_x_d_ha2 = hx_a_sum_array[n][index1]*ha3_temp[index2][0]; + d_y_d_ha2 = hx_a_sum_array[n][index1]*ha3_temp[index2][1]; } } float d_mean_d_ha2 = 0.0f; float d_dev_d_ha2 = 0.0f; - d_mean_d_ha2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha2[n][index1][index2]; - d_dev_d_ha2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha2[n][index1][index2]; + d_mean_d_ha2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha2; + d_dev_d_ha2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha2; G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n])); } @@ -698,18 +688,20 @@ } for (int n=0; n<batch_size; n++) { + float d_x_d_ba2 = 0.0f; + float d_y_d_ba2 = 0.0f; if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) { G_ba2[index2] = G_ba2[index2]; } else { if (hxh_a_sum_array[n][index2] >= 0) { - d_x_d_ba2[n][index2] = ha3_temp[index2][0]; - d_y_d_ba2[n][index2] = ha3_temp[index2][1]; + d_x_d_ba2 = ha3_temp[index2][0]; + d_y_d_ba2 = ha3_temp[index2][1]; } float d_mean_d_ba2= 0.0f; float d_dev_d_ba2= 0.0f; - d_mean_d_ba2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba2[n][index2]; - d_dev_d_ba2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba2[n][index2]; + d_mean_d_ba2 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba2; + d_dev_d_ba2 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba2; G_ba2[index2] = G_ba2[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n])); } @@ -720,27 +712,25 @@ float G_ha3[num_hidden_unit2][2] = {0.0f}; float G_ba3[2] = {0.0f}; - float d_x_d_ha3[batch_size][num_hidden_unit2][2] = {0.0f}; - float d_x_d_ba3[batch_size][2] = {0.0f}; - float d_y_d_ha3[batch_size][num_hidden_unit2][2] = {0.0f}; - float d_y_d_ba3[batch_size][2] = {0.0f}; for (int index2 = 0; index2 < 2; index2++) { for (int index1 = 0; index1 < num_hidden_unit2; index1++) { for (int n=0; n<batch_size; n++) { + float d_x_d_ha3 = 0.0f; + float d_y_d_ha3 = 0.0f; if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) { G_ha3[index1][index2] = G_ha3[index1][index2]; } else { if (hxh_a_sum_array[n][index1] >= 0) { if (hx_a_sum_array[n][index1] > 0) { - d_x_d_ha3[n][index1][index2] = hxh_a_sum_array[n][index1]; - d_y_d_ha3[n][index1][index2] = hxh_a_sum_array[n][index1]; + d_x_d_ha3 = hxh_a_sum_array[n][index1]; + d_y_d_ha3 = hxh_a_sum_array[n][index1]; } } float d_mean_d_ha3 = 0.0f; float d_dev_d_ha3 = 0.0f; - d_mean_d_ha3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha3[n][index1][index2]; - d_dev_d_ha3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha3[n][index1][index2]; + d_mean_d_ha3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ha3; + d_dev_d_ha3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ha3; G_ha3[index1][index2] = G_ha3[index1][index2] + advantage[n]/pi_old[n]*(d_mean_d_ha3*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ha3*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n])); } @@ -750,17 +740,19 @@ } for (int n=0; n<batch_size; n++) { + float d_x_d_ba3 = 0.0f; + float d_y_d_ba3 = 0.0f; if((advantage[n] >= 0.0f && ratio[n] >= 1.0f + epsilon) || (advantage[n] < 0.0f && ratio[n] < 1.0f - epsilon)) { G_ba3[index2] = G_ba3[index2]; } else { - d_x_d_ba3[n][index2] = 1.0f; - d_y_d_ba3[n][index2] = 1.0f; + d_x_d_ba3 = 1.0f; + d_y_d_ba3 = 1.0f; float d_mean_d_ba3= 0.0f; float d_dev_d_ba3= 0.0f; - d_mean_d_ba3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba3[n][index2]; - d_dev_d_ba3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba3[n][index2]; + d_mean_d_ba3 = exp(hxhh_a_sum_array[n][0])/(1.0f+exp(hxhh_a_sum_array[n][0]))*d_x_d_ba3; + d_dev_d_ba3 = exp(hxhh_a_sum_array[n][1])/(1.0f+exp(hxhh_a_sum_array[n][1]))*d_y_d_ba3; G_ba3[index2] = G_ba3[index2] + advantage[n]/pi_old[n]*(d_mean_d_ba3*Grad_Normal_Dist_Mean(mean_array[n],deviation_array[n],action_array[n])+d_dev_d_ba3*Grad_Normal_Dist_Deviation(mean_array[n],deviation_array[n],action_array[n])); } @@ -768,7 +760,7 @@ G_ba3[index2] = -G_ba3[index2] / batch_size; //ba3_temp[index2] = ba3_temp[index2] - gradient_rate * G_ba3[index2]; } - + // Simultaneous Update for (int index2 = 0; index2 < num_hidden_unit1; index2++) { for (int index1 = 0; index1 < num_input_RL; index1++) { @@ -788,155 +780,9 @@ } ba3_temp[index2] = ba3_temp[index2] - gradient_rate * G_ba3[index2]; } - + } -///////////////////////////ReLU - Bad performance////////////////////////////////// -//void update_Actor_Networks(float (*arr)[num_input_RL]) -//{ -// float gradient_rate = 0.001f; //-0.01f -// -// float G_ha1[num_input_RL][num_hidden_unit] = {0.0f}; -// float G_ba1[num_hidden_unit] = {0.0f}; -// for (int index2 = 0; index2 < num_hidden_unit; index2++) { -// for (int index1 = 0; index1 < num_input_RL; index1++) { -// for (int i=0; i<batch_size; i++) { -// if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { -// G_ha1[index1][index2] = G_ha1[index1][index2]; -// } else { -// -// float hx_sum_total = 0.0f; -// for(int m = 0; m < num_hidden_unit; m++) { -// for(int n = 0; n < num_input_RL; n++) { -// hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n]; -// } -// } -// hx_sum_total = hx_sum_total + bc1_temp[index2]; -// float d_mean_d_ha1 = 0.0f; -// float d_dev_d_ha1 = 0.0f; -// if (hx_sum_total >=0) { -// float hx_sum = 0.0f; -// for(int j=0; j<num_input_RL; j++) { -// hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j]; -// } -// hx_sum = hx_sum + bc1_temp[index2]; -// if (hx_sum >= 0) { -// d_mean_d_ha1 = ha2_temp[index2][0]*arr[i][index1]; -// d_dev_d_ha1 = ha2_temp[index2][1]*arr[i][index1]; -// } else { -// d_mean_d_ha1 = 0.0f; -// d_dev_d_ha1 = 0.0f; -// } -// } else { -// d_mean_d_ha1 = 0.0f; -// d_dev_d_ha1 = 0.0f; -// } -// G_ha1[index1][index2] = G_ha1[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); -// } -// } -// G_ha1[index1][index2] = G_ha1[index1][index2] / batch_size; -// ha1_temp[index1][index2] = ha1_temp[index1][index2] - gradient_rate * G_ha1[index1][index2]; -// } -// for (int i=0; i<batch_size; i++) { -// if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { -// G_ba1[index2] = G_ba1[index2]; -// } else { -// -// float hx_sum_total = 0.0f; -// for(int m = 0; m < num_hidden_unit; m++) { -// for(int n = 0; n < num_input_RL; n++) { -// hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n]; -// } -// } -// hx_sum_total = hx_sum_total + bc1_temp[index2]; -// float d_mean_d_ba1 = 0.0f; -// float d_dev_d_ba1 = 0.0f; -// if (hx_sum_total >=0) { -// -// float hx_sum = 0.0f; -// for(int j=0; j<num_input_RL; j++) { -// hx_sum = hx_sum + ha1_temp[j][index2]*arr[i][j]; -// } -// hx_sum = hx_sum + bc1_temp[index2]; -// -// if(hx_sum >=0) { -// d_mean_d_ba1 = ha2_temp[index2][0]; -// d_dev_d_ba1 = ha2_temp[index2][1]; -// } else { -// d_mean_d_ba1 = 0.0f; -// d_dev_d_ba1 = 0.0f; -// } -// } else { -// d_mean_d_ba1 = 0.0f; -// d_dev_d_ba1 = 0.0f; -// } -// G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba1*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba1*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); -// } -// } -// G_ba1[index2] = G_ba1[index2] / batch_size; -// ba1_temp[index2] = ba1_temp[index2] - gradient_rate * G_ba1[index2]; -// } -// -// float G_ha2[num_hidden_unit][2] = {0.0f}; -// float G_ba2[2] = {0.0f}; -// for (int index2 = 0; index2 < 2; index2++) { -// for (int index1 = 0; index1 < num_hidden_unit; index1++) { -// for (int i=0; i<batch_size; i++) { -// if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { -// G_ha2[index1][index2] = G_ha2[index1][index2]; -// } else { -// -// float hx_sum_total = 0.0f; -// for(int m = 0; m < num_hidden_unit; m++) { -// for(int n = 0; n < num_input_RL; n++) { -// hx_sum_total = hx_sum_total + ha1_temp[n][m]*arr[i][n]; -// } -// } -// hx_sum_total = hx_sum_total + bc1_temp[index2]; -// float d_mean_d_ha2 = 0.0f; -// float d_dev_d_ha2 = 0.0f; -// if (hx_sum_total >=0) { -// float hx_sum = 0.0f; -// for(int j=0; j<num_input_RL; j++) { -// hx_sum = hx_sum + ha1_temp[j][index1]*arr[i][j]; -// } -// hx_sum = hx_sum + bc1_temp[index1]; -// if (hx_sum >= 0) { -// d_mean_d_ha2 = hx_sum; -// d_dev_d_ha2 = hx_sum; -// } else { -// d_mean_d_ha2 = 0.0f; -// d_mean_d_ha2 = 0.0f; -// } -// } else { -// d_mean_d_ha2 = 0.0f; -// d_mean_d_ha2 = 0.0f; -// } -// G_ha2[index1][index2] = G_ha2[index1][index2] + advantage[i]/pi_old[i]*(d_mean_d_ha2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ha2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); -// } -// } -// G_ha2[index1][index2] = G_ha2[index1][index2] / batch_size; -// ha2_temp[index1][index2] = ha2_temp[index1][index2] - gradient_rate * G_ha2[index1][index2]; -// } -// for (int i=0; i<batch_size; i++) { -// if((advantage[i] >= 0.0f && ratio[i] >= 1.0f + epsilon) || (advantage[i] < 0.0f && ratio[i] < 1.0f - epsilon)) { -// G_ba2[index2] = G_ba2[index2]; -// } else { -// -// float d_mean_d_ba2 = 0.0f; -// float d_dev_d_ba2 = 0.0f; -// d_mean_d_ba2 = 1.0f; -// d_dev_d_ba2 = 1.0f; -// G_ba1[index2] = G_ba1[index2] + advantage[i]/pi_old[i]*(d_mean_d_ba2*Grad_Normal_Dist_Mean(mean_array[i],deviation_array[i],action_array[i])+d_dev_d_ba2*Grad_Normal_Dist_Deviation(mean_array[i],deviation_array[i],action_array[i])); -// } -// } -// G_ba2[index2] = G_ba2[index2] / batch_size; -// ba2_temp[index2] = ba2_temp[index2] - gradient_rate * G_ba2[index2]; -// } -//} - - - float rand_normal(double mean, double stddev) { //Box muller method @@ -1143,7 +989,7 @@ //read_field(i2c_slave_addr1); //if(DIR_VALVE_ENC < 0) value = 1023 - value; -// timer_while ++; + //timer_while ++; ///////////////////////////////////////////////////////Neural Network @@ -1268,7 +1114,7 @@ hxh_c_sum_array[n][i] = hxh_c_sum[i]; } hxhh_c_sum_array[n] = hxhh_c_sum; - + pi[n] = exp(-(action_array[n]-mean_array[n])*(action_array[n]-mean_array[n])/(2.0f*deviation_array[n]*deviation_array[n]))/(sqrt(2.0f*PI)*deviation_array[n]); Actor_Network_Old(temp_array); pi_old[n] = exp(-(action_array[n]-mean_old)*(action_array[n]-mean_old)/(2.0f*deviation_old*deviation_old))/(sqrt(2.0f*PI)*deviation_old); @@ -1305,7 +1151,6 @@ update_Critic_Networks(state_array); update_Actor_Networks(state_array); } - //virt_pos = 10.0f; Update_Done_Flag = 1; Update_Case = 0; //logging1 = V[0];