brickos: demo/trailerbot.c Source File

00001 /* trailerbot.c */
00002 /* learns to "back up" a trailer without jack-knifing.*/
00003 
00004 #include <config.h>
00005 #if defined(CONF_DMOTOR) && defined(CONF_DSENSOR) && defined(CONF_DSENSOR_ROTATION)
00006 
00007 #include <conio.h>
00008 #include <stdlib.h>
00009 #include <unistd.h>
00010 #include <dmotor.h>
00011 #include <dsensor.h>
00012 #include <tm.h>
00013 
00014 /*
00015  * Physical characteristics of the robot.
00016  * Increasing these increases the number of calculations
00017  * that must be done, but also the accuracy of the behavior of the robot.
00018  * Keep in mind that these should be congruent with the "real world."
00019  * I.e., by default, the "ANGLES" matches the accuracy of the rotation sensor
00020  * and the # of movements matches the number of motor commands 
00021  * (hard/soft left/right, forward, and back, totalling 6)
00022  */
00023 
00024 #define ANGLES 6
00025 #define MOVEMENTS 6
00026 
00027 /*
00028  * We use these to define the range of the angles. 
00029  * Basically, these should be the jacknifed states.
00030  */
00031 #define FAR_RIGHT 0
00032 #define FAR_LEFT 4
00033 
00034 /*
00035  * In order to encourage the robot to explore, we add a heaven state.
00036  * This state has a high reward, to "fool" the robot into looking for it.
00037  * Once the robot has tried (and failed) to reach it, the probability 
00038  * of reaching HEAVEN will decline and the robot will ignore it.
00039  * The higher HEAVEN_REWARD is, the longer the robot will look for it.
00040  */
00041 
00042 #define HEAVEN 5
00043 #define HEAVEN_REWARD 20
00044 
00045 /*
00046  * We need to be able to adjust the length of time that the robot moves 
00047  * for. This will vary based on surface, structure, etc. 
00048  * Adjust this so that a reverse backs the robot up all the way from a jackknife.
00049  * In my experience, 7 works reasonably well on carpet and 4 on wood floors.
00050  * This is also heavily dependent on battery power- lower this 
00051  * if you have new batteries.
00052  */
00053 
00054 #define TURN_MULTIPLIER 2
00055 
00056 /*
00057  * Calculation variables.
00058  * These variables mainly affect the probability calculations done
00059  * by the robot. 
00060  */
00061 
00062 /*
00063  * EPSILON: How randomly do we move?
00064  * We start at EPSILON_MAX, and then after every move reduce EPSILON_CURRENT
00065  * by EPSILON_DECAY. 
00066  * The higher EPSILON_MAX is, the more randomly we start.
00067  * The higher EPSILON_DECAY is, the quicker we get boring.
00068  * The lower EPSILON_MIN is, the more stable we are when we are done.
00069  */
00070 
00071 #define EPSILON_MAX .60
00072 #define EPSILON_MIN .10
00073 #define EPSILON_DECAY .02
00074 double EPSILON_CURRENT = EPSILON_MAX;
00075 
00076 /*
00077  * ALPHA: How quickly do we adjust the steering results?
00078  * The larger, the more impact an individual measurement can have.
00079  * If this is large, we can learn faster but one abberation will hurt
00080  * us longer.
00081  */
00082 #define ALPHA .20
00083 
00084 /* GAMMA: How important is the future v. the present? */
00085 /* The larger, the more important the future is. */
00086 #define GAMMA .90
00087 
00088 /* KAPPA: How thoroughly do we update the Q values?*/
00089 #define KAPPA 10
00090 
00091 /*
00092  * Movement enum: what does each MOVEMENT int mean?
00093  * Enumerate clockwise from hard left.
00094  */
00095 
00096 enum movement
00097 {
00098   HARD_LEFT, SOFT_LEFT, FORWARD, SOFT_RIGHT, HARD_RIGHT, REVERSE
00099 };
00100 
00101 /*
00102  * Data arrays.
00103  *
00104  * steering_results stores the probability of the results of movements:
00105  * i.e., if we are in angle A and take movement B, what is the probability 
00106  * that we end up at angle C? Range should be 0-1.
00107  *
00108  * q_score stores the "Q value" of an action.
00109  * i.e., given that we are at angle A, how "good" is the outcome of 
00110  * the movement? Range should be 0-1.
00111  *
00112  * These are initialized in array_initialization(), called from main().
00113  */
00114 
00115 double steering_results[ANGLES][MOVEMENTS][ANGLES];
00116 double q_score[ANGLES][MOVEMENTS];
00117 
00118 /*
00119  * function to convert random from 0-2^31-1 to 0-1.
00120  */
00121 double norm_random()
00122 {
00123   double temp;
00124   temp = random();
00125   temp /= 2147483647;
00126   return temp;
00127 } 
00128 
00129 /*
00130  * Function to convert rotation from 0-19
00131  * to a more manageable 0-4
00132  */
00133 
00134 int norm_rotation(int real_value)
00135 {
00136   switch(real_value)
00137     {
00138       /*right jack-knife*/
00139     case -1:
00140     case 0:
00141     case 1:
00142     case 2:
00143       return 0;
00144       /*leaning toward the right*/
00145     case 3:
00146     case 4:
00147     case 5:
00148     case 6:
00149       return 1;
00150       /*centered!*/
00151     case 7:
00152     case 8:
00153     case 9:
00154     case 10:
00155     case 11:
00156     case 12:
00157       return 2;
00158       /*leaning towards the left*/
00159     case 13:
00160     case 14:
00161     case 15:
00162     case 16:
00163       return 3;
00164       /*left jack-knife*/
00165     case 17:
00166     case 18:
00167     case 19:
00168     case 20:
00169       return 4;
00170       /*if none of these, big error*/
00171     default:
00172       /*to be easily visible on LCD*/
00173       cputs("ERROR");
00174       sleep(1);
00175       return -1;
00176     }
00177   /*to get rid of compiler warning*/
00178   return 0;
00179 }
00180 
00181 /*
00182  * Functions to initialize the data arrays.
00183  */
00184 
00185 void array_initialization()
00186 {
00187   int i, j, k;
00188 
00189   /*
00190    * Make all steering possibilities the same; 
00191    * All states stay the same until we "learn."
00192    * Probability is: 1/ANGLES
00193    */
00194 
00195   for(i=0; i<ANGLES; i++)
00196     {
00197       for(j=0; j<MOVEMENTS; j++)
00198   {
00199     for(k=0; k<ANGLES; k++)
00200       {
00201         steering_results[i][j][k] = (1/ANGLES);
00202       }
00203   }
00204     }
00205 
00206   /*all q_scores are set to 0, since we have "learned" nothing.*/
00207   for(i=0; i<ANGLES; i++)
00208     {
00209       for(j=0; j<MOVEMENTS; j++)
00210   {
00211     q_score[i][j] = 0;
00212   }
00213     }
00214 }
00215 
00216 /*
00217  * Big block of motor commands
00218  * provides simple functions to cleanup later code.
00219  */
00220 
00221 void run_motors()
00222 {
00223   motor_a_speed(MAX_SPEED);
00224   motor_c_speed(MAX_SPEED);
00225 }
00226 
00227 void go_forward()
00228 {
00229   motor_a_dir(fwd);
00230   motor_c_dir(fwd);
00231   run_motors();
00232   msleep(100*TURN_MULTIPLIER);
00233 }
00234 
00235 void go_back()
00236 {
00237   motor_a_dir(rev);
00238   motor_c_dir(rev);
00239   run_motors();
00240   msleep(150*TURN_MULTIPLIER);
00241 }
00242 
00243 void soft_left()
00244 {
00245   motor_a_dir(fwd);
00246   motor_c_dir(fwd);
00247   motor_a_speed(MAX_SPEED);
00248   motor_c_speed(MAX_SPEED/2);
00249   msleep(75*TURN_MULTIPLIER);
00250 }
00251 
00252 void soft_right()
00253 {
00254   motor_a_dir(fwd);
00255   motor_c_dir(fwd);
00256   motor_a_speed(MAX_SPEED/2);
00257   motor_c_speed(MAX_SPEED);
00258   msleep(75*TURN_MULTIPLIER);
00259 }
00260 
00261 void hard_right()
00262 {
00263   motor_a_dir(rev);
00264   motor_c_dir(fwd);
00265   motor_a_speed(MAX_SPEED);
00266   motor_c_speed(MAX_SPEED);
00267   msleep(100*TURN_MULTIPLIER);
00268 }
00269 
00270 void hard_left()
00271 {
00272   motor_a_dir(fwd);
00273   motor_c_dir(rev);
00274   motor_a_speed(MAX_SPEED);
00275   motor_c_speed(MAX_SPEED);
00276   msleep(100*TURN_MULTIPLIER);
00277 }
00278 
00279 void stop_motors()
00280 {
00281   motor_a_speed(0);
00282   motor_c_speed(0);
00283   motor_a_dir(brake);
00284   motor_c_dir(brake);
00285 
00286   /*to conserve batteries*/
00287   msleep(500);
00288   motor_a_dir(off);
00289   motor_c_dir(off);
00290 }
00291 
00292 /*
00293  * This function moves the robot one time.
00294  * There are three parts: 
00295  * 1) Choice of direction
00296  * 2) Actual motion
00297  * 3) update of steering_results
00298  */
00299 
00300 void move()
00301 {
00302   int i;
00303 
00304   /*variable to use in figuring out the "best" option*/
00305   int max_q_score = 0;
00306 
00307   /*what do we do next? store it here*/
00308   /*we init to -1 as an error*/
00309   int next_movement = -1;
00310 
00311   /*Where we started.*/
00312   /*We don't use ROTATION_2 all the way through in case it changes.*/
00313   int initial_angle = norm_rotation(ROTATION_2);
00314 
00315   /*Where we ended up.*/
00316   int new_angle;
00317 
00318   /*Show the current angle*/
00319   cputs("ANGLE");
00320   msleep(200);
00321   lcd_int(initial_angle);
00322   msleep(500);
00323   
00324   /*
00325    * Most of the time, we do the "correct" thing
00326    * by finding the best q_score of our possible options.
00327    * On the off chance that norm_random() is low (or EPSILON is high ;)
00328    * we then "explore" by choosing a random movement.
00329    */
00330 
00331   if(norm_random() > EPSILON_CURRENT)
00332     {
00333       /*We are doing what the table tells us to.*/
00334       cputs("real ");
00335       msleep(500);
00336 
00337       for(i=0; i<MOVEMENTS; i++)
00338   {
00339     if(q_score[initial_angle][i] > max_q_score)
00340       {
00341         max_q_score = q_score[initial_angle][i];
00342         next_movement = i;
00343       }
00344   }
00345     }
00346   else
00347     {
00348       double temp;
00349       /*We are just picking something at random.*/
00350       cputs("rand ");
00351       msleep(500);
00352 
00353       /*pick one. Any one.*/
00354       
00355       temp = norm_random();
00356       next_movement = temp*MOVEMENTS;   
00357 
00358       /*show what we do next*/
00359       lcd_int(next_movement);
00360       sleep(1);
00361     }
00362   
00363   /*what happens if next_movement never gets changed?*/
00364   /*we'd hate to do HARD_LEFT over and over again*/
00365   /*so we choose randomly*/
00366 
00367   if(-1==next_movement)
00368     {
00369       double temp;
00370       temp = norm_random();
00371       next_movement = temp*MOVEMENTS;
00372     }
00373 
00374   /*having chosen a movement, lets do it*/
00375   switch(next_movement)
00376     {
00377     case HARD_LEFT:
00378       cputs("HL   ");
00379       hard_left();
00380       break;
00381     case SOFT_LEFT:
00382       cputs("SL   ");
00383       soft_left();
00384       break;
00385     case FORWARD:
00386       cputs("FWD  ");
00387       go_forward();
00388       break;
00389     case SOFT_RIGHT:
00390       cputs("SR   ");
00391       soft_right();
00392       break;
00393     case HARD_RIGHT:
00394       cputs("HR   ");
00395       hard_right();
00396       break;
00397     case REVERSE:
00398       cputs("REV  ");
00399       go_back();
00400       break;
00401     default:
00402       /*this is an error and should never be reached*/
00403       cputs("ERROR");
00404       sleep(1);
00405       stop_motors();
00406       break;
00407     }
00408 
00409   /*Once we've started, we'd better stop*/
00410   stop_motors();
00411 
00412   /*Allows us to read direction*/
00413   msleep(500);
00414 
00415   /*This is here just to make the next function cleaner*/
00416   new_angle = norm_rotation(ROTATION_2);
00417 
00418   /*Where are we now?*/
00419   cputs("NEW  ");
00420   msleep(200);
00421   lcd_int(new_angle);
00422   msleep(500);
00423   
00424   /*
00425    * Since we know that "next_movement" took us from "initial_angle"
00426    * to new_angle (ROTATION_2), we store that increased probability.
00427    */
00428   
00429   steering_results[initial_angle][next_movement][new_angle] += ALPHA;
00430   
00431   /*here we re-norm so that the sum of the probabilities is still 1*/
00432   for(i=0; i<ANGLES; i++)
00433     {
00434       steering_results[initial_angle][next_movement][i] /= (1+ALPHA);
00435     }  
00436   
00437   /*The last thing we do is reduce Epsilon*/
00438   if(EPSILON_CURRENT > EPSILON_MIN)
00439     {
00440       EPSILON_CURRENT-=EPSILON_DECAY;
00441     }
00442 
00443 }
00444 
00445 /*
00446  * This function updates the q_score table.
00447  * q_score is essentially the "expected" value of a move:
00448  * i.e., if I'm in angle A and do movement B, what can I expect?
00449  * Something good? Bad? 
00450  */
00451 
00452 void q_score_update()
00453 {
00454   /*loop variables. Lots of them.*/
00455   int i, j, k, l;
00456 
00457   /*three variables for later*/
00458   float reward;
00459   float q_sum;
00460   float max_q_score;
00461   
00462   for(i=0; i<ANGLES; i++)
00463   {
00464     for(j=0; j<MOVEMENTS; j++)
00465     {
00466         /*are we doing a bad thing?*/   
00467             if((i>=FAR_LEFT)||(i<=FAR_RIGHT)||(REVERSE==j))
00468       {
00469         reward = 0;
00470       }
00471           /*are we in "heaven?"*/
00472             else if(HEAVEN==i)
00473       {
00474         reward = HEAVEN_REWARD;
00475       }
00476             /*if not, we get rewarded normally*/
00477           else
00478       {
00479         reward = 1;
00480       }
00481 
00482             /*
00483            * This code "looks ahead" to see two things:
00484          * 1) What possibility do we have of getting to 
00485              *    all possible angles?
00486            * 2) Once we get to those angles, what is the best 
00487          *    possible outcome?
00488              * These two pieces of information are combined and 
00489            * stored in q_sum. 
00490          */
00491 
00492         q_sum = 0;
00493 
00494             for(k=0; k<ANGLES; k++)
00495       {
00496         max_q_score = 0;
00497         for(l=0; l<MOVEMENTS; l++)
00498                   {
00499                 if(q_score[k][l] > max_q_score)
00500                 {
00501                 max_q_score = q_score[k][l];
00502             } 
00503                     }
00504         q_sum += (steering_results[i][j][k]*max_q_score);
00505       }
00506     
00507             /*store the new expected q_score*/
00508           q_score[i][j] = reward+(GAMMA*q_sum);
00509           }
00510   }
00511 }
00512 
00513 /*
00514  * main
00515  */
00516 
00517 int main(int argc, char **argv) 
00518 {
00519   int k;
00520 
00521   /*seed the random number generator*/
00522   /*would be better to get a number from the environment here*/
00523   ds_active(&SENSOR_1);
00524   msleep(100);
00525   srandom(LIGHT_1);
00526   ds_passive(&SENSOR_1);
00527 
00528   /*initialize rotation sensor*/
00529 
00530   ds_active(&SENSOR_2);
00531   ds_rotation_on(&SENSOR_2);
00532 
00533   /*could initialize for left and right here*/
00534   /*Have to make sure that the rotation sensor is correctly calibrated*/
00535   cputs("start");
00536   msleep(500);
00537   cputs("at   ");
00538   msleep(500);
00539   cputs("right");
00540   sleep(2);
00541 
00542   /*to ensure calibration, start against the right side*/
00543   ds_rotation_set(&SENSOR_2, FAR_RIGHT);
00544   msleep(50);
00545   
00546   cputs("now  ");
00547   msleep(500);
00548   cputs("centr");
00549   sleep(1);
00550 
00551   /*We have to fill the probability arrays with "correct" information*/
00552   array_initialization();
00553 
00554   while (!shutdown_requested())
00555   {
00556       
00557     /*We are going.*/
00558     cputs("move");
00559     msleep(500);
00560     move();
00561 
00562     /*We are now calculating q-scores.*/
00563     cputs("score");
00564 
00565     for(k=0; k<KAPPA; k++)
00566         {
00567             q_score_update();
00568         }
00569   }
00570 
00571   cls();
00572   return 0;
00573 }
00574 
00575 #else
00576 #warning trailerbot.c requires CONF_DMOTOR, CONF_DSENSOR, and CONF_DSENSOR_ROTATION
00577 #warning trailerbot demo will do nothing
00578 int main(int argc, char *argv[]) {
00579   return 0;
00580 }
00581 #endif // defined(CONF_DMOTOR) && defined(CONF_DSENSOR) && defined(CONF_DSENSOR_ROTATION)