Reinforcement Learning example

No preview image

1 collaborator

Default-person Russ Abbott (Author)

Tags

reinforcement learning 

Tagged by Russ Abbott almost 7 years ago

Visible to everyone | Changeable by everyone
Model was written in NetLogo 6.0.2 • Viewed 1082 times • Downloaded 53 times • Run 0 times
Download the 'Reinforcement Learning example' model • Download this model • Embed this model

Do you have questions or comments about this model? Ask them here! (You'll first need to log in.)


Comments and Questions

Please start the discussion about this model! (You'll first need to log in.)

Click to Run Model

;; Adapted from a model by Joe Roop: http://ccl.northwestern.edu/netlogo/models/community/Reinforcement Learning Maze
;; Copyright Russ Abbott (Russ.Abbott@gmail.com)
;; This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License.
;; To view a copy of the license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode.

;; Per-patch state used by the Q-learning procedures below.
patches-own [
  Qlist    ;; list of four Q-values, initialised to [0 0 0 0]; indexed in parallel with Hlist
  reward   ;; reward collected for stepping onto this patch (assigned in set-rewards)
  qa-elts  ;; list of the four invisible label turtles that display this patch's Q-values
]

;; The learning agent; setup creates exactly one walker.
breed [walkers walker]

;; NOTE(review): no procedure visible in this file creates or references qa-labels;
;; the Q-value label turtles are plain turtles sprouted in setup-maze. Confirm whether
;; this breed is still needed.
breed [qa-labels qa-label]

;; Model-wide state.
globals [
  episode      ;; number of the episode about to run; set to 1 by setup, incremented by one-trip
  goal-color   ;; pcolor that marks the goal patch
  goal-patch   ;; the patch the walker is trying to reach
  start-patch  ;; the patch every episode starts from
  Hlist        ;; (list west north east south); position i here matches position i in a patch's Qlist
  north        ;; heading 0
  east         ;; heading 90
  south        ;; heading 180
  west         ;; heading 270
]

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Reset the model: define the four compass headings, build the maze,
;; place a single walker on the start patch and select the reward plot.
to setup
  clear-all

  ;; Compass headings, and the list that fixes the order of the four actions.
  set west 270
  set south 180
  set east 90
  set north 0
  set Hlist (list west north east south)

  setup-maze

  ;; The one learning agent. start-patch is assigned during setup-maze.
  create-walkers 1 [
    move-to start-patch
    set heading 45
    set shape "bug"
    set color red + 1
    set size 0.8
  ]

  set episode 1
  set-current-plot "Ave Reward Per Episode"
end

;; Give every patch a zeroed Q-list and four invisible label turtles (one per
;; action, in Hlist order: west, north, east, south), then lay out the maze.
to setup-maze
  ask patches [
    ;; (heading distance) pairs that place each label relative to a common anchor point.
    let label-offsets (list (list west 0.25) (list north 0.35) (list east 0.3) (list south 0.40))
    let next-offset 0
    set Qlist [0 0 0 0]
    set qa-elts []
    sprout 4 [  ;; four size-0 turtles whose labels show the Q-values near the patch edges
      set size 0
      ;; Move to the anchor point: 0.1 east and 0.1 south of the patch centre.
      set heading east
      fd 0.1
      set heading south
      fd 0.1
      ;; Each new turtle takes the next unused offset, so the order of qa-elts
      ;; always matches the order of label-offsets.
      let offset item next-offset label-offsets
      set next-offset next-offset + 1
      set heading first offset
      fd second offset
      set label 0
      set qa-elts lput self qa-elts
    ]
  ]
  set-maze-elements
end

;; Paint the maze: checkerboard floor, start and goal patches, walls, one
;; passage through the wall row, rewards, and finally the visibility of the
;; Q-value labels.
to set-maze-elements
  clear-drawing
  ask patches [ set pcolor default-color self ]

  set goal-color orange + 3
  set goal-patch patch 4 4
  set start-patch patch -4 -4

  ask start-patch [ set pcolor black ]
  ask goal-patch [ set pcolor goal-color ]

  setup-blockades
  make-passage
  set-rewards

  ;; Hide Q-value labels that sit on wall or goal patches. The test runs in
  ;; each label turtle's own context, so pcolor is the colour of the patch
  ;; that the label currently stands on.
  ask patches [
    foreach qa-elts [ q-label ->
      ask q-label [
        set hidden? member? pcolor (list blue goal-color)
      ]
    ]
  ]
end

;; Colour the walls blue: the outer border, the full row pycor = -1, and three
;; interior seed patches together with a few randomly chosen neighbours.
to setup-blockades
  ask patches with [abs pxcor = max-pxcor or abs pycor = max-pycor or pycor = -1] [
    set pcolor blue
  ]
  ;; Each entry is [x y k]: patch (x, y) becomes a wall, plus k random neighbours.
  foreach [[-1 2 2] [1 1 2] [-2 -3 1]] [ seed ->
    ask patch (item 0 seed) (item 1 seed) [
      set pcolor blue
      ask n-of (item 2 seed) neighbors [ set pcolor blue ]
    ]
  ]
end

;; Open a passage through the wall row pycor = -1: pick one wall patch away
;; from the side borders, clear it, clear the patches directly above and below
;; it, and clear one of the two patches diagonally above it.
to make-passage
  ask one-of patches with [pycor = -1 and abs pxcor < max-pxcor - 3] [
    let gap-x pxcor
    let gap-y pycor
    set pcolor default-color self
    ;; Directly above and below the gap.
    ask patches with [pxcor = gap-x and abs (pycor - gap-y) = 1] [
      set pcolor default-color self
    ]
    ;; One of the two patches up-left / up-right of the gap.
    ask one-of patches with [pycor = gap-y + 1 and abs (pxcor - gap-x) = 1] [
      set pcolor default-color self
    ]
  ]
end

;; Assign each patch its reward (wall, ordinary, or goal), sprout a size-0
;; turtle on every patch whose label shows that reward, and nudge the west and
;; east Q-value labels 0.1 further south.
to set-rewards
  ask patches [
    ifelse pcolor = blue
      [ set reward boundary-reward ]
      [ set reward base-reward ]
  ]
  ask goal-patch [ set reward goal-reward ]

  ask patches [
    sprout 1 [
      set size 0
      ;; Inside sprout, myself is the patch doing the sprouting.
      ifelse myself = goal-patch
        [ set label-color black ]
        [ set label-color yellow + 2 ]
      set label [reward] of myself
      set heading east
      fd 0.2
    ]
    ;; Items 0 and 2 of qa-elts are the west and east labels.
    foreach (list first qa-elts item 2 qa-elts) [ side-label ->
      ask side-label [
        set heading south
        fd 0.1
      ]
    ]
  ]
end

;; Report the floor colour for a-patch: a dark green, shifted by +0.2 when
;; pxcor + pycor is odd and by -0.2 otherwise, which gives a checkerboard.
;; a-patch may be any agent that can report pxcor and pycor.
to-report default-color [a-patch]
  let coord-sum [pxcor] of a-patch + [pycor] of a-patch
  let shade-offset -0.2
  if is-odd? coord-sum [ set shade-offset 0.2 ]
  ;; Same arithmetic order everywhere, because callers compare this colour with =.
  report green - 2 + shade-offset
end

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Run one training episode, or stop once num-episodes episodes have been run.
;; Path tracing is switched off for training runs.
to go
  ifelse episode > num-episodes [
    stop
  ] [
    set trace? false
    one-trip
  ]
end

;; Run a single episode, plot and print its average reward per step, and
;; advance the episode counter.
to one-trip
  clear-drawing
  ;; No exploration while tracing a path; otherwise convert the percentage to a fraction.
  let explore-fraction ifelse-value trace? [0] [exploration-% / 100]
  let rewards episode-path explore-fraction
  let steps length rewards
  let mean-reward (sum rewards) / steps
  plot mean-reward
  output-print (word " " episode "; path-length: " steps "; avg-reward: " precision mean-reward 2)
  set episode episode + 1
end

;; Walk the walker from the start patch until it steps onto a patch that is
;; neither floor-coloured nor the start patch, updating Q-values on the way.
;; explore-% is the probability (0..1) of picking a random direction at each step.
;; Reports the list of rewards collected, one per step.
to-report episode-path [explore-%]
  let rewards-seen []
  ask walkers [
    pen-up
    move-to start-patch
    if trace? [
      pen-down
      set pen-size 3
    ]
    while [ patch-here = start-patch or [pcolor] of patch-here = default-color self ] [
      ;; Choose the action index (a position in Hlist / Qlist) and face that way.
      let move-index 0
      ifelse random-float 1 < explore-% [
        ;; Explore: random direction, then look up its index.
        set heading one-of Hlist
        set move-index position heading Hlist
      ] [
        ;; Exploit: pick at random among the actions that share the highest Q-value.
        let best-q max Qlist
        set move-index one-of all-positions best-q Qlist
        set heading item move-index Hlist
      ]

      ;; Reward and best Q-value of the patch about to be entered.
      let next-patch patch-ahead 1
      let step-reward [reward] of next-patch
      set rewards-seen lput step-reward rewards-seen
      let best-next-q max [Qlist] of next-patch

      ;; Q-learning update for the chosen action on the current patch.
      let old-q item move-index Qlist
      let new-q precision ( (1 - weight) * old-q + weight * (step-reward + gamma * best-next-q) ) 3
      set Qlist replace-item move-index Qlist new-q

      ;; Refresh the four Q-value labels of the current patch.
      ask patch-here [
        (foreach qa-elts Qlist [ [label-turtle q-value] ->
          ask label-turtle [ set label precision q-value 1 ]
        ])
      ]
      fd 1
    ]
  ]
  report rewards-seen
end

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Report the zero-based positions, in ascending order, of every item of
;; a-list that equals elt. Reports [] when elt does not occur.
to-report all-positions [elt a-list]
  report filter [ idx -> item idx a-list = elt ] range (length a-list)
end

;; Report, in ascending order, the positions of every item of a-list that
;; equals elt, counting the first item as position n.
to-report all-positions' [elt a-list n]
  let hits []
  let idx n
  foreach a-list [ candidate ->
    if candidate = elt [ set hits lput idx hits ]
    set idx idx + 1
  ]
  report hits
end

;; Report true when n leaves remainder 1 under NetLogo's mod 2, false otherwise.
to-report is-odd? [n]
  let parity n mod 2
  report parity = 1
end

;; Join the items of a-list into one string, with sep between consecutive items.
;; Reports "" for an empty list (reduce alone raises a runtime error there), and
;; always reports a string, even when a-list holds a single non-string item.
to-report list-to-string [a-list sep]
  if empty? a-list [ report "" ]
  ;; Convert the first item to a string up front so the one-item case is also a string.
  let seeded fput (word first a-list) but-first a-list
  report reduce [ [so-far next] -> (word so-far sep next) ] seeded
end

;; Report the item at index 1 of a-list (the one right after the first).
;; a-list must hold at least two items.
to-report second [a-list]
  report item 1 a-list
end

There is only one version of this model, created almost 7 years ago by Russ Abbott.

Attached files

File Type Description Last updated
2018-04-01_21-30-12.png png Model image almost 7 years ago, by Russ Abbott Download

This model does not have any ancestors.

This model does not have any descendants.