00001 //---------------------------------------------------------------------------- 00002 /** @file GoUctPureRandomGenerator.h */ 00003 //---------------------------------------------------------------------------- 00004 00005 #ifndef GOUCT_PURERANDOMGENERATOR_H 00006 #define GOUCT_PURERANDOMGENERATOR_H 00007 00008 #include <vector> 00009 #include "GoBoard.h" 00010 #include "GoUctUtil.h" 00011 #include "SgWrite.h" 00012 00013 //---------------------------------------------------------------------------- 00014 00015 /** Randomly select from empty points on the board. 00016 Finds and shuffles the empty points on the board at the beginning to avoid 00017 repeated loops over the board to find empty points. 00018 @note For efficiency reasons, newly occupied points are not immediately 00019 removed from the list but only checked at the time of move generation. 00020 This can cause a deviation from a uniform probability distribution if 00021 those points become empty because of capture before they are generated. 00022 Experiments have shown that this does not impact the playing strength (as 00023 in other cases in the playout policy where the computational cost for a 00024 fully uniform distribution outweighs its benfits, e.g. GoUctPlayoutPolicy 00025 does not filter duplicate points in the move lists generated by 00026 patterns). Even a constant time algorithm for removing points from the 00027 list is slower than the current implementation. */ 00028 template<class BOARD> 00029 class GoUctPureRandomGenerator 00030 { 00031 public: 00032 GoUctPureRandomGenerator(const BOARD& bd, SgRandom& random); 00033 00034 /** Finds and shuffles the empty points currently on the board. */ 00035 void Start(); 00036 00037 /** Update state. 00038 Must be called after each play on the board. */ 00039 void OnPlay(); 00040 00041 /** Return a list of points that are currently potentially empty. 00042 As a side-benefit, the generator can be used to get the list of empty 00043 points on the board to speed up full-board loops over empty points 00044 or to get a shuffled list of the empty points (e.g. for finding 00045 legal moves when expanding a node in the in-tree-phase of UCT). 00046 Points in the list are candidates, they still have to be tested, if 00047 they are really empty. */ 00048 const std::vector<SgPoint>& Candidates() const; 00049 00050 /** Generate a pure random move. 00051 Randomly select an empty point on the board that fulfills 00052 GoUctUtil::GeneratePoint() for the color currently to play on the 00053 board. */ 00054 SgPoint Generate(SgBalancer& balancer); 00055 00056 /** Generate a move using the fillboard heuristic. 00057 Tries @c numberTries times to select a point on the board and 00058 returns it, if it is empty and all adjacent and diagonal neighbors are 00059 empty. Otherwise it returns SG_NULLMOVE. Using this heuristic before 00060 any other heuristics is helpful to increase playout diversity on large 00061 boards. See section 6.1 of: 00062 Chatriot, Gelly, Hoock, Perez, Rimmel, Teytaud: 00063 <a href="http://www.lri.fr/~teytaud/eg.pdf"> 00064 Combining expert, offline, transient and online learning in 00065 Monte-Carlo exploration</a> */ 00066 SgPoint GenerateFillboardMove(int numberTries); 00067 00068 private: 00069 const BOARD& m_bd; 00070 00071 float m_invNuPoints; 00072 00073 float m_nuEmptyFloat; 00074 00075 SgRandom& m_random; 00076 00077 /** Points that are potentially empty. */ 00078 std::vector<SgPoint> m_candidates; 00079 00080 bool Empty3x3(SgPoint p) const; 00081 00082 void CheckConsistency() const; 00083 00084 void Insert(SgPoint p); 00085 }; 00086 00087 template<class BOARD> 00088 GoUctPureRandomGenerator<BOARD>::GoUctPureRandomGenerator(const BOARD& bd, 00089 SgRandom& random) 00090 : m_bd(bd), 00091 m_random(random) 00092 { 00093 m_candidates.reserve(GO_MAX_NUM_MOVES); 00094 } 00095 00096 template<class BOARD> 00097 inline const std::vector<SgPoint>& 00098 GoUctPureRandomGenerator<BOARD>::Candidates() 00099 const 00100 { 00101 return m_candidates; 00102 } 00103 00104 template<class BOARD> 00105 inline void GoUctPureRandomGenerator<BOARD>::CheckConsistency() const 00106 { 00107 #if 0 // Expensive check, enable only for debugging 00108 for (GoBoard::Iterator it(m_bd); it; ++it) 00109 { 00110 SgPoint p = *it; 00111 if (m_bd.IsEmpty(p)) 00112 if (find(m_candidates.begin(), m_candidates.end(), p) 00113 == m_candidates.end()) 00114 { 00115 SgDebug() << m_bd 00116 << "Candidates: " << SgWritePointList(m_candidates) 00117 << "does not contain: " << SgWritePoint(p) 00118 << "\nm_bd.CapturedStones(): " 00119 << SgWriteSPointList<SG_MAX_ONBOARD + 1> 00120 (m_bd.CapturedStones()) 00121 << "Last move: " 00122 << SgWritePoint(m_bd.GetLastMove()) << '\n'; 00123 SG_ASSERT(false); 00124 } 00125 } 00126 #endif 00127 } 00128 00129 template<class BOARD> 00130 inline bool GoUctPureRandomGenerator<BOARD>::Empty3x3(SgPoint p) 00131 const 00132 { 00133 return (m_bd.NumEmptyNeighbors(p) == 4 00134 && m_bd.NumEmptyDiagonals(p) == 4); 00135 } 00136 00137 template<class BOARD> 00138 inline SgPoint GoUctPureRandomGenerator<BOARD>::Generate(SgBalancer& balancer) 00139 { 00140 CheckConsistency(); 00141 SgBlackWhite toPlay = m_bd.ToPlay(); 00142 size_t i = m_candidates.size(); 00143 while (true) 00144 { 00145 if (i == 0) 00146 break; 00147 --i; 00148 SgPoint p = m_candidates[i]; 00149 if (! m_bd.IsEmpty(p)) 00150 { 00151 m_candidates[i] = m_candidates[m_candidates.size() - 1]; 00152 m_candidates.pop_back(); 00153 continue; 00154 } 00155 if (GoUctUtil::GeneratePoint(m_bd, balancer, p, toPlay)) 00156 { 00157 CheckConsistency(); 00158 return p; 00159 } 00160 } 00161 CheckConsistency(); 00162 return SG_NULLMOVE; 00163 } 00164 00165 template<class BOARD> 00166 inline SgPoint GoUctPureRandomGenerator<BOARD>::GenerateFillboardMove( 00167 int numberTries) 00168 { 00169 float effectiveTries 00170 = float(numberTries) * m_nuEmptyFloat * m_invNuPoints; 00171 size_t i = m_candidates.size(); 00172 while (effectiveTries > 1.f) 00173 { 00174 if (i == 0) 00175 return SG_NULLMOVE; 00176 --i; 00177 SgPoint p = m_candidates[i]; 00178 if (! m_bd.IsEmpty(p)) 00179 { 00180 m_candidates[i] = m_candidates[m_candidates.size() - 1]; 00181 m_candidates.pop_back(); 00182 continue; 00183 } 00184 if (Empty3x3(p)) 00185 return p; 00186 effectiveTries -= 1.f; 00187 } 00188 // Remaning fractional number of tries 00189 if (m_random.SmallInt(100) > 100 * effectiveTries) 00190 return SG_NULLMOVE; 00191 while (true) 00192 { 00193 if (i == 0) 00194 break; 00195 --i; 00196 SgPoint p = m_candidates[i]; 00197 if (! m_bd.IsEmpty(p)) 00198 { 00199 m_candidates[i] = m_candidates[m_candidates.size() - 1]; 00200 m_candidates.pop_back(); 00201 continue; 00202 } 00203 if (Empty3x3(p)) 00204 return p; 00205 break; 00206 } 00207 return SG_NULLMOVE; 00208 } 00209 00210 template<class BOARD> 00211 inline void GoUctPureRandomGenerator<BOARD>::OnPlay() 00212 { 00213 SgPoint lastMove = m_bd.GetLastMove(); 00214 if (lastMove != SG_NULLMOVE && lastMove != SG_PASS 00215 && ! m_bd.IsEmpty(lastMove)) 00216 m_nuEmptyFloat -= 1.f; 00217 const GoPointList& capturedStones = m_bd.CapturedStones(); 00218 if (! capturedStones.IsEmpty()) 00219 { 00220 // Don't remove stone played, too expensive, check later in Generate() 00221 // that generated point is still empty 00222 for (GoPointList::Iterator it(capturedStones); it; ++it) 00223 Insert(*it); 00224 m_nuEmptyFloat += float(capturedStones.Length()); 00225 } 00226 CheckConsistency(); 00227 } 00228 00229 /** Insert new candidate at random place. */ 00230 template<class BOARD> 00231 inline void GoUctPureRandomGenerator<BOARD>::Insert(SgPoint p) 00232 { 00233 size_t size = m_candidates.size(); 00234 if (size == 0) 00235 m_candidates.push_back(p); 00236 else 00237 { 00238 SgPoint& swapPoint = m_candidates[m_random.SmallInt(size)]; 00239 m_candidates.push_back(swapPoint); 00240 swapPoint = p; 00241 } 00242 } 00243 00244 template<class BOARD> 00245 inline void GoUctPureRandomGenerator<BOARD>::Start() 00246 { 00247 m_nuEmptyFloat = 0; 00248 m_candidates.clear(); 00249 for (typename BOARD::Iterator it(m_bd); it; ++it) 00250 if (m_bd.IsEmpty(*it)) 00251 { 00252 Insert(*it); 00253 m_nuEmptyFloat += 1.f; 00254 } 00255 m_invNuPoints = 1.f / float(m_bd.Size() * m_bd.Size()); 00256 CheckConsistency(); 00257 } 00258 00259 //---------------------------------------------------------------------------- 00260 00261 #endif // GOUCT_PURERANDOMGENERATOR_H