/*

  Code for implementing genetic algorithms.
  This is one ('A') of many possible implementations.

  Example GA Code only - necessary header files not included.
  (c) Copyright 2002: Douglas B. Cameron

  History:
    6/20/01 - Made into example code.
    9/21/01 - added age to individuals and age stats to population
    2/20/01 - Extracted from PatternScanner:Searching.h
*/
#ifndef WithStreamableMembers_h
  #include "WithStreamableMembers.h"
#endif
#ifndef Array_h
  #include "Array.h"
#endif
#ifndef Stats_h          // needed for PopFitnesses and generational ages stats
  #include "Stats.h"
#endif
#ifndef ExactStreamable_h
  #include "ExactStreamable.h"
#endif
#ifndef Possible_h
  #include "Possible.h"
#endif
#ifndef ReadBool_h
  #include "ReadBool.h"      
#endif
#ifndef RetainedRef_h
  #include "RetainedRef.h"
#endif

#define GeneticA_h


namespace GA {

  using namespace AlphaSquaredLib;

  typedef double Fitness;  
    // This definition seems pretty universal. Rarely would a smaller 'int' be justified.

  struct Individual
    : public Streamable2Eq< Individual, PossibleObj< ExactlyStreamed<Fitness> >, int > {

	  // Eventually implicitly hierarchial: Individual could be group of individuals, or group of group, etc.
    typedef Streamable2Eq< Individual, PossibleObj< ExactlyStreamed<Fitness> >, int > BaseT;

    Individual() 
      : BaseT( &Individual::possibleExactFitness, &Individual::generationalAge, 0xa00c ),
        possibleExactFitness(), generationalAge(0) { 
      }

	  Fitness GetFitness() const { 
            Assert(possibleExactFitness.IsValid(),"Individual:GetFitness: Not yet evaluated."); 
            return possibleExactFitness->value; 
            }
	  Fitness EvaluateFitness(); // returns cached value or calls _EvaluateFitness()

    virtual void InvalidateFitness() { possibleExactFitness.Invalidate(); }
    virtual void InvalidateFitnessAndResetAge() { possibleExactFitness.Invalidate(); generationalAge = 0; }

    int GenerationalAge() const { return generationalAge; }
    void IncrementAge() { ++generationalAge; }

	  protected:
      virtual Fitness _EvaluateFitness() = 0;   
        // Can cut short evaluation if too high, or have immediate rejection
		    // of unreasonable cases here or in Replicate.
      override void ReadOn( istream& is ); // for older versions
	  private:
      PossibleObj< ExactlyStreamed<Fitness> > possibleExactFitness;
      int     generationalAge;
  };

  struct GenerationInfo: public Streamable2Eq< GenerationInfo, int, int > {
    // Stats on individuals in last generation.
    
    typedef Streamable2Eq< GenerationInfo, int, int > BaseT;
    // In current version ageStats is neither streamed nor used in equality check.

    GenerationInfo();

    // Misleading: two funcs below must be called independently
    void NewIndividual( bool effectivelyIdenticalParents ) {  // call as new individuals created
      ++newIndividuals;
      if ( effectivelyIdenticalParents )
        ++inbredCount;
      }
    void EachIndividual( int age ) { // call for each indiv after full generation
      ageStats << age;
      }

    void Reset() { newIndividuals=0; inbredCount=0; ageStats.Reset(); }
    float InbredFraction() const { return inbredCount / float(newIndividuals); }
    override void ReadOn( istream& is );  // to catch old versions

    typedef Averager<int,float> StatsT;
    public:
      int     newIndividuals;
      int     inbredCount;    // incremented for each offspring from effectively identical parents
      StatsT  ageStats;
  };

  struct AbstractPopulation {
    // added 8/9/01 
    virtual int Size_Abstract() const = 0;
    virtual const Individual& GetIndividual_Abstract( int i ) const = 0;
    virtual Individual&       GetIndividual_Abstract( int i ) = 0;
    virtual int NumGenerations() const = 0;
    virtual void ReplaceIndividualUsingParents( int replaceI, int parent1, int parent2 ) = 0;
    virtual const GenerationInfo& GetGenerationInfo() const = 0;
  };


  template <class IType>
  struct Population
    : implements AbstractPopulation,
      public Streamable3Eq< Population<IType>, Array<IType>, int, GenerationInfo > {
    // IType must have a .CreateAsOffspring( const IType& pi1, const IType& pi2 );
    // and a .EffectivelyIdentical( const IType& );

    typedef Streamable3Eq< Population<IType>, Array<IType>, int, GenerationInfo > BaseT;

    Population() 
      : BaseT( &Population::set, &Population::generationNumber, &Population::generationInfo, 112260133 ),
        set(), generationNumber(0), generationInfo() {
      }
    explicit Population( const Population& p )  // caution: deep copy
      : BaseT( &Population::set, &Population::generationNumber, &Population::generationInfo, 112260133 ),
        set( MakeNewCopy, p.set ),
        generationNumber( p.generationNumber ),
        generationInfo( p.generationInfo ) {
      }
  
    void operator=( const Population& p ) {  // caution: deep copy
      set.Assign( MakeNewCopy, p.set );
      generationNumber = p.generationNumber;
      generationInfo = p.generationInfo;
      }

    virtual void InitializeSize( int size ) {
      set.SetSize(size);
      generationNumber = 0;
      generationInfo.Reset();
      }
	  virtual void DoGeneration() {
      // First generation is just an evaluation.
      generationInfo.Reset();
      if (generationNumber!=0) {
        EvaluateAll(); // in case some became invalidated - this assumes that _MakeStep() needs them all.
        _MakeStep();   // population --> population
      }
		  EvaluateAll();
      MFForEach( set, &Individual::IncrementAge );
      ComputeAgeStats();
      ++generationNumber;
		  }

    virtual void EvaluateAll() {
      MFForEach( set, &Individual::EvaluateFitness );
      }

    int Size() const { return set.Size(); }
    // Currently should call ComputeAgeStats() if any individuals are modified.
    const IType& El( int i ) const { return set(i); }
          IType& El( int i )       { return set(i); }
    const IType& operator() ( int i ) const { return set(i); }
          IType& operator() ( int i )       { return set(i); }
    const IType& operator[] ( int i ) const { return set[i]; } // no range checking
          IType& operator[] ( int i )       { return set[i]; } // no range checking
    required int NumGenerations() const { return generationNumber; }

    // unused: void SortDescendingFitness();

    virtual void Reset() { 
      generationNumber = 0; set.SetSize(0); generationInfo.Reset();
      }

    virtual void InvalidateFitnesses() {
      MFForEach( set, &Individual::InvalidateFitness );
      }

	  override void ReadOn(istream& is) {
      // catch old versions and ComputeAgeStats
      try {
        BaseT::ReadOn(is);
      } catch (Version::Exception& e ) {
        if (e.failedNumberRead==112260133 ) {  // currently same format
          is >> set >> generationNumber >> generationInfo;
        } else if (e.failedNumberRead==0x00aa) { // first version 0x00aa 2/15/01
          is >> set >> generationNumber;
          generationInfo.Reset();  // was not saved
        } else
          rethrow;
      }
      ComputeAgeStats(); // recompute rather than store
      }

    required const GenerationInfo& GetGenerationInfo() const { return generationInfo; }
    GenerationInfo::StatsT AgeStats() const {
      // Stats on generational age of population
      // Updated every generation. MTL1b safe - safe to call while other threads changing population,
      // but stats may be in inexact state (not exact for end of generation). Averager<> is MTL1b.
      return generationInfo.ageStats;
      }
    void ComputeAgeStats() {
      // should be called if any individuals are modified
      generationInfo.ageStats.Reset();
      for (int i=0; i<set.Size(); ++i)
        generationInfo.EachIndividual( set[i].GenerationalAge() );
      }

    // Required of AbstractPopulation
    override int Size_Abstract() const { return Size(); }
    override const Individual& GetIndividual_Abstract( int i ) const { return set(i); }
    override Individual&       GetIndividual_Abstract( int i )       { return set(i); }
    override void ReplaceIndividualUsingParents( int replaceI, int parent1, int parent2 ) {
      bool effIdenticalParents = set[parent1].EffectivelyIdentical( set[parent2] );
      generationInfo.NewIndividual( effIdenticalParents );
      set(replaceI).CreateAsOffspring( set[ parent1 ], set[ parent2 ] );
      }

    protected:
      virtual void _MakeStep() = 0;
  	    // make new generation based on current and fitness - crossover/mutate
		    // mutate: mutate probability (may be func of step. i.e. temperature)
		    // crossover: locations and rates, may be func of time too 
	  protected:
		  Array<IType>   set;
      int            generationNumber;
      GenerationInfo generationInfo;
  };


  struct PopFitnesses
    : public Streamable3<PopFitnesses,Array<Fitness>,StatsAccumulator,int> { 
    // Convenient holder of fitness values from a population.
    // 1/9/02 - moved generationAge here, often needed
    // Abstracted from PatternPopFitnesses 8/9/01

    typedef Streamable3<PopFitnesses,Array<Fitness>,StatsAccumulator,int> BaseT;

    PopFitnesses(); 
    PopFitnesses( const PopFitnesses& r );
    void operator= ( const PopFitnesses& r );

    virtual void SetFrom( const AbstractPopulation& pop );
    virtual void Reset();

    int Size() const { return fitnessArray.Size(); }
    const Array<Fitness>&    GetFitnesses() const { return fitnessArray; }
    const StatsAccumulator&  Stats() const { return statsAccumulator; }
    int GenerationAge() const { return generationAge; }

    override_ void ReadOn(istream& is); // catch old versions
    protected:
      void ForceGenerationAge(int g) { generationAge = g; } // necessary for old versions
    private: 
      Array<Fitness>    fitnessArray;   
      StatsAccumulator  statsAccumulator;
      int               generationAge;    // not purely logical to be here but often utilitarian
  };


  struct Tournament {
    // Modifies an AbstractPopulation using tournament selection. Individuals are randomly grouped
    // into tournaments. In each tournament approximately half of worst individuals
    // are replaced using parents from better half.

    struct Settings: public Streamable1Eq<Settings,int> {

      Settings()
        : Streamable1Eq<Settings,int>( &Settings::size, 201102209 ),
          size( 4 ) { // 4 is probably best - maximum 'softness'/maximum 'temperature'/minimum fitness pressure
        }
      virtual Fitness TournamentFitnessTransform( Fitness f ) const = 0;  
        // Optional transformation of Fitness for tournament.
        // { return f; }  could have this default but currently want client to be explicit

      public:
        int    size;               // min of 4
    };

    void Apply( AbstractPopulation& pop, const Settings& settings );

  };


  template <class PopulationT>
  struct PopulationSet
    : public Streamable1< PopulationSet<PopulationT>, DynArray<PopulationT> > {
	  // Set of partially or completely isolated populations (diffusion)
	  // Eventually could be on separate machines or threads

	  // The parameters used to generate GAPopulation's could themselves
	  // evolve - e.g. numIndividuals - with their fitness being avg individual
	  // fitness. So we can have evolution of GAPopulation's too.

	  // At a higher level than this, optimal diffusion rate could be evolved.
	  //	Since the optimal population size is unknown, it is probably best
	  //	to have a distribution of sizes and then optimize this size via evolution too.

    /* PopulationSet( int numPops, diffusion parameters, PopulationFactory pf? )			
	  GenerationStep() {
		  for each i  pops[i].GenerationStep()
		  }
	  GenerationSteps(int nSteps) { // more efficient?
		  for each i  pops[i].GenerationSteps(nSteps);
		  } */
    typedef Streamable1< PopulationSet<PopulationT>, DynArray<PopulationT> > BaseT;

    PopulationSet() : BaseT(&PopulationSet::pops,0x8888), pops(20) { 
      }
    PopulationSet( NewCopyToken, const PopulationSet<PopulationT>& p )
      : BaseT(&PopulationSet::pops,0x8888), pops( MakeNewCopy, p.pops ) {
      }
    void Assign( NewCopyToken, const PopulationSet<PopulationT>& p ) {
      pops.Assign( MakeNewCopy, p.pops );
      }

    virtual PopulationT& AddPopulation() { 
      pops.IncrementSize();
      return pops.Last();
      }

    virtual void Reset() { pops.SetSize(0); }

    int Size() const { return pops.Size(); }
    const PopulationT& Pop( int i ) const { return pops(i); }

    protected:
  	  DynArray<PopulationT> pops;		// the number of individuals/pop may vary
  };




  struct NumericRecombMutator
    : public Streamable2Eq< NumericRecombMutator, float, float > {
    /*   Mutational variance based on population variance

    The range of mutations is set using a standard deviation based on the
    distribution of the population. For any particular gene this is done using
    the difference between two parents. It is assumed that the parents
    were selected randomly.

    The scale of population diversity is usually a good indicator of the
    useful mutation scale. For example:
      x' = x + F * (x.a - x.b)
          ["Differential Evolution", see Price and Storn, DDJ, April 1997]
    Price and Storn use a fixed 'F' usually near 0.8. To get some additional
    randomness here, F is given a random Gaussian multiplier.

    This combines recombination and mutation based on parental variation 
    into continuous combination. When the populaiton is very disperse or
    the mutationalStdDevCoef is large this is effectively random generation.

    *****************************************
    Use of population variance in this way, places responsibility on the
    initial population distribution for setting the initial search scale.
    *****************************************
    This is much like a 'swarm' or purely statistical mechanical search.
    This does not control the temperature - which depends on the fitness
    based acceptance of the individual that this generates - only the
    search area.

    It also has many analogies to dynamic step size integration and simplex
    minimization. It is in many ways an generalization of those, although
    less efficient but able to handle multiple minima/noise.

    This is like a Monte Carlo integration over the region covered by
    the current population. It searches more in regions of dense
    population, presumably where fitness/survival is better.

    Considered having stdDevCoef evolve as well. That would be useful 
    where average local derivative is a good indicator of the average
    derivative over a much larger scale. If true then this evolution is
    inefficient (not good an expanding search region by orders of magnitude)
    and phenomena should be explicitly coded.

    Size of stdDevCoef:
      >1  --> expansion of search space
      ~1  --> "weighted" Monte Carlo over population area
      <1  --> emphasises recombination over mutation
              uncommon but possible
    */

    NumericRecombMutator() 
      : Streamable2Eq< NumericRecombMutator, float, float >(
          &NumericRecombMutator::prob, &NumericRecombMutator::stdDevCoef, 112200536 ),
        prob(0.08f),       // somewhat arbitrary default 
        stdDevCoef(0.8f) { // somewhat arbitrary default
      }
  
    template <typename T, class IntervalT>
    T Gaussian( T parent1Value, T parent2Value, const Bounds<T,IntervalT>& bounds ) const {
      // May not be strictly Gaussian - mutation is repeated until correctly
      // bounded value obtained. Mutation occurs about parent1Value. So it
      // is assumed that parent1Value and parent2Value are randomly exchanged
      // by caller. No mutation occurs if values are identical.
      // Throws if bounds has insufficient overlap with requested Gaussian
      // distribution.
      if ( !Randomly(prob) ) 
        return parent1Value;
      T range = parent2Value - parent1Value;
      if (range==0)            // probably common enough to test for efficiency
        return parent1Value;
      T v = BoundedGaussianRandom( parent1Value, range*stdDevCoef, bounds );
      return v;
      }

    public:
      float prob;
      float stdDevCoef;
  };

}

