Thursday, March 1, 2018

Artificial Intelligence I -- genetic algorithm

As a professional programmer, it's only natural to keep an eye on what's trending. So, what's the hottest thing right now? Is it AlphaGo crushing every human opponent, so unbeatable it's lonely at the top? Google pivoting from mobile first to AI first? Or Apple developing its own AI chip? You could say everyone is crazy about this stuff. So as a programmer who does this just for fun, of course I have to look into artificial intelligence too.

If you're interested in this topic, don't waste your time on this blog. What you should do right now is head over to
http://cs231n.github.io/
and start learning.
There are also courses you can watch on YouTube:
Stanford
NTU (National Taiwan University)
You'll soon know more than I do. (I'm really not an expert XD)

No school is better than real school!

Hmm, you're still here?
Alright then. For the first post of this series reboot, let's learn the genetic algorithm.

What does this have to do with artificial intelligence?
A long time ago, besides today's mainstream approach of finding a neural network's weights with gradient descent and backpropagation, people had another trick: the genetic algorithm. Nowadays only a small community still seems to research it.
So why bother with something this obscure? Because I'm only studying this out of interest; whether it's a hot topic really doesn't matter.

And because this method, learned from biological evolution, genuinely works on certain problems. It's honestly quite remarkable!

OK, the genetic algorithm is simple, just the following steps (a minimal runnable sketch follows the list):

  1. Use random numbers to generate a population of creatures expressed in some encoding; this encoding is called the genome. (It can be made of bits, bytes, floats, etc.)
  2. Put these creatures into an environment and test them. At the end of a round each one gets a score. (For example: how much sugar it ate, how far it ran, how long it survived.)
  3. Breed the next generation, selecting parents with probability proportional to their scores, so higher-scoring individuals are more likely to be picked. For each pair selected, use a random number to decide whether they exchange genes (crossover) or pass through unchanged. While genes are being copied, mutations can occur, e.g. a bit flipping from 0 to 1, or a small value being added to or subtracted from a float.
  4. Once enough individuals have been produced, go back to step 2 for the next round, and repeat until we find a solution.
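
Here is a minimal, self-contained sketch of that loop on a toy problem: evolving a bitstring until it is all 1s. Everything in it is illustrative (population size, rates, the toy fitness), and it uses std::mt19937 from <random> rather than the MTRand class the full example below uses.

#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

int main()
{
    std::mt19937 rng(12345);
    std::uniform_real_distribution<double> uni(0.0, 1.0);
    const size_t numPop = 20, numGenes = 16;

    // step 1: a random initial population of bitstring genomes
    std::vector<std::vector<int> > pop(numPop, std::vector<int>(numGenes));
    for (size_t i = 0; i < numPop; ++i)
        for (size_t j = 0; j < numGenes; ++j)
            pop[i][j] = rng() % 2;

    for (int generation = 0; generation < 200; ++generation) {
        // step 2: score every genome; fitness = number of 1 bits
        // (+1 keeps every selection weight positive)
        std::vector<int> fitness(numPop);
        for (size_t i = 0; i < numPop; ++i)
            fitness[i] = 1 + std::count(pop[i].begin(), pop[i].end(), 1);

        size_t best = std::max_element(fitness.begin(), fitness.end())
                    - fitness.begin();
        if (fitness[best] == (int)numGenes + 1) {
            std::cout << "solved at generation " << generation << "\n";
            return 0;
        }

        // step 3: fitness-proportional selection, crossover, mutation
        std::discrete_distribution<size_t> select(fitness.begin(), fitness.end());
        std::vector<std::vector<int> > next;
        while (next.size() < numPop) {
            std::vector<int> child = pop[select(rng)];
            if (uni(rng) < 0.8) {                      // crossover this pair?
                const std::vector<int> &other = pop[select(rng)];
                size_t cut = rng() % numGenes;         // one-point crossover
                std::copy(other.begin() + cut, other.end(),
                          child.begin() + cut);
            }
            for (size_t j = 0; j < numGenes; ++j)
                if (uni(rng) < 0.02)
                    child[j] ^= 1;                     // mutation: flip a bit
            next.push_back(child);
        }
        pop.swap(next);  // step 4: run the new generation through step 2
    }
    std::cout << "no perfect genome found\n";
    return 0;
}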


You've probably noticed how many places random numbers are used. At its core, a genetic algorithm is a systematic way of trying out combinations of candidate solutions: if you searched purely at random instead, you might eventually stumble on an answer, but the computer could grind away until the end of time...


Below I provide an example for reference; it finds a network that can compute XOR.

But before that, let's talk about what a neural network is.
A neural network is a network made of many neurons, where each neuron's inputs connect to the outputs of several other neurons. Computation starts from the outermost inputs: each neuron sums up its inputs, passes the sum through an activation function to produce an output, and this continues until the final outputs are computed. The activation function can be sigmoid, tanh, relu, etc., depending on the purpose of the network.
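
In code, a single neuron is only a few lines. Here is a tiny self-contained sketch of the computation just described; the concrete numbers (weights 20, 20 and bias -10) are borrowed from the hand-made solution that appears, commented out, in the full listing below.

#include <cmath>
#include <iostream>
#include <vector>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

// sum the weighted inputs, add the bias, squash with the activation function
double neuron(const std::vector<double> &inputs,
              const std::vector<double> &weights, double bias)
{
    double v = bias;
    for (size_t i = 0; i < inputs.size(); ++i)
        v += inputs[i] * weights[i];
    return sigmoid(v);
}

int main()
{
    std::vector<double> in(2);  in[0] = 1; in[1] = 0;
    std::vector<double> w(2);   w[0] = 20; w[1] = 20;
    std::cout << neuron(in, w, -10.0) << "\n"; // ~0.99995, i.e. it "fires"
}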

A network that can solve the XOR problem is shown in the video below.


The real key to the network is the bias: without bias terms, each neuron's decision boundary is forced through the origin and the network has no way to shift its thresholds, so it can't separate the cases of a problem like XOR.
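
To make that concrete, here is a hand-wired 2-2-1 network built from the solution weights that appear (commented out) in main() of the full program below; the constant added inside each sigmoid is the bias, and it is exactly what moves each neuron's threshold away from zero.

#include <cmath>
#include <iostream>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

// weights 20/-20, biases -10/30/-30: taken from the known solution
// that is commented out in main() of the full program below
double xor_net(double a, double b)
{
    double h0 = sigmoid( 20*a + 20*b - 10);  // behaves like OR(a, b)
    double h1 = sigmoid(-20*a - 20*b + 30);  // behaves like NAND(a, b)
    return sigmoid(20*h0 + 20*h1 - 30);      // behaves like AND(h0, h1)
}

int main()
{
    for (int a = 0; a <= 1; ++a)
        for (int b = 0; b <= 1; ++b)
            std::cout << a << " xor " << b << " ~ " << xor_net(a, b) << "\n";
}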


/*
 * This sample uses a GA to find a solution to the XOR problem
 */

#include <vector>
#include <deque>
#include <algorithm>
#include <iostream>
#include <cmath>
#include <cassert>
#include <ctime>
#include <cstdlib>  // for exit()
#include "MersenneTwister.h"  // MTRand

using namespace std;

#define MAX_INIT_WEIGHT   30.0
#define MAX_MUTATE_WEIGHT 15.0
#define MAX_FITNESS       1000000.0
const double crossover_rate = 0.8;
const double mutate_rate = 0.6;
const size_t numPopulation = 100;
const size_t max_generation = 100;
const double epsilon = 0.00001;    

MTRand g_rng(time(NULL));

double sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}

class Node // well, 'node' is shorter than 'neuron'...
{
public:
    Node (int numInputs);
    double run(vector<double> const&inputs);
    void getWeights(vector<double> &weights);
    size_t setWeights(const double *weights);
    
private:
    vector<double> weights_;
};

Node::Node(int numInputs)
{
    weights_.resize(numInputs+1);
}

double Node::run(const vector< double >& inputs)
{
    assert (inputs.size()+1 == weights_.size() && "number of inputs wrong");
    double v=0;
    for (size_t i=0; i < inputs.size(); ++i) {
        v += inputs[i] * weights_[i];
    }
    v += weights_.back();
    return sigmoid(v);
}

void Node::getWeights(vector< double >& weights)
{
    weights.insert(weights.end(), weights_.begin(), weights_.end());
}

size_t Node::setWeights(const double* weights)
{
    weights_.assign(weights, weights + weights_.size());
    return weights_.size();
}



class Layer
{
public:
    Layer(int numNodes, int numInputs);
    ~Layer();

    void run(vector<double> const&inputs, vector<double> &outputs);
    void getWeights(vector<double> &weights);
    size_t setWeights(const double *weights);
    
private:
    vector<Node *> nodes_;
    Layer(Layer const&rhs) {};
    Layer &operator=(Layer const&rhs) { return *this;}
};

Layer::Layer(int numNodes, int numInputs)
{
    nodes_.resize(numNodes);
    for (size_t i=0; i < nodes_.size(); ++i) {
        nodes_[i] = new Node(numInputs);
    }
}

Layer::~Layer()
{
    for (size_t i=0; i < nodes_.size(); ++i) {
        delete nodes_[i];
    }
    nodes_.clear();
}

void Layer::run(const vector< double >& inputs, vector< double >& outputs)
{
    outputs.resize(nodes_.size());
    for (size_t i=0; i < nodes_.size(); ++i) {
        outputs[i] = nodes_[i]->run(inputs);
    }
}

void Layer::getWeights(vector< double >& weights)
{
    for (size_t i=0; i < nodes_.size(); ++i) {
        nodes_[i]->getWeights(weights);
    }

}

size_t Layer::setWeights(const double* weights)
{
    size_t pos = 0;
    for (size_t i=0; i < nodes_.size(); ++i) {
        pos += nodes_[i]->setWeights(weights + pos);
    }
    return pos;
}



class NeuralNetwork
{
public:
    NeuralNetwork(int numInputs, vector<int> const& hiddenLayerTopology, int numOutputs);
    ~NeuralNetwork();
    void run (vector<double> const &inputs, vector<double> &outputs);
    void getWeights(vector<double> &weights);
    void setWeights(vector<double> const &weights);
    
private:
    vector<Layer *> layers_;
    NeuralNetwork(const NeuralNetwork &rhs) {}
    NeuralNetwork &operator=(const NeuralNetwork &rhs) { return *this; }
};

NeuralNetwork::NeuralNetwork(int numInputs, const vector< int >& hiddenLayerTopology, int numOutputs)
{
    assert (!hiddenLayerTopology.empty() && "need at least one hidden layer");
    layers_.resize(hiddenLayerTopology.size() + 1);
    layers_[0] = new Layer(hiddenLayerTopology[0], numInputs);
    for (size_t i=1; i < layers_.size()-1; ++i) {
        layers_[i] = new Layer(hiddenLayerTopology[i], hiddenLayerTopology[i-1]);
    }
    layers_.back() = new Layer(numOutputs, hiddenLayerTopology.back());
}

NeuralNetwork::~NeuralNetwork()
{
    for (size_t i=0; i < layers_.size(); ++i) {
        delete layers_[i];
    }
    layers_.clear();
}

void NeuralNetwork::run(const vector< double >& inputs, vector< double >& outputs)
{
    vector< double > linputs, loutputs;
    linputs = inputs;
    for (size_t i=0; i < layers_.size(); ++i) {
        layers_[i]->run(linputs, loutputs);
        linputs = loutputs;
    }
    assert (outputs.size() == linputs.size());
    outputs.swap(linputs);
}

void NeuralNetwork::getWeights(vector< double >& weights)
{
    weights.clear();
    for (size_t i=0; i < layers_.size(); ++i) {
        layers_[i]->getWeights(weights);
    }

}

void NeuralNetwork::setWeights(const vector< double >& weights)
{
    size_t pos = 0;
    for (size_t i=0; i < layers_.size(); ++i) {
        pos += layers_[i]->setWeights(&weights[pos]);
    }
}

struct Genome
{
    double fitness_;
    vector<double> genes_;
    
    Genome(size_t numGenes=0);
    void init(size_t numGenes);
    void copy_genes_from (Genome const&source, size_t from_index, size_t to_index, size_t replace_start_index, double mutate_rate);
    void report () const;
    
    bool operator<(const Genome &rhs) const
    {
        return fitness_ > rhs.fitness_;
    }
};

Genome::Genome(size_t numGenes)
{
    if (numGenes) {
        genes_.resize(numGenes);
    }
}


void Genome::init(size_t numGenes)
{
    genes_.resize(numGenes);
    for (size_t i=0; i < numGenes; ++i) {
        genes_[i] = (g_rng.rand() - 0.5) * 2 * MAX_INIT_WEIGHT;
    }
}

void Genome::copy_genes_from(const Genome& source, size_t from_index, size_t to_index, size_t replace_start_index, double mutate_rate)
{
    for (size_t i=from_index,cgene=0; i < to_index; ++i,++cgene) {
        genes_[replace_start_index+cgene] = source.genes_[i];
        if (g_rng.rand() < mutate_rate) {
            genes_[replace_start_index+cgene] += (g_rng.rand() - 0.5) * 2 * MAX_MUTATE_WEIGHT;
        }
    }
}

void Genome::report() const
{
    cout << "weights ";
    for (size_t i=0; i < genes_.size(); ++i) {
        cout << genes_[i] << " ";
    }
}

void spawn_new_generation (vector<Genome> &population)
{
    assert (!population.empty());
    
    double total_fitness = 0;

    const size_t numPopulation = population.size();
    const size_t numGenes = population[0].genes_.size();
    
    for (size_t i=0; i < numPopulation; ++i) {
        total_fitness += population[i].fitness_;
    }
    
    // build a cumulative, normalized fitness list to search later
    deque<double> fitness_list(numPopulation+1);
    for (size_t p=0; p < numPopulation; ++p) {
        population[p].fitness_ /= total_fitness;
        fitness_list[p+1] = population[p].fitness_ + fitness_list[p];
    }
    
    // new population by crossover, mutation
    vector<Genome> new_population;
    new_population.reserve(numPopulation);
    while (new_population.size() < numPopulation) {
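        // roulette-wheel selection: a uniform draw falls into one parent's
        // slice of the cumulative fitness list; find it with binary search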
        // randExc() returns a value in [0,1), so upper_bound never returns end()
        int lidx = upper_bound(fitness_list.begin(), fitness_list.end(), g_rng.randExc()) - fitness_list.begin();
        --lidx;
        int ridx = upper_bound(fitness_list.begin(), fitness_list.end(), g_rng.randExc()) - fitness_list.begin();
        --ridx;
        Genome a(numGenes);
        Genome b(numGenes);
        if (g_rng.rand() < crossover_rate) {
            size_t switch_point = g_rng.randInt() % (numGenes-4); // switch point at begin or end doesn't make sense.
            switch_point += 4;
            a.copy_genes_from (population[lidx], 0, switch_point, 0, mutate_rate);
            a.copy_genes_from (population[ridx], switch_point, numGenes, switch_point, mutate_rate);
            b.copy_genes_from (population[ridx], 0, switch_point, 0, mutate_rate);
            b.copy_genes_from (population[lidx], switch_point, numGenes, switch_point, mutate_rate);
        } else {
            a.copy_genes_from (population[lidx], 0, numGenes, 0, mutate_rate);
            b.copy_genes_from (population[ridx], 0, numGenes, 0, mutate_rate);
        }
        new_population.push_back(a);
        new_population.push_back(b);
    }
    
    new_population.swap(population);
}

double calculate_fitness(vector<double> const&results, vector<double> const&expects)
{
    assert (results.size() == expects.size());
    double delta = 0;
    for (size_t i=0; i < results.size(); ++i) {
        delta += fabs(results[i] - expects[i]);
    }
    
    // reward small errors sharply: fitness grows like 1 / delta^4,
    // and an essentially exact answer gets MAX_FITNESS
    double fitness;
    if (delta <= epsilon) {
        fitness =  MAX_FITNESS;
    } else {
        fitness = 1.0 / (delta*delta);
        fitness *= fitness;
    }
    
    cout << "results ";
    for (size_t i=0; i < results.size(); ++i) {
        cout << results[i] << ", ";
    }
    cout << " fitness " << fitness;
    cout << endl;
    
    return fitness;
}

int main()
{
    // XOR test, two inputs, one output
    vector<int> topo; // hidden layers
    topo.push_back(2);
    NeuralNetwork network(2, topo, 1);
    
    vector<double> weights;
    network.getWeights(weights);
    const size_t numGenes = weights.size();
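    /* A known hand-made solution, kept for debugging: change the opening
       marker of the block below to "//*" (and restore the assignment in
       the init loop) to seed the population with it. */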
    /*
    vector<double> sol;
    sol.push_back(20);
    sol.push_back(20);
    sol.push_back(-10);

    sol.push_back(-20);
    sol.push_back(-20);
    sol.push_back(30);

    sol.push_back(20);
    sol.push_back(20);
    sol.push_back(-30);
    //*/
    vector<Genome> population(numPopulation);
    for (size_t i=0; i < population.size(); ++i) {
        population[i].init(numGenes);
        //population[i].genes_ = sol;
    }
    
    
    bool found_solution = false;
    for (int generation = 0; !found_solution && generation < max_generation; ++generation) {
        cout << "generation " << generation << endl;
        
        vector<double> inputs(2);
        vector<double> outputs(1);
        
        vector<double> results(4);
        vector<double> expects(4);  // XOR truth table, in the order tested below
        expects[0] = 0;  // 0 xor 0
        expects[1] = 0;  // 1 xor 1
        expects[2] = 1;  // 0 xor 1
        expects[3] = 1;  // 1 xor 0
        
        for (size_t i=0; i < population.size(); ++i) {
            network.setWeights(population[i].genes_);
            inputs[0] = 0;
            inputs[1] = 0;
            network.run(inputs, outputs);
            results[0] = outputs[0];
            
            inputs[0] = 1;
            inputs[1] = 1;
            network.run(inputs, outputs);
            results[1] = outputs[0];
            
            inputs[0] = 0;
            inputs[1] = 1;
            network.run(inputs, outputs);
            results[2] = outputs[0];
            
            inputs[0] = 1;
            inputs[1] = 0;
            network.run(inputs, outputs);
            results[3] = outputs[0];
            
            population[i].fitness_ = calculate_fitness(results, expects);
            
            if (population[i].fitness_ == MAX_FITNESS) {
                found_solution = true;
                cout << "*** found solution ";
                population[i].report();
                cout << " ***\n";
                cout << "at generation " << generation << endl;
                ::exit(0);
            }
        }
        
        spawn_new_generation (population);
    }
    
    
    return 0;
}


With 100 individuals per generation, as in my example, it usually finds the answer within 3-5 generations.
When I first got it working I was honestly amazed; it actually works!
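
If you want to try it yourself: the program only needs the standard library plus the MTRand class from the single-header MersenneTwister.h (the widely circulated Mersenne Twister C++ header). Assuming you save the listing as ga_xor.cpp (any name works) next to that header, compiling should look something like:

g++ -O2 -o ga_xor ga_xor.cpp
./ga_xor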

Alright, that's it for this post. See you next time!

