// Lemur OLAP library (c) 2003 National Research Council of Canada by Daniel Lemire, and Owen Kaser
 /**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#ifndef GREEDYFREQUENCYSORT_H
#define GREEDYFREQUENCYSORT_H 

#include "normalizations.h"

// This first implementation is very inefficient; there are much
// faster ways to implement this algorithm.
template <class _DT, class _LDT, class _CT = LessComparator<pair<double,int> > >
class GreedyFrequencySort : public Normalization<_DT,_LDT> {
public:
    GreedyFrequencySort() : mComparator() {}
    virtual GreedyFrequencySort* clone() const { return new GreedyFrequencySort(*this); }
    virtual ~GreedyFrequencySort() {}
    virtual const string getTextName(void) const { return "GreedyFrequencySort ("+ _CT::getDescription() +")";}
    virtual norm_type computeNormalFrom(DataCube<_DT,_LDT>& DC, 
        const norm_type & initial);
    enum {verbose = false, MIN_GRANULARITY=2, MIN_SIZE=4, MAX_LEVELS =15 };

protected:
    virtual deque<pair<double,int> > sortedHistogram(DataCube<_DT,_LDT>& DC, 
        const norm_type & normalization, const uint dimension, 	const vector<int> & begin,
        const vector<int> & end  ); 
    
    virtual vector<double> histogram(DataCube<_DT,_LDT>& DC, 
      const norm_type & normalization, const uint dimension,	const vector<int> & begin,
      const vector<int> & end  );
    norm_type 	frequencysort(DataCube<_DT,_LDT>& DC,const norm_type & norm, 
        const vector<int> & begin, const vector<int> & end) ; 
    _CT mComparator; 
};

//
// Implementations follow...
///////////////////////////////

template <class _DT, class _LDT, class _CT>
deque<pair<double,int> > GreedyFrequencySort<_DT,_LDT,_CT>::sortedHistogram(DataCube<_DT,_LDT>& DC,
    const norm_type & normalization, const uint dimension, 	const vector<int> & begin
    , const vector<int> &end ) {
        vector<double> freq = histogram(DC, normalization, dimension, begin, end);
        // everything else should be cheap
        deque<pair<double,int> > freqindexpairs;
        for(uint index = 0; index < freq.size(); ++index) {
            freqindexpairs.push_back(pair<double,int>(freq[index], index + begin[dimension]));
        }
        sort(freqindexpairs.begin(), freqindexpairs.end(), mComparator);
        return freqindexpairs;
}


template <class _DT, class _LDT,  class _CT>
vector<double> GreedyFrequencySort<_DT,_LDT,_CT>::
histogram(DataCube<_DT,_LDT>& DC, const norm_type & normalization,  const uint dimension,
    const vector<int> & begin, const vector<int> &end) {
    // this can be relatively expensive
    vector<int> shape = DC.getShape();
    assert(end[dimension] == shape[dimension]); // assumes that end doesn't move
    vector<double> answer(end[dimension] - begin[dimension],0.0); 
    for(int value = begin[dimension]; value < end[dimension]; ++value) {
        vector<int> Bounds(end);
        vector<int> Start(begin);//Bounds.size(),0);
        Start[dimension] = value;
        Bounds[dimension] = value + 1;
        vector<int> indices(Start);
        do {
           if(DC.get(PermutationUtil::permute( indices , normalization) ) != 0 ) 
             ++answer[value-begin[dimension]];
        }	while(MathUtil::increment( indices, Start, Bounds));
    }
    return answer;
}


template <class _DT, class _LDT,  class _CT>
norm_type GreedyFrequencySort<_DT,_LDT,_CT>::
computeNormalFrom( DataCube<_DT,_LDT>& DC,const norm_type & initial) {  
    vector<int> shape = DC.getShape();
    // the first step is like a frequency sort
    vector<int> begin(shape.size(),0);
    vector<int> end(shape);
    norm_type norm = frequencysort(DC,initial,begin,end);	
    int granularity = MIN_GRANULARITY;
    const int min = MIN_SIZE;
    int levels = (*max_element(shape.begin(),shape.end()) - min ) / granularity;
    while(levels > MAX_LEVELS) {// just so it remains sane
      ++granularity;
      levels = (*max_element(shape.begin(),shape.end()) - min ) / granularity; 
    }
    //levels = levels ;
    //levels = 1;
    //cout << "levels = " << levels << endl;
    for(int times = 0; times < levels; ++times) {
      for(uint dim = 0; dim < shape.size(); ++ dim) {
        begin[dim] = begin[dim] + granularity;//min(min, end[dim] - granularity);
        if( end[dim] - begin[dim] < min) begin[dim] = max(0,end[dim] - min);
      }
      norm = frequencysort(DC,norm,begin,end);   
    }
    return norm;
}
    


template <class _DT, class _LDT,  class _CT> 		
norm_type 	GreedyFrequencySort<_DT,_LDT,_CT>::frequencysort(DataCube<_DT,_LDT>& DC,const norm_type & norm, const vector<int> & begin, const vector<int> & end) { 
    vector<int> shape = DC.getShape();
    norm_type newnorm; 
    for(uint dim = 0; dim < begin.size(); ++dim ) {
        if(verbose) cout << "dim = "<< dim << endl; 
        const deque<pair<double,int> > & freqindexpairs = sortedHistogram(DC,norm, dim, begin,end);
        vector<int> normalization(shape[dim],0);
        int index = begin[dim];
        for(typename deque<pair<double,int> >::const_iterator i = freqindexpairs.begin(); 
            i != freqindexpairs.end(); ++i, ++index) {
            if(verbose) cout << "pos["<< index<<"] = "<< i->second << endl;
            assert(i->second >= begin[dim] );
            normalization[index] = norm[dim][i->second];						
        }
        assert(index ==  end[dim]);
        for(index = 0 ; index < begin[dim]; ++index) 
          normalization[index] = norm[dim][index];
        assert(PermutationUtil::isPermutation(normalization));
        newnorm.push_back(normalization);
    }
    // ok, so we do only one step in the greedy thing, but it should be sufficient
    // to see if we see a small improvment.
    return newnorm;
}



#endif
