// Lemur OLAP library (c) 2003 National Research Council of Canada by Daniel Lemire, and Owen Kaser
 /**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#ifndef RANDOMCLUSTERS
#define RANDOMCLUSTERS

#include "normalizationscommon.h"

// Freestanding functions that generate random clusters of data in a datacube.
// (Not part of the DataCube interface because they are not general-purpose.)


// "density" is the probability that a given cell will be occupied, which would then
// be the _expected_ density of the cube.


// stores 1's only.  

/**
 * Randomly stores 1's into the box of DC spanned by beginCorner and endCorner.
 *
 * Every cell of the box is selected independently with probability `density`.
 * A selected cell for which DC.get() is false is set to 1 and counted; a
 * selected cell that is already occupied is left untouched and not counted.
 *
 * @param DC          cube to write into
 * @param beginCorner lowest index of the box in each dimension
 * @param endCorner   highest index of the box in each dimension; callers in
 *                    this file pass it as an inclusive corner
 * @param density     per-cell selection probability; values >= 1 select every
 *                    cell and values <= 0 select none
 * @return number of cells that were newly set to 1
 */
template<class _DT, class _LDT>
int addUniformCluster( DataCube<_DT,_LDT> &DC, vector<int> beginCorner, 
      vector<int> endCorner, double density )
{
  // Validate the corners before anything indexes them.
  assert(beginCorner.size() == endCorner.size());
  assert(DC.getShape().size() == beginCorner.size());

#ifdef BLAB
  cout << "filling from ";
  for (uint i=0; i < beginCorner.size(); ++i)
    cout << beginCorner[i] << " ";
  cout << " to ";
  for (uint i=0; i < endCorner.size(); ++i)
    cout << endCorner[i] << " ";
#endif

  // A box whose end lies below its begin in some dimension holds no cells;
  // without this guard the do/while below would still probe beginCorner.
  for (uint i=0; i < beginCorner.size(); ++i)
    if (endCorner[i] < beginCorner[i]) return 0;

  vector<int> Indices(beginCorner);

  // Upper limits handed to MathUtil::increment are one past endCorner
  // (presumably it treats them as exclusive -- confirm against MathUtil).
  vector<int> bounds(endCorner);
  for (uint i=0; i < bounds.size(); ++i)  ++bounds[i];

  // rand() yields RAND_MAX+1 distinct values, so scaling by RAND_MAX+1 makes
  // Pr[rand() < threshold] == density, including exactly 1 for density == 1.
  // Kept as a double so that an out-of-range density cannot overflow an int.
  const double threshold = density * (RAND_MAX + 1.0);

  int count = 0;

  do {
    if (rand() < threshold && !DC.get(Indices)) {
      DC.put(1,Indices);
      ++count;
    }
  } while(MathUtil::increment( Indices, beginCorner, bounds)); 
  return count;
}





/*
 * Only ADDS spurious tuples: runs addUniformCluster over the box that starts
 * at the all-zero index and ends at DC.maxIndex(), using the given density.
 * Returns whatever addUniformCluster returns.
 */
template<class _DT, class _LDT>
int addUniformNoise( DataCube<_DT,_LDT> &DC, double density )
{
  const vector<int> lowCorner(DC.getShape().size(), 0);
  return addUniformCluster(DC, lowCorner, DC.maxIndex(), density);
}



/**
 * Adds spurious tuples and also removes some existing ones.
 *
 * Every cell of the cube is selected independently with probability
 * `density`. A selected cell is toggled: an empty cell (DC.get() is false)
 * receives 1, an occupied cell is overwritten with 0.
 *
 * @param DC      cube to modify
 * @param density per-cell toggle probability; values >= 1 toggle every cell
 *                and values <= 0 toggle none
 * @return net change in the number of occupied cells (cells added minus
 *         cells cleared); may be negative
 */
template<class _DT, class _LDT>
int toggleNoise( DataCube<_DT,_LDT> &DC, double density )
{
  vector<int> bounds = DC.getShape();
  vector<int> origin(bounds.size(),0);
  vector<int> ix(origin);

  // rand() yields RAND_MAX+1 distinct values, so scaling by RAND_MAX+1 makes
  // Pr[rand() < threshold] == density, including exactly 1 for density == 1.
  // Kept as a double so that an out-of-range density cannot overflow an int.
  const double threshold = density * (RAND_MAX + 1.0);

  int count = 0;

  do {
    if (rand() < threshold)  {
      if (! DC.get(ix)) {
        DC.put(1,ix);
        ++count;
      }
      else {
        DC.put(0,ix);
        --count;
      }
    }
  } while(MathUtil::increment( ix, origin, bounds)); 
  return count;
}




/**
 * Drops nClusters randomly sized, randomly placed uniform clusters into DC.
 *
 * For each cluster a volume is drawn between 0 and maxClusterVol, converted
 * into the same fraction of every dimension of the cube, and rounded up to a
 * whole number of cells per dimension. The cluster is then placed at a
 * uniformly chosen position where it fits inside the cube and filled through
 * addUniformCluster with a density drawn between densityMin and densityMax.
 *
 * @param DC            cube to write into (every dimension is assumed to hold
 *                      at least one cell)
 * @param nClusters     number of clusters to generate
 * @param maxClusterVol upper limit for the randomly drawn cluster volume
 * @param densityMin    lowest density a cluster may get
 * @param densityMax    highest density a cluster may get
 * @return sum of the counts returned by addUniformCluster
 */
template<class _DT, class _LDT>
  int addUniformClusters( DataCube<_DT,_LDT> &DC, int nClusters, int maxClusterVol, double densityMin, 
         double densityMax){

  int numAlloc = 0;
  vector<int> shape = DC.getShape();
  int nDims = shape.size();

  for (int i=0; i < nClusters; i++) {
    // uniformly choose cluster dimensions

    double thisClusterVol = double(rand())/RAND_MAX * maxClusterVol;
    double fracOfTotalVol = min(1.0,thisClusterVol/DC.getVolume());
    double fracOfEachDim = pow( fracOfTotalVol, 1.0/nDims);

#ifdef BLAB
    cout << "thisClusterVol is " << thisClusterVol << endl;
    cout << "fracOfTotalVol is " << fracOfTotalVol << endl;
    cout << "fracOfEachDim is " << fracOfEachDim << endl;
#endif
    vector<int>clusterSize(nDims);
#ifdef BLAB
    cout << "Cluster: ";
#endif
    for (int j=0; j < nDims; ++j) {
      clusterSize[j] = int( ceil( shape[j] * fracOfEachDim) );
      // A zero volume draw would give an empty extent and therefore an end
      // corner below the start corner; always cover at least one cell.
      if (clusterSize[j] < 1) clusterSize[j] = 1;
#ifdef BLAB
      cout << clusterSize[j] << " ";
#endif
    }
#ifdef BLAB
     cout << endl;
#endif
    vector<int> startPos(nDims);
    vector<int> endPos(nDims);

    for (int j=0; j < nDims; ++j) {
      const int maxStart = shape[j] - clusterSize[j];  // largest possible starting posn.
      // Dividing by RAND_MAX+1 keeps the random factor in [0,1), so scaling
      // by maxStart+1 picks each of 0..maxStart with equal probability.
      startPos[j] = int((maxStart + 1) * (rand() / (RAND_MAX + 1.0)));  // choose starting posn
      endPos[j] = startPos[j] + clusterSize[j] -1;
    }

    double dens = densityMin + (double(rand())/RAND_MAX) *  (densityMax - densityMin);

    numAlloc += addUniformCluster(DC, startPos, endPos, dens);
  }
  return numAlloc;
}


// For a given chunking scheme, fill some randomly chosen chunks. Cube will be
// in the kernel for the specified chunking size if fillPct=1.0
//
// DC         - cube to write into
// nChunks    - number of chunk draws; draws are independent, so the same
//              chunk may be chosen more than once
// chunkShape - extent of a chunk in each dimension (one entry per cube
//              dimension, each > 0)
// fillPct    - density handed to addUniformCluster for every chosen chunk
//
// Chunks on the far edge of the cube are clipped to the cube's shape.
// Returns the sum of the counts returned by addUniformCluster.

template<class _DT, class _LDT>
     int  fillChunks( DataCube<_DT,_LDT> &DC, int nChunks, vector<int> chunkShape, double fillPct = 1.0){
  vector<int> shape = DC.getShape();
  assert(chunkShape.size() == shape.size());

  vector<int> chunkCoordsToFillIn(shape.size());
  vector<int> endCoordsToFillIn(shape.size());

  int denseTuples = 0;

  for (int ch = 0; ch < nChunks; ++ch) {
    for (uint i=0; i < shape.size(); ++i) {
      assert(chunkShape[i] > 0);  // a zero extent would divide by zero below
      const int shapeInChunks = int(ceil(double(shape[i]) / chunkShape[i]));
      // Dividing by RAND_MAX+1 keeps the random factor in [0,1), so the chunk
      // number is always in 0..shapeInChunks-1 and never one past the last chunk.
      const int chunkNumber = int(shapeInChunks * (rand() / (RAND_MAX + 1.0)));
      chunkCoordsToFillIn[i] = chunkNumber * chunkShape[i];
      endCoordsToFillIn[i] = min(chunkCoordsToFillIn[i] + chunkShape[i] -1, shape[i]-1);
    }
    denseTuples += addUniformCluster(DC,chunkCoordsToFillIn,endCoordsToFillIn,fillPct);
  }
  return denseTuples;
}





/**
 * Visits every chunk of the cube and, with probability probFull, fills it by
 * calling addUniformCluster with density 1.0.
 *
 * @param DC         cube to write into
 * @param probFull   probability that any given chunk is filled; values >= 1
 *                   fill every chunk and values <= 0 fill none
 * @param chunkShape extent of a chunk in each dimension (one entry per cube
 *                   dimension, each > 0); every cube dimension must be an
 *                   exact multiple of the corresponding chunk extent
 * @return sum of the counts returned by addUniformCluster
 */
template<class _DT, class _LDT>
     int  fillChunks( DataCube<_DT,_LDT> &DC, double probFull, vector<int> chunkShape){
  vector<int> shape = DC.getShape();

  assert(chunkShape.size() == shape.size());

  int denseTuples = 0;

  // ix enumerates chunk numbers, not cell coordinates: bounds holds the
  // number of chunks along each dimension.
  vector<int> origin(shape.size(),0);  
  vector<int> ix(origin);
  vector<int> bounds(shape.size());
  
  for (uint i=0; i < bounds.size(); ++i) {
    assert( chunkShape[i] > 0);
    assert( shape[i] % chunkShape[i] == 0);
    bounds[i] = shape[i] / chunkShape[i];
  } 

  // rand() yields RAND_MAX+1 distinct values, so scaling by RAND_MAX+1 makes
  // Pr[rand() < threshold] == probFull, including exactly 0 for probFull == 0.
  const double threshold = probFull * (RAND_MAX + 1.0);

  // Reused for every chunk; all entries are overwritten before each use.
  vector<int> chunkStart(shape.size());
  vector<int> chunkFin(shape.size());

  do {
    if (rand() < threshold) {
      for (uint i=0; i < shape.size(); ++i) {
        chunkStart[i] = ix[i]*chunkShape[i];
        chunkFin[i] = chunkStart[i] + chunkShape[i] - 1;
      }
      denseTuples += addUniformCluster(DC, chunkStart, chunkFin, 1.0);
    }
  } while(MathUtil::increment( ix, origin, bounds)); 

  return denseTuples;
}




#endif
