// Lemur OLAP library (c) 2003 National Research Council of Canada by Daniel Lemire, and Owen Kaser
 /**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#ifndef OWENPARSER_H
#define OWENPARSER_H

#include <vector>
#include <fstream>
#include "../lemurcore/cubicpolynomial.h"
/*
 * This is the header necessary to parse the binary files generated
 * by the python script texttobinary. Essentially, you feed the
 * name of the file to the constructor, retrieve the shape of the
 * data (dimension,...) and then construct your own data cube.
 * When ready, call "fill" to fill your data cube with the data
 * using a group by.
 *
 * (c) NRC/CNRC, Daniel Lemire (2003)
 */
/*
 * Reader for one binary tuple file.
 *
 * The constructor opens the file and reads its preamble (a header and
 * the shape of the data); "fill" then streams the tuples of the file
 * into a caller-supplied data cube.
 *
 * DataType and LongDataType are forwarded unchanged to the DataCube
 * that "fill" writes into.
 *
 * An instance owns its file stream through a raw pointer that the
 * destructor deletes, so instances must not be copied: the copy
 * operations are declared private and never defined.
 */
template <class DataType,class LongDataType>
class OwenParser {
public:
    // Opens BinaryFileName (NULL is accepted: no file is opened) and
    // reads the header and the shape.
    OwenParser(char * BinaryFileName);
    // Closes the stream and releases it.
    virtual ~OwenParser();
    // Returns a copy of the shape read by the constructor
    // (one entry per dimension).
    virtual vector<int> getShape() const;
    // Adds 1 to the cell of DC selected by each tuple of the file, using
    // only the tuple components listed in GroupBy. When paranoid is true,
    // extra consistency checks are run while filling and afterwards.
    virtual void fill(DataCube<DataType,LongDataType>& DC, vector<int>& GroupBy, const bool paranoid = false);
    // Returns a pointer to the internal header buffer (not a copy).
    virtual char* getHeader();
private:
    // Not copyable: a copy would share mFileStream and both destructors
    // would delete it. Declared but intentionally left undefined.
    OwenParser(const OwenParser&);
    OwenParser& operator=(const OwenParser&);

    vector<int> mShape;          // size of each dimension, in file order
    std::fstream * mFileStream;  // owned; allocated in the constructor
    char mHeader[21];            // 20 header bytes plus a terminating zero
};

/*
 * Opens the binary file and reads its preamble: the header bytes
 * (sizeof(mHeader)-1 of them), an int holding the number of dimensions,
 * then one int per dimension, which are appended to mShape.
 *
 * BinaryFileName: name of the file to parse. NULL is accepted ("toy
 * mode"): a warning is printed, no file is opened, the shape stays
 * empty and the header is an empty string.
 *
 * The process is terminated with exit(1) when the file cannot be
 * opened, or when it ends before the whole preamble has been read.
 */
template <class DataType, class LongDataType>
OwenParser<DataType,LongDataType>::OwenParser(char * BinaryFileName) : mShape() {

    mFileStream = new std::fstream();

    // Zero the whole header buffer up front so that getHeader() always
    // yields a terminated string, even in toy mode or after a short read.
    for (unsigned int pos = 0; pos < sizeof(mHeader); ++pos) {
        mHeader[pos] = 0;
    }

    if (BinaryFileName == NULL) {
        cerr << "[Warning] no input file given (toy mode ? )" << endl;
        return;
    }

    mFileStream->open( BinaryFileName,std::ios::binary | std::ios::in  ) ;
    if (! *mFileStream) {
        cerr << "[Error] did not succesfully open input file \"" << BinaryFileName << "\"" << endl;
        cerr << "Goodbye" << endl;
        cerr.flush();
        exit(1);
    }

    mFileStream->read(mHeader,sizeof(mHeader)-1);
    mHeader[sizeof(mHeader)-1] = 0;

    int d = 0;
    mFileStream->read((char *)&d,sizeof(d));
    for (int k = 0; k < d ;++k) {
        int n = 0;
        mFileStream->read((char *)&n,sizeof(n));
        // stop at the first failed read instead of recording bogus sizes
        if (mFileStream->fail()) break;
        mShape.push_back(n);
    }

    // A preamble that could not be read completely means the file is
    // truncated or is not in the expected format; treat it like a file
    // that could not be opened.
    if (mFileStream->fail()) {
        cerr << "[Error] input file \"" << BinaryFileName
             << "\" ended before its header and shape could be read" << endl;
        cerr << "Goodbye" << endl;
        cerr.flush();
        exit(1);
    }
}

/*
 * Gives access to the header bytes kept in mHeader.
 * The pointer refers to the parser's own buffer, not to a copy.
 */
template<class DataType, class LongDataType>
char * OwenParser<DataType,LongDataType>::getHeader() {
    char * const headerStart = &mHeader[0];
    return headerStart;
}

/*
 * Closes the file stream and frees it; the stream object was
 * allocated with new by the constructor.
 */
template <class DataType, class LongDataType>
OwenParser<DataType,LongDataType>::~OwenParser() {
    std::fstream * const stream = mFileStream;
    stream->close();
    delete stream;
}

/*
 * Returns the shape gathered by the constructor, one entry per
 * dimension. The caller receives its own copy of the vector.
 */
template <class DataType, class LongDataType>
vector<int> OwenParser<DataType,LongDataType>::getShape() const {
    vector<int> shapeCopy(mShape);
    return shapeCopy;
}

/*
 * Streams every tuple of the file into the data cube.
 *
 * Each tuple is made of d unsigned shorts, d being the number of
 * dimensions read by the constructor. For every tuple, the components
 * selected by GroupBy form an index into DC and the value stored at
 * that index is increased by one.
 *
 * DC:       cube receiving the counts; it must have one dimension per
 *           GroupBy entry, each at least as large as the matching file
 *           dimension.
 * GroupBy:  positions (0 <= position < d) of the tuple components to keep.
 * paranoid: when true, each write is read back and checked, and once the
 *           file is exhausted a range query over the cube is compared
 *           with the number of tuples parsed.
 *
 * Violated preconditions and failed checks are reported through assert.
 * The stream state is cleared before returning.
 */
template <class DataType, class LongDataType>
void OwenParser<DataType,LongDataType>::fill(DataCube<DataType,LongDataType>& DC, vector<int>& GroupBy,
        const bool paranoid) {
    const uint d = mShape.size();
    assert( d >= GroupBy.size());
    assert(DC.getShape().size() == GroupBy.size());
    for(uint dim = 0; dim < GroupBy.size(); ++dim) {
        // every entry must name an existing dimension, it is used below
        // to index both mShape and the tuple buffer
        assert(GroupBy[dim] >= 0 && static_cast<uint>(GroupBy[dim]) < d);
        assert(mShape[GroupBy[dim]] <= DC.getShape()[dim]);// it is ok to store data into a large cube
    }
    // skip the preamble: header bytes, dimension count, then d sizes
    mFileStream->seekg((sizeof(mHeader)-1)+(1+d)*sizeof(int));
    // vector instead of new[]/delete[] so the buffer is released even if
    // the data cube throws
    vector<ushort> tuple(d);
    const std::streamsize tupleBytes = d * sizeof(ushort);
    vector<int> index(GroupBy.size(),0);
    int number = 0;
    // Loop on the success of the read itself: testing eof() alone never
    // terminates when the stream fails for another reason (e.g. no file
    // was opened). With d == 0 there is nothing to read and a zero-byte
    // read would succeed forever, so the loop is skipped.
    while(d > 0 && mFileStream->read((char *) &tuple[0], tupleBytes)) {
        ++number;
        for(uint k = 0; k < GroupBy.size() ; ++k) {
            index[k] = tuple[GroupBy[k]];
        }
        const DataType current = DC.get(index);
        DC.put(current + 1,index);
        if(paranoid) {
            if(current + 1 != DC.get(index)) {
                // the whole diagnostic goes to cerr so it is not split
                // across two streams
                cerr <<" Not pretty! I had value " << current << " at ";
                for(uint dim = 0; dim < index.size(); ++dim) cerr << index[dim] << " ";
                cerr << " I added 1 and got " << DC.get(index) << "." <<endl;
                cerr << "Current, I parsed "<< number <<
                " tuples for a total of " << number*tupleBytes << " bytes."<< endl;
            }
            assert(current + 1 == DC.get(index));// sanity check
        }
    }
    // the loop always ends with eofbit and/or failbit set
    mFileStream->clear();
    // next we check the data cube
    if(paranoid) {
        vector<RangedCubicPolynomial> query;
        for(uint dim = 0; dim < DC.getShape().size(); ++dim) {
            // NOTE(review): presumably the constant polynomial over the
            // full extent of this dimension - confirm against monome()
            RangedCubicPolynomial flat =  RangedCubicPolynomial::monome(0,0,DC.getShape()[dim]);
            query.push_back(flat);
        }
        float rq = DC.rangeQuery(query);
        // compare without abs(): an unqualified abs may resolve to the
        // integer overload and truncate the difference
        const float gap = rq - number;
        if(gap > 0.5 || gap < -0.5)
            cout << "range query counted " << rq << " tuples but I have " << number << endl;
        assert(gap <= 0.5 && gap >= -0.5);
    }
    // ok, so all of this should catch most bugs
}

#endif

