
// The VIPER algorithm!
// Created by Pradeep Shenoy (purdy@cse.iitb.ernet.in)

#pragma implementation "global.h"

#include <iostream.h>
#include <fstream.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>

#include "include/snakes.h"
#include "include/cand.h"
#include "include/tidheap.h"
#include "include/global.h"
#include "include/largeset.h"

#define USER_MAXCANDS 10000

// #include "apriori.h"

// The backbone of our new algorithm: the dag-merge technique.
// The actual DAG structure is implemented as an array of levels.
// More details: there's an extra level added so that the "level" snakes
// are also maintained: this is stored in the array[0] array.
// This is so that the treatment of every level is uniform.

// ---- Definitions of the static members of class Global ----
int Global::MAXSN;		// Declared in driver.h; main() reads it from the metafile.
float Global::SUPP;		// Declared in driver.h; minimum support, read in main()/parse_args().
int  Global::MAX_TLEN; 		// Max length of a tuple. Declared in driver.h.
int Global::MAXTID;	// Also in the main file. Level1 initialises it.
long Global::diskUsage;

// Non-zero enables the diagnostic output guarded by "if (DEBUG)" below.
int DEBUG = 1;

time_t         Global::lasttime;
time_t         Global::start_time;
unsigned long  Global::startmem;
int            Global::MinTransCount;
int 	       Global::snake_bufsize;
int 	       Global::snake_indxbufsize;
long           Global::maxdu;

// Temporary snake files: DAG_t opens STORDATA for writing and READDATA
// for reading; main() unlinks both when the run is over.
char *STORDATA = "file1.dat";
char *READDATA = "file2.dat";
// Fixed 256-byte backing store for the data file name; DATAFILE points at it.
char buf[256];
char *DATAFILE = buf;
// Collects every large itemset found (DAGmerge inserts, main() prints).
LargeSet_t *Large = new LargeSet_t;

// Pruning parameters:
// NOTE(review): none of these are referenced in this file; presumably
// they are used by the rule-pruning code (PruneRules) -- confirm.
    int CASE, ALLRULE;
    float Positive, Negative, MINCONF;
    int dbsize;

// Defined elsewhere in the project.
extern Cand_t *preprocess(int *);
extern void PruneRules(void);

// DAG_t: the candidate DAG counted by one dag-merge pass.
//
// arrayDAG[0] holds the generator itemsets (length baseLevel) and
// arrayDAG[i], i = 1..nlevels, the candidates of length baseLevel+i.
// Usage, as done by RunViper(): construct, insertCands() once per level,
// insertGenerators(), optionally StopAt(), then DAGmerge() exactly once.
//
// The object owns the candidate arrays handed to it, the read/write snake
// lists and two file descriptors, so it must not be copied.
class DAG_t {

    // The DAG is from baseLevel to stopAtLevel = baseLevel + nlevels.
    // The baseLevel-length itemsets are generated dynamically from
    // itemset-snakes of length baseLevel/2.
    int baseLevel, stopAtLevel, nlevels;

    int isWritePrune;  // On by default; StopAt() switches it off.

    int rdfd, wrfd; // File descriptors, -1 while not open.
    int bufcount;   // Buffer counter used by the writesnakes.

    Cand_t **arrayDAG;
    int *arraySZ;   // keeps the size of each candidate list

    RSN *readlist;	// Input snakes
    WSN *writelist;	// Output snakes

    // Information about the write list: filled up in findWriteList.
    int *writearray;  	// The ids used for the writesnakes
    int writecount;	// number of wsns.
    int *GenFreqArray;  // Generator usage frequency.. used for pruning.

    // Information about the readlist: filled up in findReadList.
    int *ReadIDs;		// The snakes to read
    int numUniqReadIDs;		// How many
    Cand_t ***isCoverOf; 	// The generators covered by a snake
    int *ReadidCoverCount;	// how many generators

    // Copying would double-free the owned arrays and double-close the
    // descriptors, so both operations are declared but never defined.
    DAG_t(const DAG_t &);
    DAG_t &operator=(const DAG_t &);

    // Runs the three preparation steps, in the order they depend on each
    // other: findWriteList() still needs the DAGchildren arrays, which
    // InvertPointers() frees when write pruning is on.  Always returns 0.
    int preprocDAG(void){
        // First find the readlist; also inits the readsnakes.
        this -> findReadList();

        // Also inits the writesnakes.
        this -> findWriteList();

        // Finally, invert the pointers in the DAG: currently they're
        // from higher to lower levels -- we need the reverse.
        this -> InvertPointers();

        return 0;
    }

    // Builds ReadIDs / ReadidCoverCount / isCoverOf and opens the read
    // snakes.
    //
    // The way this works: for each baseLevel itemset we have a pair of
    // snakes that are used to generate it.  What we want is, for each
    // input snake, the list of baseLevel itemsets it covers.  So collect
    // all the pairs (generator1, baseItem), (generator2, baseItem) and do
    // a "group by" on the first column to form the required lists.
    // Always returns 0.
    int findReadList(void){

        int numpairs = arraySZ[0]*2;

        int **genPairs = new int *[numpairs+1];

        // Using one long array in coiled-up fashion: genPairs[j] is the
        // j-th (id, index) pair inside tmp.  The extra pair is the
        // sentinel the sort relies on.
        int *tmp = new int [numpairs*2 +2];
        for (int j = 0; j <= numpairs; j++)
            genPairs[j] = &tmp[j*2];

        Cand_t *candlist = arrayDAG[0];
        for (int j = 0, k = 0; j < arraySZ[0]; j++){
            genPairs[k][0]   = candlist[j].genID1;
            genPairs[k++][1] = j; //  == candlist[j].localID;

            genPairs[k][0]   = candlist[j].genID2;
            genPairs[k++][1] = j; //  == candlist[j].localID;
        }

        genPairs[numpairs][0] = 1000000;  // Endmarker for the sort

        this -> sortOnFirst(genPairs[0], 0, 2*numpairs);

        // Count the distinct snake ids in the sorted first column.
        int prevID = genPairs[0][0];
        this -> numUniqReadIDs = 1;
        for (int i = 1; i < numpairs; i++){
            if (genPairs[i][0] != prevID){
                numUniqReadIDs++; prevID = genPairs[i][0];
            }
        }

        // These are DAG_t private members that maintain readsnakes info.
        this -> ReadIDs = new int[numUniqReadIDs];
        this -> ReadidCoverCount = new int[numUniqReadIDs];
        this -> isCoverOf = new Cand_t** [numUniqReadIDs];

        for (int i = 0; i < numUniqReadIDs;i++) {
            ReadidCoverCount[i] = ReadIDs[i] = 0;
        }

        prevID = ReadIDs[0] = genPairs[0][0];
        ReadidCoverCount[0]++;

        // One long array coiled up -- saves malloc()s.  isCoverOf[i]
        // points into it; the destructor frees it via isCoverOf[0].
        Cand_t **tmparray = new Cand_t*[numpairs];
        tmparray[0] = &candlist[genPairs[0][1]];
        for (int i = 1, k = 0; i < numpairs; i++){

            if (genPairs[i][0] != prevID)
               prevID = ReadIDs[++k] = genPairs[i][0];

            ReadidCoverCount[k]++;
            tmparray[i] = &candlist[genPairs[i][1]];
        }

        for (int i = 0, prevTotal = 0; i < numUniqReadIDs; i++){
           isCoverOf[i] = &tmparray[prevTotal];
           prevTotal += ReadidCoverCount[i];
        }

        // Finished making the readlist and assoc info.
        delete [] genPairs; delete [] tmp;	 // First cleanup

        // Now initialise the readsnakes.
        // NOTE(review): the result of open() is not checked here; a
        // failed open hands -1 to init_read().
        readlist = new RSN[numUniqReadIDs];
        rdfd = open(READDATA, O_RDONLY);

        for (int i = 0; i < numUniqReadIDs; i++)
            readlist[i].init_read(rdfd, ReadIDs[i]);

        return 0;
    }

    // The dag is represented by each level being stored as an array of
    // candidates.  Each candidate has a list of its immediate subsets --
    // hence downward pointers.  For updates to propagate upwards we need
    // upward pointers.  Here we invert the pointers using a technique
    // similar to that of findReadList: collect (sub, super) pairs from the
    // first two children of every parent and group them by the sub id.
    // Fills numparents / DAGparents of every candidate below the top
    // level.  Always returns 0.
    int InvertPointers(void){

        for (int i = 0; i < nlevels; i++){

            assert(arraySZ[i+1] != 0);

            int numpairs = arraySZ[i+1]*2;

            int **genPairs = new int *[numpairs +1];
            int *tmp = new int [numpairs*2 +2];

            for (int j = 0; j <= numpairs; j++)
                genPairs[j] = tmp+j*2;

            Cand_t *parentlist = arrayDAG[i+1];
            // The children of a whole level live in one coiled array that
            // starts at the first parent; each parent owns baseLevel+i+1
            // consecutive entries.
            int *children = arrayDAG[i+1][0].DAGchildren;

            // Written straight into tmp (the storage behind genPairs).
            for (int j = 0, k = 0; j < arraySZ[i+1]; j++){

                tmp[k++] = children[0];
                tmp[k++] = j; //  == parentlist[j].localID;

                tmp[k++] = children[1];
                tmp[k++] = j; //  == parentlist[j].localID;

                children += baseLevel+i+1;
            }

            if (isWritePrune)  // Else we require it later.
                delete [] parentlist[0].DAGchildren; // a "snaked" array

            genPairs[numpairs][0] = 1000000; // End marker for sort.

            this -> sortOnFirst(genPairs[0], 0, 2*numpairs);

            // Now count the repeats for each element and store the list.
            // tmparray is owned by the level: the destructor frees it
            // through arrayDAG[i][0].DAGparents.
            Cand_t *candlist = arrayDAG[i];
            Cand_t **tmparray = new Cand_t *[numpairs];
            for (int j = 0, k = 0; j < arraySZ[i];j++){

                candlist[j].numparents = 0;
                candlist[j].DAGparents = &tmparray[k];

                // Bounds test first, then the element read.
                while(k < numpairs && genPairs[k][0] == j) {
                    candlist[j].numparents++;
                    tmparray[k] =  parentlist + genPairs[k][1];
                    k++;
                }
            }

            // PointerInvert done for level i. Now cleanup.
            delete [] genPairs; delete [] tmp;
        }

        return 0;
    }

    // Decides which generators must be written out as snakes, assigns
    // their WriteIDs and opens the write snakes.  With write pruning on
    // and fewer than baseLevel levels nothing is written at all.
    // Always returns 0; exits the program if a top-level itemset cannot
    // be covered.
    int findWriteList(void){

        GenFreqArray = NULL; writelist = NULL;
        writearray = 0; writecount = 0;
        if (isWritePrune && nlevels < baseLevel){
            // No writing to be done! hallelujah!
            return 0;
        }

        // First find out how many snakes are to be written.
        if (!isWritePrune){
            if (DEBUG) cout << "No Write Pruning" << endl;
            writecount = arraySZ[0]; // Write _all_ generators
        } else {
            this -> GenFreqArray = new int [arraySZ[0]];
            for(int i = 0; i < arraySZ[0]; i++) GenFreqArray[i] = 0;

            for (int i = 0; i < arraySZ[nlevels]; i++){ // Forall finalLarge
                if (this -> CoverItemset(i) < 0){
                    printf("Bailing out\n"); exit(1);
                }
            }

            // Now we have the nonzero-use generators: find writelist
            writecount = 0;
            for(int i = 0; i < arraySZ[0]; i++) // find how many
                if (GenFreqArray[i] != 0) writecount++;
        }

        // Pruned generators get the marker (ushort)-1 as WriteID.
        writearray = new int[writecount];
        for(int i = 0, k = 0; i < arraySZ[0]; i++)
            if (isWritePrune == 0 || GenFreqArray[i] != 0){
                writearray[k++] = i;
                arrayDAG[0][i].WriteID = k-1;
            } else arrayDAG[0][i].WriteID = (ushort)-1;

        // Print out some stats if you want.
        if (DEBUG)
        cout << "#Write: " << writecount << endl;

        // Initialise the writeList.
        writelist = new WSN[writecount];
        wrfd = open(STORDATA, O_WRONLY|O_TRUNC|O_CREAT, 0600);
        bufcount = writecount; // The first writecount bufs already used.
        for(int i = 0; i < writecount; i++)
           writelist[i].init_write(wrfd, i, &bufcount);

        return 0;
    }

    // Finds a pair of generators whose union is the top-level itemset
    // arrayDAG[nlevels][indx] and stores their indices in its
    // genID1/genID2 (bumping GenFreqArray when write pruning is on).
    //
    // Strategy: collect all the lowest-level descendants of the itemset,
    // convert each to a bitmap (bit p set when the generator contains the
    // p-th item of the top itemset), then look for two bitmaps whose OR
    // has all stopAtLevel low bits set.
    //
    // Returns 0 on success, -1 when no pair covers the itemset or the
    // descendant list would not fit the fixed work arrays.
    inline int CoverItemset(int indx){  // Cover the given toplevel itemset

        const int MaxLeaves = 10000; // Sort of magic number...

        int genList1[MaxLeaves], genList2[MaxLeaves];
        int gencount1 = 0, gencount2 = 0;

        Cand_t *toCover = &arrayDAG[nlevels][indx];
        int *tmp1 = genList1, *tmp2 = genList2;

        tmp1[gencount1++] = indx;

        for (int i = nlevels; i > 0; i--){ // BreadthFirst at each level

            Cand_t *candlist = arrayDAG[i];
            gencount2 = 0;
            for (int j = 0; j < gencount1; j++){

                // Expand each element to its children (no copy needed).
                Cand_t &toExpand = candlist[tmp1[j]];
                for (int k = 0; k < baseLevel+i; k++){
                    // One slot stays free for the sort sentinel below.
                    if (gencount2 >= MaxLeaves-1){
                        printf("CoverItemset: more than %d leaves\n",
                               MaxLeaves-1);
                        return -1;
                    }
                    tmp2[gencount2++] = toExpand.DAGchildren[k];
                }
            }

            // swap the arrays tmp1-tmp2
            int *ptrswap = tmp1; tmp1 = tmp2; tmp2 = ptrswap;
            gencount1 = gencount2;

            tmp1[gencount1] = 1000000;
            gencount1 = this -> sortUniq(tmp1, gencount1);
        }

        // tmp1 has the final list of all subset generators.
        // Sort array and remove duplicates
        tmp1[gencount1] = 1000000;
        gencount1 = this -> sortUniq(tmp1, gencount1);

        Cand_t *genArray = arrayDAG[0];
        // Convert each array's set to a bitmap
        for (int i = 0; i < gencount1; i++){
            tmp2[i] = this -> toBitMap(toCover->itemset,
                                 genArray[tmp1[i]].itemset);
        }

        // Do an allunion checking for union being == finalLarge.
        // NOTE(review): the mask is an int, so this only works while
        // stopAtLevel is below the bit width of int.
        int FULLSET = 0;
        for (int i = 0; i < stopAtLevel; i++) FULLSET |= (1 << i);

        for (int i = 0; i < gencount1; i++){
            for (int j = i+1; j < gencount1; j++){
                if (((tmp2[i]|tmp2[j])^FULLSET) == 0){

                    toCover -> genID1 = tmp1[i];
                    toCover -> genID2 = tmp1[j];
                    if (isWritePrune){
                    GenFreqArray[tmp1[i]]++; GenFreqArray[tmp1[j]]++;
                    }

                    return 0;
                }
            }
        }

        // Printed unconditionally so the diagnostic survives NDEBUG.
        printf("HELP! Itemset not covered!\n");
        return -1;
    }


    // not used now.
    inline int updateTID( Cand_t *candidate, int TID){
        // Here we mark the given read snake with the relevant TID
        // Logically easiest as a recursive function, but
        // doing it using a sort of DFS counting.
        // Idea is to avoid allocating the (variable) bfs space...

        return 0; // Can do something like return  #updates.
    }

    // General utility functions:

    // Master is of length baseLevel+nlevels, its items renamed 0,1,2,...
    // Returns the bitmap of the positions in master at which the
    // baseLevel items of subset occur.  Both arrays are expected to be
    // sorted and subset is expected to be contained in master; the scan
    // stops at the end of master if it is not.
    inline int toBitMap(int *master, int *subset){

        const int masterlen = baseLevel + nlevels;
        int subsetcount = 0;
        int mastercount = 0;

        int BITMAP = 0;
        while (subsetcount < baseLevel && mastercount < masterlen){
            if (master[mastercount] == subset[subsetcount]){
                BITMAP |= (1 << mastercount);
                mastercount++; subsetcount++;
            }
            else mastercount++;
        }

        return BITMAP;
    }

    // Sorts array[0..size-1] and squeezes out duplicates in place.
    // array[size] must already hold a sentinel larger than every element.
    // Returns the number of distinct values (1 even for size 0).
    int sortUniq(int *array, int size){

        qsort1(array, 0, size);

        // Now remove duplicates
        int uniqcount = 1;
        int last = array[0];
        for (int i = 1; i < size; i++){
            if (array[i] != last)
                array[uniqcount++] = last = array[i];
        }

        return uniqcount;
    }

    // Quicksort of v[left..right] (both inclusive).  The upward scan has
    // no bounds test of its own: it relies on an element greater than
    // the pivot sitting at or after v[right] (the callers' sentinel).
    void qsort1(int *v, int left, int right) {

        if (left >= right) return;

        int i = left, j = right;
        int pivot = v[i];
        // Subtle bug here! the v[i] above was a v[i++]
        // As a result, for rt=lt+1, the prog wouldnt
        // enter the below loop, and j wouldnt be updated.
        while (i < j){
            while (v[i] <= pivot) i++;
            while (v[j] > pivot) j--;

            if (i < j) swap(v, i, j);
        }

        swap(v, left, j);
        qsort1(v, left, j-1);
        qsort1(v, j+1, right);
    }

    // Exchanges v[i] and v[j].
    inline void swap(int *v, int i , int j) {
        int temp;
        temp = v[i];
        v[i] = v[j];
        v[j] = temp;
    }

    // Same quicksort as qsort1, but over (key, value) pairs stored flat
    // in v: left and right are int offsets of pair keys, so they move in
    // steps of two.  The same sentinel requirement applies.
    void sortOnFirst(int *v, int left, int right){

        if (left >= right) return;

        int i = left, j = right;
        int pivot = v[i];
        while (i < j){
            while (v[i] <= pivot) i+=2;
            while (v[j] > pivot) j-=2;

            if (i < j) twoswap(v, i, j);
        }

        twoswap(v, left, j);
        sortOnFirst(v, left, j-2);
        sortOnFirst(v, j+2, right);
    }

    // Exchanges the pairs starting at v[i] and v[j].
    inline void twoswap(int *v, int i , int j) {
        int temp1, temp2;

        temp1 = v[i];
        temp2 = v[i+1];

        v[i] = v[j];
        v[i+1] = v[j+1];

        v[j] = temp1;
        v[j+1] = temp2;
    }

// Debug helper: prints the len items of itemset on one line.
void printItemset(int *itemset, int len){

    for (int i = 0; i < len; i++)
    	cout << itemset[i] << " ";

    cout << endl;

}

public:

    // Creates an empty DAG whose generators have length mylevel and which
    // is expected to grow up to length 2*mylevel.
    DAG_t(int mylevel){
        baseLevel = mylevel;
        stopAtLevel = 2*mylevel;
        nlevels = 0;
        isWritePrune = 1;

        // Nothing is open yet; the destructor keys off these values.
        rdfd = wrfd = -1;
        bufcount = 0;

        // Expect a max of mylevel levels;
        arrayDAG = new Cand_t *[mylevel+1];
        arraySZ = new int [mylevel+1];
        for (int i = 0; i <=mylevel; i++){
            arrayDAG[i] = NULL;
            arraySZ[i] = 0;
        }

        readlist = NULL; writelist = NULL;

        ReadIDs = NULL; ReadidCoverCount = NULL;
        isCoverOf = NULL; writearray = NULL;
        GenFreqArray = NULL;
        writecount = numUniqReadIDs = 0;

    }

    // Cuts the DAG short at the given itemset length and turns write
    // pruning off.
    void StopAt(int level){
        stopAtLevel = level;
        isWritePrune = 0;
    }

    // Releases the snake lists (before their descriptors are closed), the
    // bookkeeping arrays and every candidate level.
    // NOTE(review): the per-level cleanup assumes InvertPointers() has
    // run whenever nlevels > 0, i.e. that DAGmerge() was called.
    ~DAG_t(void){

        //  The readlist and writelist subsidiary data, and the snaked
        //  arrays _they_ might contain.
        delete [] readlist;
        delete [] ReadIDs;
        delete [] ReadidCoverCount;
        if (isCoverOf){  // A snaked array.
            delete [] isCoverOf[0];
            delete [] isCoverOf;
        }

        delete [] writelist;
        delete [] writearray;
        delete [] GenFreqArray;

        // Now delete all the candidate arrays..
        for (int i = 0; i < nlevels; i++){

            delete [] arrayDAG[i][0].DAGparents;
            delete [] arrayDAG[i];

            if (!isWritePrune)
                delete [] arrayDAG[i+1][0].DAGchildren;
        }

        delete [] arrayDAG[nlevels];
        delete [] arrayDAG; delete [] arraySZ;

        // Only close what was really opened: a DAG that never ran
        // DAGmerge() has no descriptors at all.
        if (wrfd >= 0) close(wrfd);
        if (rdfd >= 0) close(rdfd);
    }

    // Appends the next level: list holds ncands candidates of the given
    // itemset length, which must be exactly one more than the current top
    // level.  The DAG takes over the array.  Returns ncands.
    int insertCands(Cand_t *list, int length, int ncands){

        assert(length-baseLevel == nlevels+1);

        nlevels++;
        arrayDAG[nlevels] = list;
        arraySZ[nlevels] =  ncands;

        return ncands; //  Not really required
    }

    // Installs the ngens generators (itemsets of length baseLevel) as
    // level 0.  The DAG takes over the array.  Returns ngens.
    int insertGenerators(Cand_t *genlist, int length, int ngens){

        assert (length == baseLevel);
        arrayDAG[0] = genlist;
        arraySZ[0] = ngens;

        return ngens;
    }

    // The main procedure: merges the read snakes in TID order, counts
    // the support of every candidate in the DAG and inserts the large
    // ones into Large.
    //
    // Returns a newly allocated array of the large itemsets of the top
    // level (their items in one shared array reachable through element
    // 0's itemset) and stores their number in *nlarge.  Returns NULL with
    // *nlarge == 0 when the DAG does not reach stopAtLevel.
    Cand_t *DAGmerge(int *nlarge){

        this -> preprocDAG();
        TidHeap heap(numUniqReadIDs+1);
        for (int i =  0; i < numUniqReadIDs; i++)
            heap.insert(i, readlist[i].getNextTid(),
                         isCoverOf[i], ReadidCoverCount[i]);

        // loop variables
        int TID, oldTID = 0;
        heapbuc_t heapbuc;

        // the depth-first search pointer, and stack (one slot per level)
        int atlevel = 0;
        Cand_t **candAtLevel = new Cand_t *[nlevels+1];

        // The readsnake's update list
        Cand_t **updatelist; int updatecount;

        // START LOOP (FOREACH_TID_ofEachSnake)
        heapbuc = heap.deleteMIN();

        // A candidate is counted for a TID when it is reached a second
        // time with that TID: the first visit only stamps lastTID.

        if (nlevels == 1) {

            // "Unrolled" version of the depth-first loop for the case of
            // a single level. Works much better this way.
            while (heapbuc.TID >= 0){

                updatelist = heapbuc.updatelist;
                updatecount = heapbuc.updatecount;
                TID = heapbuc.TID;

                if (TID != oldTID){
                    // First snake carrying this TID: nothing can match
                    // yet, so bypass the costly updates.
                    for (int i = 0; i < updatecount; i++)
                        updatelist[i]->lastTID = TID;

                    oldTID = TID;
                } else {
                    for (int i = 0; i < updatecount; i++){

                        Cand_t& candidate = *(updatelist[i]);
                        if (candidate.lastTID != TID) {
                            candidate.lastTID = TID;
                            continue;
                        }

                        candidate.frequency++;

                        // Also, this is a generator: write away to the
                        // file. It may have been pruned.
                        if (writecount && candidate.WriteID != (ushort)-1)
                            writelist[candidate.WriteID].addToSnake(TID);

                        // The only level above: the direct parents.
                        for (int j = 0; j < candidate.numparents; j++){
                            Cand_t &c = *candidate.DAGparents[j];
                            if (c.lastTID != TID){
                                c.lastTID = TID; continue;
                            }
                            c.frequency++;
                        }
                    }
                }

                if ((TID=readlist[heapbuc.readid].getNextTid()) > 0)
                    heap.insert(heapbuc.readid, TID, updatelist, updatecount);
                heapbuc = heap.deleteMIN();
            }

        } else {

            // The original loop -- a depthfirst.
            while (heapbuc.TID >= 0){

                updatelist = heapbuc.updatelist;
                updatecount = heapbuc.updatecount;
                TID = heapbuc.TID;

                if (TID != oldTID){
                    // Do a bypass of the costly updates
                    for (int i = 0; i < updatecount; i++)
                        updatelist[i]->lastTID = TID;

                    oldTID = TID;
                } else {
                    // Need to update, and check for upward
                    // propagation as well.
                    for (int i = 0; i < updatecount; i++){

                        Cand_t& candidate = *(updatelist[i]);
                        atlevel = 0;
                        if (candidate.lastTID != TID) {
                            candidate.lastTID = TID;
                            continue;
                        }

                        candAtLevel[atlevel++] = &candidate;
                        candidate.frequency++;
                        candidate.updated = 0;

                        // Also, this is a generator: write away to the
                        // file. It may have been pruned.
                        if (writecount && candidate.WriteID != (ushort)-1)
                            writelist[candidate.WriteID].addToSnake(TID);

                        // Do a depth-first search with a stack; "updated"
                        // is the index of the next parent to visit.
                        while (atlevel){

                            Cand_t &c = *(candAtLevel[atlevel-1]);
                            if (c.updated < c.numparents){
                                Cand_t& cprime = *(c.DAGparents[c.updated++]);
                                if (cprime.lastTID != TID)
                                    cprime.lastTID = TID;
                                else{ // Ascend one more level.
                                    candAtLevel[atlevel++] = &cprime;
                                    cprime.frequency++;
                                    cprime.updated = 0;
                                }
                            }
                            else atlevel--;
                        }
                    }
                }

                if ((TID=readlist[heapbuc.readid].getNextTid()) > 0)
                    heap.insert(heapbuc.readid, TID, updatelist, updatecount);
                heapbuc = heap.deleteMIN();
            } // EndLoop_FOREACH_TID
        }

        delete [] candAtLevel;

        // Collect the information gathered
        int hasLargeAtLevel = 0, levelLarge = 0;
        for (int i = 1; i <= nlevels; i++){
            Cand_t *candlist = arrayDAG[i];
            int candsz = arraySZ[i];

            levelLarge = 0;
            for (int j = 0; j < candsz; j++){
                if (Global::hasMinSup(candlist[j].frequency)){
                    Large -> Insert(candlist[j].itemset, baseLevel+i,
                                           candlist[j].frequency);
                    levelLarge++;
                }
            }

            hasLargeAtLevel = baseLevel+i;
            cout << "Large at level " << baseLevel+i << " : " <<
                        levelLarge << endl;
        }

        if (hasLargeAtLevel < stopAtLevel){ // We're thru!
            *nlarge = 0;
            return NULL;
        }

        *nlarge = levelLarge;

        // Copy the large top-level itemsets out of the DAG, which is
        // about to be destroyed by the caller.
        Cand_t *toCover = arrayDAG[nlevels];
        Cand_t *LargeList = new Cand_t[levelLarge];
        int *itemlist = new int[levelLarge*stopAtLevel];
        int itcnt = 0;
        Cand_t *genarray = arrayDAG[0];
        for (int i = 0, k = 0; i < arraySZ[nlevels]; i++){
            if (Global::hasMinSup(toCover[i].frequency)){

                // With write pruning on, the cover was already found in
                // findWriteList.
                if (!isWritePrune) CoverItemset(i);
                LargeList[k] = toCover[i];
                LargeList[k].itemset = &itemlist[itcnt];

                // Might have been writepruned.. a bit messy!
                // The generator ids become the ids of the snakes just
                // written.
                LargeList[k].genID1 = genarray[toCover[i].genID1].WriteID;
                LargeList[k].genID2 = genarray[toCover[i].genID2].WriteID;

                for (int j = 0; j < stopAtLevel; j++, itcnt++)
                   itemlist[itcnt] = toCover[i].itemset[j];

                LargeList[k].DAGchildren = NULL;
                k++;
            }
        }

        return LargeList;
    }

};


void parse_args(int argc, char **argv){

    if (argc < 3) {
	cout << "Usage: " << argv[0] << " filename supp" << endl;
	// More comprehensive usage stuff later.
	exit(1);
    }

    strcpy(DATAFILE, argv[1]);
    Global::SUPP = atof(argv[2]);

    return;
}

// Runs the VIPER passes.  preprocess() supplies the first list of large
// itemsets; every pass then builds a DAG of candidates from length
// level+1 up to 2*level (or less if the candidate count blows up),
// counts them with DAG_t::DAGmerge, and continues from the top level
// reached until no large itemsets or no candidates remain.
void RunViper(void){

   int nprevlarge = 0;

   Cand_t *PrevLarge = preprocess(&nprevlarge);
   cout << "Level" << 2 << ": " << nprevlarge << endl;
   if (DEBUG) Global::statusbar("Preproc:");

   int level = 2;
   while (nprevlarge){	// while there are large items.

        Global::flip_files();

	DAG_t *DAG = new DAG_t(level);
	CandPrune_t *CandPrune = new CandPrune_t(level, 1); //isPruning

	// Get the generators: Itemsets to be regenerated at this level
	// These are a subset of the previously found large itemsets.
	// Also generate and store away the candidates for level+1

	CandPrune -> insertLargeList(PrevLarge, nprevlarge);

	int ncands = 0;
	Cand_t *Candidates = CandPrune -> getCands(&ncands);

	if (ncands == 0) {   // No candidates to be covered
		delete CandPrune; delete DAG;
		break;
	}

	if(DEBUG)
	cout << "Candidates at level" << level+1 << ": " << ncands << endl;
  	DAG -> insertCands(Candidates, level+1, ncands);

	int ngens = 0;
        Cand_t *Generators = CandPrune -> getPrunedGens(&ngens);
        DAG -> insertGenerators(Generators, level, ngens);
	delete [] PrevLarge[0].itemset; delete [] PrevLarge; // Used by candgen
	delete CandPrune;

	// Generate candidates for all subsequent levels upto 2*level
	// Or stop at some intermediate level in case there's a blowup

	int delta;
        for (delta = 1; delta < level && ncands < USER_MAXCANDS; delta++){

	    CandPrune_t* candtable = new CandPrune_t(level+delta, 0);
	    candtable -> insertLargeList(Candidates, ncands);
	    Candidates = candtable -> getCands(&ncands);

	    if (!ncands) {
		// Nothing to add at this level; do not leak the table.
		delete candtable;
		break;
	    }
	    DAG -> insertCands(Candidates, level+delta+1, ncands);
	    if(DEBUG)
	    	cout << "Candidates at level" << level+delta+1 << ": "
		     << ncands << endl;

	    delete candtable;
	}

	// The loop above gives up as soon as ncands >= USER_MAXCANDS, so
	// the cut-off test must use the same bound.  With a strict '>'
	// a count of exactly USER_MAXCANDS left the DAG short of 2*level
	// without StopAt() being called, and DAGmerge then reported no
	// large itemsets at all.
	if (ncands >= USER_MAXCANDS && delta < level) {
		DAG -> StopAt(level+delta);
		level = level+delta;
	} else level = level*2;


	// The DAG now has sufficient information to do the merge/count
	// There is some preprocessing too -- it's hidden inside DAG_t
	PrevLarge = DAG -> DAGmerge(&nprevlarge);
	if(DEBUG)
        Global::statusbar("DAGmerge:");

	delete DAG;
   }

   return;
}


int main(int argc, char **argv){

   // Read in the args + config variables from file.
//   parse_args(argc, argv); //commented by vikram
   Global::init_program("/dsl/local/minto/viper/init.dat");

   //###### added by vikram
   cout << "\nEnter database name: ";
   char dbname[256]; cin >> dbname;

   cout << "\nEnter minimum support: ";
   cin >> Global::SUPP;

   cout << "\nEnter output filename: ";
   char outname[256]; cin >> outname;

   ifstream mf(dbname);
   if (! mf)
   {
       cout << "Couldn't open metafile: " << dbname << endl;
       exit(1);
   }

   mf >> DATAFILE;
   mf >> Global::MAXSN;
//   mf >> Global::N_TUPLES;
   //###### added by vikram

   // Run the viper algorithm to generate large itemsets.
   RunViper();

   // Generate rules and prune them -- a post pass operation.

   // Throw away the temp files.
   struct stat statbuf;
   if (stat(STORDATA, &statbuf) == 0) unlink(STORDATA); 
   if (stat(READDATA, &statbuf) == 0) unlink(READDATA);

   //###### added by vikram
   ofstream outfile(outname);
   if ( !outfile ) {
       cout << "\nArggh!! Can't open output file.\n";
       return 1;
   }
   Large -> PrintItemsets(outfile);
   //###### added by vikram

   Global::printResult();
   return 0;

}
