/**********************************************************************
    Copyright (C) 2004 Database Systems Lab, Supercomputer Education and
    Research Centre, Indian Institute of Science, Bangalore, INDIA.
    http://dsl.serc.iisc.ernet.in

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
***********************************************************************/



/***********************************************************************
 Implementation of the Apriori Algorithm
 
 To understand the code, read the original paper that gave the
 algorithm:
 
    Fast Algorithms for Mining Association Rules,
    	By R. Agrawal and R. Srikant,
	In Proc. of 20th VLDB Conf., September 1994

 In a later paper, Agrawal mentions that the 2nd pass of the algorithm
 should be carried out using a 2d array rather than a hashtree data
 structure.  This implementation takes that into account.  The later
 paper is --
 
    Parallel Mining of Association Rules:
    	Design, Implementation and Experience,
	By R. Agrawal and J. Shafer,
    	as Tech-report, No. RJ10004,
	IBM Almaden Research Center, San Jose, CA 95120,
	January 1996
***********************************************************************/

#include<stdio.h>
#include "apriori.h"
#include "database.h"
#include "triangle.h"
#include <new.h>
#include <math.h>
//#include <matrix.h>
#include <sys/times.h>


// Work estimate accumulated by apriori(): each pass from the third onwards
// adds (tuples read * sqrt(number of candidates)).  main() prints it.
double metric = 0;
// Definition of the static counter Apriori_gen2::no_pruned.  apriori() resets
// it to 0 before every AprioriGen() call and prints it afterwards.
long Apriori_gen2::no_pruned = 0;
// Disabled definitions of per-node-type static counters, kept for reference.
// The original author was unsure of their purpose ("WHAT'S THIS?"):
// int Leaf_node<Itemset,int>::num = 0;
// int Internal_node<Itemset,int>::num = 0;

// Average tuple length: apriori() adds one per item seen in pass 1 and then
// divides by db.rows().  main() writes it to stderr.
double tlen=0;


/*
 * Out-of-memory handler: writes a diagnostic to cerr and terminates the
 * process with exit status 1.  Written with the new-handler signature
 * (no arguments, no return value); nothing in this file installs it.
 */
void freeStoreException( void  )
{
    const int failureStatus = 1;

    cerr << "Free store exhausted!";
    cerr << endl;	// newline + flush before the process goes away
    exit( failureStatus );
}

void apriori(Database& db,double minsupp,cItemsetBag& results,double probP,double probQ)
{
/* Assumption: Tuples contain items as integers which range from 0 to
		db.columns() */

	
//XXX for MASK	
cHashtree CMask(db.columns());

time_t recons_time=0;
time_t t1,t2;
double** CoeffMat;
double** distMat;
time_t start_func = time(NULL);


CoeffMat = UpdateMatrix(CoeffMat,distMat,1,probP,probQ);



    //############### special first pass
    vector<int> counts(db.columns());

    cout << "\nPass 1";
    cout << "\nNumber of candidates: " << counts.size() << endl;
    Itemset i;
    db.rewind();
    int tuple=0;

    
    while ( tuple < db.rows() && db >> i )
    {
	if ( ++tuple % 10000 == 0 ) 
	{
	    if ( tuple % 100000 == 0 )
		cout << "*" << flush;
	    else
		cout << "." << flush;
	}

	
	//counting 1-itemsets in counts[db.columns]
	for (Itemset::const_iterator j = i.begin(); j != i.end(); j++)
	{
	    counts[j->id]++;
	
	    //For performance metric purposes
	    tlen++;
	}



	
    }

    
    if ( tuple < db.rows() ) 
    {
	cout << "\nError: Database doesn't have as many tuples as"<<
		" specified in the metafile. There are only " <<
		tuple << " rows.\n";
	exit(1);
    }


    tlen=tlen/db.rows();	


    
    //minimum support
    int mincount = (int)ceil(minsupp * tuple);

    // 10 % less mincount
    //mincount-=mincount*(0.1);


    
    int noLarge = 0;
    vector<int>::iterator j;
    // XXX removed :  finding |F1| in noLarge
     /* for (j = counts.begin(); j != counts.end(); j++)
     * if (*j >= mincount)
     *     noLarge++;
     */

    //XXX reconstruct and find |F1| in noLarge
    int index;

    
    t1 = time(NULL);
    for (index = 0; index < (int)counts.size(); index++)
    {
	
	cItemset newEntry;
	newEntry.first.reserve(1);
	newEntry.first.insert(Item(index));
	newEntry.second = counts[index];

	reconstruct(newEntry,&CMask,1,db.rows(),CoeffMat,mincount);
	
	
	if (newEntry.second >= mincount)
	{
		noLarge++;
				
	}
	
	//update count to correct
	counts[index]=newEntry.second;

	////////////////////////////////////////////////////////////////////
	
    }	
    t2 = time(NULL);
    recons_time += (t2-t1);
   
   cout<<"Reconstruction time = "<<recons_time<<"\n";

	 //resizing results (type cItemsetBag) by noLarge(|F1|) to store F1
        int temp = results.size();
        results.resize(results.size()+noLarge);
        cItemsetBag::iterator k = results.begin() + temp;
    
    
        //storing F1 ( Item(index) from ) by checking 'count[index]>=mincount' in cItemsetBag 'results'
        for (index = 0; index < (int)counts.size(); index++)
            if (counts[index] >= mincount)
            {
                (k->first).reserve(1);
                (k->first).insert(Item(index));
                k->second = counts[index];
                k++;
            }
    
        cout << "\n" << noLarge << " large 1-itemsets (with count >= " <<
                mincount << ")" << endl;      

	cout<<"Pass1 time = "<<time(NULL)-start_func<<"\n"; 

    
    //Try printing CMask
    //for_each(CMask,Print());
    
    
    
    

    //############### special second pass
    if (noLarge < 2)
	return;

    //reuse counts vector to store indices to 2d count array in 2nd pass
    //replacing counts of items in 'counts' by 'index for large item' / '-1'
    vector<int> reverseIndex;
    reverseIndex.reserve(noLarge);

    index = 0;
    for (j = counts.begin(); j != counts.end(); j++)
	if (*j >= mincount)
        {
	    *j = index;
	    reverseIndex[index] = (j-counts.begin());
	    index++;
	}
        else
	    *j = -1;


    //forming C2 in Triangular_array 'counts2'
    Triangular_array counts2(noLarge);

    cout << "\nPass 2";
    cout << "\nNumber of candidates: " << counts2.size() << endl;
	
    /////////XXX////////////////////    
    //CoeffMat = UpdateMatrix(CoeffMat,2,prob);
    CoeffMat = UpdateMatrix(CoeffMat,distMat,2,probP,probQ);

    	/*for(int v=0;v<3;v++)
		printf("%f ",CoeffMat[0][v]); 
    	printf("\n");	*/
    
    ///////////////////////////////
    
    
    db.rewind();
    tuple=0;
    
    t1=time(NULL);
    while ( tuple < db.rows() && db >> i )
    {
	if ( ++tuple % 10000 == 0 ) 
	{
	    if ( tuple % 100000 == 0 )
		cout << "*" << flush;
	    else
		cout << "." << flush;
	}

	for (Itemset::const_iterator j = i.begin(); j != i.end(); j++)
	    if (counts[j->id] != -1) //*j is a large 1-itemset
		for (Itemset::const_iterator k = j+1; k != i.end(); k++)
		    if (counts[k->id] != -1) //*k is a large 1-itemset
		      counts2.increment(counts[j->id],counts[k->id]);
    }
    t2=time(NULL);
    cout<<" database counting time in pass 2 = "<<t2-t1<<"\n";


    //cHashtrees L and C for use in pass 3
    cHashtree L(db.columns()), C(db.columns());


    
    //traverse triangular array to find large 2-itemsets
    //form a candidate Itemset 'cItemset newEntry' for every large 2-Itemset and insert in cHashtree L
t1 = time(NULL);


// 8 % less mincount
  //  mincount = (int)ceil(minsupp * tuple);
   // mincount-=mincount*(0.1-0.02);	


for (int p = 0; p < noLarge-1; p++)
	for (int q = p+1; q < noLarge; q++)
	    //if (counts2.at(p,q) >= mincount)
            {
		cItemset newEntry;
		newEntry.first.reserve(2);
		newEntry.first.insert(Item(reverseIndex[p]));
		newEntry.first.insert(Item(reverseIndex[q]));
		newEntry.second = counts2.at(p,q);
		
		//XXX/////////////////////////////////////////////////////
		//Triangular_array counts2 has distorted counts ,Reconstruct it , calculate new counts
		
		
		reconstruct(newEntry,&CMask,2,db.rows(),CoeffMat,mincount);

		if(newEntry.second >= mincount)
		{
			L.insert(newEntry);
			//XXX Insert in CMask here with distorted counts //done inside reconstruct
			//newEntry.second=counts2.at(p,q);
			//CMask.move(newEntry);
		}
		///////////////////////////////////////////////////////////
		
			//L.move(newEntry);
	    }
t2 = time(NULL);
recons_time += (t2-t1);

   cout<<"Reconstruction time = "<<recons_time<<"\n"; 
    cout << "\n" << L.size() << " large 2-itemsets." << endl;
    
    

    //Genrate candidates by Apriorigen with pruning for pass 3 in C from large 2-itemsets in L
    Apriori_gen2::no_pruned = 0;
    AprioriGen(L,C);
    cout << Apriori_gen2::no_pruned << " itemsets pruned for" <<
	    " next pass." << endl;

    
    
    /*/////////////////////////////////////////////////
    	for_each(CMask,Print());
    */ //////////////////////////////////////////////
    
    //store F2 in cItemsetBag 'results'
    results.reserve(results.size()+L.size());
    for_each(L,Move(results));
	
    
    
    
    
    L.clear();
	
    cout<<"Pass1 + Pass2 time = "<<time(NULL)-start_func<<"\n"; 
 
    //############### other passes
    for ( int pass = 3; C.size() > 0; pass++ )
    {
	cout << "\nPass " << pass;
	cout << "\nNumber of candidates: " << C.size() << endl;
	db.rewind();
	tuple=0;

	//developing counts for candidates of current pass
	while ( tuple < db.rows() && db >> i )
	{
	    if ( ++tuple % 10000 == 0 ) 
	    {
		if ( tuple % 100000 == 0 )
		    cout << "*" << flush;
		else
		    cout << "." << flush;
	    }
	    //Increment counts of those candidate itemsets in hastree C which are present in tuple i
	    for_each_subset(C,IncrCount(),i);
	}
	
	
	
	metric += (tuple * sqrt(C.size()));
	

	/////////////////////////////////////////////////////////////	
	
	//__% less mincount
	//mincount = (int)ceil(minsupp * tuple);
    	//mincount-=mincount*(0.1-0.02*(pass-1));
	
	//XXX : C has distorted counts ,Reconstruct C , calculate new counts
	t1 = time(NULL);
	//CoeffMat = UpdateMatrix(CoeffMat,pass,prob);
	CoeffMat = UpdateMatrix(CoeffMat,distMat,pass,probP,probQ);
	
	
	for_each(C,Reconstruct(&CMask,&L,pass,db.rows(),CoeffMat,mincount));
	t2 = time(NULL);
	recons_time += (t2-t1);
   	cout<<"Reconstruction time = "<<recons_time<<"\n"; 
	////////////////////////////////////////////////////////////
	
	//XXX they have already been moved in Reconstruct 
	//Move large n-itemsets to hashtree L
	//for_each(C,MoveLarge(L,mincount));
	
	cout << "\n" << L.size() << " large " << pass << "-itemsets." << endl;
	

	
    	//Genrate candidates in C for next pass using L
	C.clear(); //make it empty
	Apriori_gen2::no_pruned = 0;
	AprioriGen(L,C);
	cout << Apriori_gen2::no_pruned << " itemsets pruned for" <<
		" next pass." << endl;

	
	
	/////////////////////////////////////////////////
	//FIXME U CANT DO THIS HERE store in CMask, the large-itemsets before deleting
	//for_each(L,Insert(CMask));
    	//Let me try printing the items in CMASK
	//for_each(CMask,Print());
	//cout<<CMask;
	//////////////////////////////////////////////
	
	//store frquent itemsets generated in this pass in results
	results.reserve(results.size()+L.size());
	for_each(L,Move(results));

	

	    
	L.clear();

	cout<<"Time till now = "<<time(NULL)-start_func;
	//At this point L is clear and C contains candidates, if any, for next pass
    }
    
}

/***************** Trial main() ********************/
int main(int argc,char** argv)
{
  if(argc!=6){
    printf("Usage %s <meta file> <min support> <Result File> <p> <q>\n",argv[0]);
    exit(1);
  }
  char* dbn = argv[1];
  double minsupp = atof(argv[2]);
  char* outn = argv[3];
  char dbname[256];
  char outname[256];
  int j1;
  double p = atof(argv[4]);
  double q = atof(argv[5]);
  
  time_t start=time(NULL);
  time_t end;  

  
  for(j1=0;j1<256;j1++)
    dbname[j1] = dbn[j1];
  
  for(j1=0;j1<256;j1++)
    outname[j1] = outn[j1];  
  //  delete [] dbn;
  //delete [] outn;

    /*
    set_new_handler( freeStoreException );
    cout << "\nEnter database name: ";
    cin >> dbname;
    */
    Database db(dbname);
    if ( !db ) {
	cout << "\nArgggh!! Something wrong with your database.\n";
	return 1;
    }
    /*
    cout << "\nEnter minimum support: ";
    double minsupp;
    cin >> minsupp;
    cout << "\nEnter output filename: ";
    char outname[256];
    cin >> outname;
    */



    cItemsetBag results;
    apriori(db,minsupp,results,p,q);
    ofstream outfile(outname);
    if ( !outfile ) {
	cout << "\nArggh!! Can't open output file.\n";
	return 1;
    }
    outfile << db.columns() << "\n";
    outfile << results.size() << "\n";
    // cItemsetBag::iterator????
    for (cItemsetBag::iterator i=results.begin(); i!=results.end(); i++)
    	{ outfile << (i->first) << " : " << (i->second) << "\n"; }

    cout << "\nMetric: " << metric << endl;
    end=time(NULL);
    cout<< "Time = "<<end-start<<"\n";	

struct tms *buf = new struct tms;

times(buf);

clock_t usrTick = buf->tms_utime;
fprintf(stderr,"%f %f %f %d %f ",minsupp,p,q,usrTick,tlen);	


return 0;
}


