// File:  largeset.C 
// Created by: Pradeep Shenoy(purdy@cse.iitb.ernet.in)
// Last modified: 10 Aug 1999
//
// Description: 
//     This file contains a data structure (hash table) that contains all
// large itemsets.
//

#include <iostream.h>
#include <stdio.h>
#include <assert.h>
#include "include/largeset.h"

// Map an itemset to a bucket index in the range [0, HTSIZE).
//
// The mixing function is arbitrary.  Speed is not a concern: the table is
// only used after the counting passes, for rule generation -- a very small
// phase of the mining program.
//
// Each item contributes (item << 8) + (item << 4) + item to the running sum.
// The shifts are parenthesised on purpose: '+' binds tighter than '<<', so
// the unparenthesised form shifts by (8 + item) and then by (4 + item),
// which is undefined as soon as the shift count reaches the operand width.
// The sum is accumulated in unsigned long, so wrap-around is well defined.
//
// NOTE(review): this changes which bucket an itemset lands in compared with
// the unparenthesised expression, so the traversal order of the table may
// differ; lookups and insertions are unaffected.
int LargeSet_t::HashValue(ItemSet_t *itset){
    unsigned long hashval = 0;
    const int upto = itset -> Length();

    for (int i = 0; i < upto; i++){
        // Widen first so the shifts never operate on a (possibly signed)
        // narrower type.
        const unsigned long item = (*itset)[i];

        hashval += (item << 8) + (item << 4) + item;
    }

    return hashval%HTSIZE;
}

// Destructor: walks every hash chain and frees each bucket together with
// the itemset that bucket points to.
LargeSet_t::~LargeSet_t(void){
    for (int slot = 0; slot < HTSIZE; ++slot){
        LSBuc_t *node = table[slot];

        while (node != NULL){
            // Fetch the successor before the current bucket is freed.
            LSBuc_t *following = node -> next;

            delete node -> itset;
            delete node;

            node = following;
        }
    }
}

// Look up the count stored for an itemset.
//
// Scans the hash chain selected by HashValue(itset) and returns the count of
// the first bucket whose itemset IsEqual() to `itset`.  Returns 0 when the
// chain is empty or holds no matching itemset.
int LargeSet_t::GetCount(ItemSet_t *itset){
    const int slot = HashValue (itset);

    for (LSBuc_t *node = table[slot]; node != NULL; node = node -> next){
        if (node -> itset -> IsEqual(itset))
            return node -> count;
    }

    return 0;
}

// Record `itset` with its associated `count`.
//
// If no equal itemset is present, a new bucket is appended to the end of the
// relevant hash chain.  The bucket stores the `itset` pointer itself (no copy
// is made), and the destructor later deletes it.
//
// If an equal itemset is already present, nothing is stored; the call only
// asserts that the existing count matches `count` (a no-op when assertions
// are compiled out).
// NOTE(review): in that duplicate case the passed-in `itset` is neither kept
// nor freed here -- confirm that callers handle it.
void LargeSet_t::Insert(ItemSet_t *itset, int count){
    // `link` addresses the pointer that leads to the bucket being examined:
    // first the table slot itself, then each bucket's `next` field in turn.
    LSBuc_t **link = &table[HashValue (itset)];

    while (*link != NULL){
        LSBuc_t *node = *link;

        if (node -> itset -> IsEqual(itset)){
            // Already entered.
            assert (count == node -> count);
            return;
        }

        link = &node -> next;
    }

    // End of chain reached without a match: hook a new bucket in here.
    LSBuc_t *fresh = new LSBuc_t;
    fresh -> itset = itset;
    fresh -> count = count;
    fresh -> next = NULL;
    *link = fresh;
}

// Call `func` once for every stored itemset that has more than one item,
// add up the values it returns, and print that sum to cout as the total
// number of rules generated.
//
// This is the driver for rule generation, which is why single-item itemsets
// are skipped.  Per the original author's note, `func` is expected to be the
// rule-generation routine: it takes a large itemset, splits it into
// cause/effect, checks confidence, and classifies the resulting rule as
// +/-/general.
void LargeSet_t::ForAllItemsets(int(*func)(ItemSet_t*)){
    int ruleCount = 0;

    for (int slot = 0; slot < HTSIZE; ++slot){
        for (LSBuc_t *node = table[slot]; node != NULL; node = node -> next){
            // Single items cannot be split into a rule.
            if (node -> itset -> Length() <= 1)
                continue;

            ruleCount += func(node -> itset);
        }
    }

    cout << "Total #rules generated: " << ruleCount << endl;
}

//###### added by vikram
// Write every stored itemset to the stream `fp`, one per line, in the form
//     <itemset as written by PrintItemset>: <count>
// Entries appear in table order (slot by slot, then along each chain).
void LargeSet_t::PrintItemsets(ostream& fp){
    for (int slot = 0; slot < HTSIZE; ++slot){
        for (LSBuc_t *node = table[slot]; node != NULL; node = node -> next){
            node -> itset -> PrintItemset(fp);
            fp << ": ";
            fp << node -> count;
            fp << "\n";
        }
    }
}
//###### added by vikram
