// File:  cand.C
// Created by: Pradeep Shenoy (purdy@cse.iitb.ernet.in)
// Last modified: 11 Aug 1999
//
// Description:
//	Contains the candidate generation code. The basic structure is a hash
// table. Refer to cand.h for details.

#pragma implementation "cand.h"

#include "include/cand.h"
#include "include/global.h"

// Some local functions
// toArray() and intersect() are helpers defined further down in this file.
void  toArray(Prefix_t *buc);
RestList *intersect(RestList *list, Extension_t *array, int count);
// NOTE(review): the only copy_list definition in this file is the member
// CandPrune_t::copy_list(); no free function with this signature is defined
// here, so this declaration looks stale -- confirm against cand.h.
RestList *copy_list(Extension_t *array, int num, Extension_t *);


//-------------------------------------------------------------------
// Basic functions of hash tables:
// constructor, destructor, insert/lookup/delete etc.

// Builds an empty candidate hash table.
//   length_of_cand : number of items in every itemset that will be inserted.
//                    The hashed prefix is one item shorter (`length`); the
//                    last item of an itemset is stored as its extension.
//   pruning_on     : non-zero turns generator pruning on (isPrunedGen).
CandPrune_t::CandPrune_t(int length_of_cand, int pruning_on){

    table = new Prefix_t*[MAXBUC];
    for (int i = 0; i < MAXBUC; i++)
        table[i] = NULL;

    length = length_of_cand - 1;
    isPrunedGen = pruning_on;
    n_generators = 0;
    n_uncovered = 0;
    candlist = NULL;

    // These two are otherwise assigned only by insertLargeList() and
    // getCands(). Give them defined values so that getPrunedGens() never
    // reads or deletes an indeterminate value when those calls did not
    // happen (or returned early).
    generatorList = NULL;
    numPrunedGens = 0;

    // Scratch buffer used by ClusterGen(): prefix, extension, one more item.
    ExtendPrefix = new int[length+2];
}

// Releases the candidate list, every prefix bucket with its extensions, the
// bucket table and the ExtendPrefix scratch buffer.
// generatorList is not touched here: getPrunedGens() deletes it, and this
// block cannot tell whether that has already happened.
CandPrune_t::~CandPrune_t(void){

    // Clean up the candidate list first.
    // Whoever called for it must have used it by now.
    while (candlist){
        Cand *c = candlist;
        candlist = candlist->next;

        delete [] c->set;           // allocated in addCandToList()
        delete [] c->to_decrement;  // the subsets array from copy_list()
        delete c;
    }

    // Clean up the hash table.
    for (int i = 0; i < MAXBUC; i++){
        Prefix_t *p = table[i];

        while (p){
            Prefix_t *dead = p;
            p = p->next;

            if (dead->conv_flag){
                // toArray() replaced the list by one new[] array.
                delete [] dead->extlist;
            }
            else{
                // Still a chain of nodes that insert()/addSet() allocated
                // one at a time with plain new: free each node with plain
                // delete. Using delete [] here would be a mismatched delete
                // and would leak every node after the first.
                Extension_t *e = dead->extlist;
                while (e){
                    Extension_t *following = e->next;
                    delete e;
                    e = following;
                }
            }

            delete dead;
        }
    }

    delete [] table;
    delete [] ExtendPrefix;
}

// Hashes an itemset prefix and reduces the result modulo MAXBUC.
// With the default `special` (negative) the first `length` items of `set`
// are hashed, so callers can simply write hashvalue(set). With a
// non-negative `special`, `set` is read as length+1 items and the item at
// index `special` is left out of the sum (see the note on isEqual()).
int CandPrune_t::hashvalue(int *set, int special = -1){
    unsigned long sum = 0;

    // One extra item is scanned when one of them is going to be skipped.
    const int limit = (special < 0) ? length : length + 1;

    for (int pos = 0; pos < limit; pos++){
        if (pos == special)
            continue;               // never true when special is negative

        const int item = set[pos];
        sum += (item<<12)+(item<<8)+(item<<4)+item;
    }

    return (sum % MAXBUC);
}

// Compares two prefixes of `length` items; returns 1 if equal, 0 otherwise.
// With the default `special` (negative) this is a plain element-wise
// comparison, usable as isEqual(set1, set2).
//
// Candidate generation is done in groups, and the candidates are grouped by
// a prefix that is one item longer than the hash table's prefix. We need
// the bucket for each subset of that longer prefix, so for ease of use the
// caller passes the original (longer) prefix plus the index of the item to
// ignore.
//
// Note that the _second_ set is the larger one: when `special` is
// non-negative, set2 is read with index `special` skipped.
int CandPrune_t::isEqual(int *set1, int *set2, int special = -1){

    for (int i = 0; i < length; i++){

        // Position in set2 that lines up with set1[i]: shifted by one from
        // the skipped index onwards, unshifted in the usual lookup.
        const int j = (special >= 0 && i >= special) ? i + 1 : i;

        if (set1[i] != set2[j])
            return 0;
    }

    return 1;
}

// Bulk insert of `nelems` itemsets from `array`; always returns 0.
// When pruning is on, generatorList is also (re)allocated with `nelems`
// slots for PruneGenerators() to fill.
//
// Possible optimisation, currently not done: sort the itemsets by prefix,
// then build one bucket per prefix and one array per extension list.
int CandPrune_t::insertLargeList(Cand_t *array, int nelems){

    for (int idx = 0; idx < nelems; idx++){
        Cand_t &entry = array[idx];

        if (!isPrunedGen){
            // Multiplexing: keep the cand-id in place of genID1
            insert(entry.itemset, entry.localID, 0);
            continue;
        }

        // We need to keep the i/j snakes for the generators
        insert(entry.itemset, entry.genID1, entry.genID2, entry.frequency);
    }

    if (isPrunedGen)
        generatorList = new Extension_t[nelems];

    return 0;
}

int CandPrune_t::insert(int *itemset, int i, int j, int freq=0){
    
    int hashed;
    Prefix_t *curr;
    
    hashed = hashvalue(itemset);
    curr = table[hashed];
    
    // First case if the 1st slot is not filled up.
    if( curr == NULL){
	table[hashed] = curr = new Prefix_t;

	curr->set = itemset;// Multiplex use
	
	curr->numExt = 1;
	curr->next = NULL;
	curr->conv_flag = 0;
	
	curr->extlist = new Extension_t;
	curr->extlist->prefix = itemset;
	curr->extlist->member = itemset[length];
	curr->extlist->frequency = freq;	
	curr->extlist->i =  i;
	curr->extlist->j = j;
	curr->extlist->genID = isPrunedGen? -1:i;
	curr->extlist->next = NULL;
	
	return n_generators++;
    }

    // The first slot contains the same prefix
    if (isEqual(curr -> set, itemset))
	return addSet(itemset, i, j, curr, freq);

    Prefix_t *bac = curr; curr = curr->next;

    while(curr && !isEqual(curr ->set, itemset)){
	bac = curr;
	curr = curr->next;
    }

    // Found the relevant prefix bucket.
    if(curr) return addSet(itemset,i,j,curr,freq); 
    


    // Reach here if the prefix is new.
    curr = bac-> next = new Prefix_t; 

    curr->set = itemset;  // Multiplex the use
    
    curr->numExt = 1;
    curr->next = NULL ;
    curr->conv_flag = 0;
    
    curr->extlist =  new Extension_t;
    curr->extlist->prefix = itemset;
    curr->extlist->member = itemset[length];
    curr->extlist->frequency=freq;
    curr->extlist->i = i;
    curr->extlist->j = j;
    
    curr->extlist->genID = isPrunedGen? -1 : i;
    curr->extlist->next = NULL;
    
    return n_generators++;
}

// Used for one special case of the insert routine: If the prefix exists, 
// and we wanna find out whether the extension exists either (if not, insert
// it in correct sorted position.

int CandPrune_t::addSet(
	int *itemset, int i, int j, Prefix_t *pref, int freq){
    
    Extension_t *curr, *prev;
    
    curr = prev = pref->extlist;
    if (curr -> member > itemset[length]){
	Extension_t *x = new Extension_t;
	
	x->prefix = itemset;
	x->member = itemset[length]; 
	x->frequency=freq;
	x->i = i; x->j = j;
	
	// if pruning on, will reassign id.
	x->genID = (isPrunedGen)? -1 : i;
	
	x->next = curr;
	pref -> extlist = x;
	pref-> numExt++;
	return n_generators++;
    }
    
    while( curr && curr->member < itemset[length]){
	prev = curr;
	curr = curr->next;
    }
    
    if( curr==NULL || curr->member != itemset[length] ){
	
	Extension_t *x = new Extension_t;
	
	x->prefix = itemset;
	x->member = itemset[length];
	x->frequency=freq;
	x->i = i; x->j = j;
	
	// if pruning, will reassign id.
	x->genID = (isPrunedGen)? -1: i;
	
	x->next = curr;
	prev->next = x;
	
	pref->numExt++;
	return n_generators++;
    }
    
    return 0;
}

// Finds the bucket whose prefix equals `itemset`, optionally ignoring the
// item at index `special` (see hashvalue() / isEqual()).
// Returns the bucket, or NULL when there is none.
Prefix_t *CandPrune_t::lookup(int *itemset, int special = -1){

    const int slot = hashvalue(itemset, special);

    for (Prefix_t *node = table[slot]; node; node = node->next)
        if (isEqual(node->set, itemset, special))
            return node;

    return NULL;    // Nope, not here.
}

//-----------------------------------------------------------------
// Now we have the actual CandidateGeneration as well as
// Generator Prune functions. 
//-----------------------------------------------------------------


// Generates the candidates and returns them as an array. Along the way it
// does the preprocessing used to build the set of covering itemsets of
// each candidate.
//
//   numcands : out; receives the number of candidates.
//   returns  : new[] array of *numcands entries, or NULL when there are
//              none.
//
// Every entry's `itemset` and `DAGchildren` point into two int arrays
// allocated here (length+2 ints per candidate each). Nothing in this file
// frees those arrays or the returned array -- presumably the caller owns
// them; confirm against the callers.
Cand_t *CandPrune_t::getCands(int *numcands){

    // For each prefix bucket in the table: process_prefix(bucket).
    for (int b = 0; b < MAXBUC; b++)
        for (Prefix_t *p = table[b]; p; p = p->next)
            process_prefix(p);

    // Finished candidate generation: the candidates are in candlist.
    *numcands = n_uncovered;

    if (!n_uncovered){      // No candidates
        // No candidate means no generator is in use. Record that, so a
        // later getPrunedGens() does not size its arrays from a value that
        // was never assigned.
        if (isPrunedGen)
            this->numPrunedGens = 0;
        return NULL;
    }

    // If pruning is enabled, do pruning as well.
    // Current status of pruning: re-number the generators, and drop those
    // generators that are not used in any candidate.
    if (isPrunedGen)
        this->numPrunedGens = this->PruneGenerators();

    // Copy the candidates into an array, along with other info.
    const int width = length + 2;       // items (and subsets) per candidate
    Cand_t *c = new Cand_t[*numcands];
    int *tmpint1 = new int [(*numcands) * width];   // itemsets
    int *tmpint2 = new int [(*numcands) * width];   // downpointers

    Cand *nextcand = candlist;
    for (int i = 0; i < *numcands; i++){

        // Itemset of candidate i.
        int *items = tmpint1 + i * width;
        for (int j = 0; j < width; j++)
            items[j] = nextcand->set[j];
        c[i].itemset = items;

        // Order the covering itemsets by ascending frequency, so that the
        // ones with the lowest frequencies come first... should reduce
        // #updates in the merge function.
        Extension_t **arr = nextcand->to_decrement;
        for (int j = 0; j < width; j++)
            for (int k = j + 1; k < width; k++)
                if (arr[j]->frequency > arr[k]->frequency){
                    Extension_t *xx = arr[j];
                    arr[j] = arr[k];
                    arr[k] = xx;
                }

        // Downpointers: the generator ids of the covering itemsets.
        int *children = tmpint2 + i * width;
        for (int j = 0; j < width; j++){
            children[j] = arr[j]->genID;
            assert(children[j] != -1);
        }
        c[i].DAGchildren = children;

        c[i].genID1 = c[i].genID2 = 0;
        c[i].DAGparents = NULL;
        c[i].numparents = 0;
        c[i].frequency = 0;
        c[i].localID = i;

        nextcand = nextcand->next;
    }

    return c;
}

// Runs candidate generation for one prefix bucket: every extension except
// the last is paired with the extensions that follow it in the bucket's
// array, and the pair list is handed to ClusterGen(). Always returns 0.
int CandPrune_t::process_prefix(Prefix_t *prefix){

    // The extensions are walked as an array; convert on first use.
    if (!prefix->conv_flag)
        toArray(prefix);

    Extension_t *exts = prefix->extlist;
    const int last = prefix->numExt - 1;

    int pos = 0;
    while (pos < last){
        Extension_t *head = &exts[pos];

        // List of the (last - pos) extensions after `head`; ClusterGen()
        // consumes and frees it.
        RestList *rest = copy_list(head->next, last - pos, head);
        ClusterGen(prefix->set, head->member, rest);

        pos++;
    }

    return 0;
}

int CandPrune_t::ClusterGen(int *set, int elem, RestList *rest){
    
    // ClusterGen() 
    //	newp = prefix + elem. 	// len = length+1
    //      for i = 0 to length-1, 
    //          remove ith element to get a i-1 prefix
    //	    get the bucket for this prefix
    //          create a "new" list, that is the intersection of oldlist and 
    //	            bucket -> head.	// an array
    //          if list == null break.
    //      END: if (list not null) output newp + each list-elm as a candidate.
    

    RestList *tmp;
    for (int i = 0; i < length;i++)
	ExtendPrefix[i] = set[i];
    ExtendPrefix[length] = elem;
    
    for (int i = 0; i < length ; i++){
	
	Prefix_t *subPrefix = lookup(ExtendPrefix, i);
	if(!subPrefix) goto CLEANUP;
	
	if (!subPrefix -> conv_flag) toArray(subPrefix);
	rest = intersect(rest, subPrefix -> extlist, subPrefix ->numExt);
	if(!rest) break;
    }
    
    while(rest){
	
	ExtendPrefix[length+1] = rest -> member;
	addCandToList(ExtendPrefix,rest ->subsets);
	tmp = rest;
	rest = rest -> next;
	delete tmp;
    }
    
    return 0;

    // Clean up some stray pointers left (if any).
CLEANUP:
    
    while(rest){
	tmp = rest;
	rest = rest -> next;
	delete [] tmp -> subsets;
	delete tmp;
    }
    
    return 0;
}

// Intersects the linked list `list` with array[0..count-1], comparing on
// `member`. Both are expected to be sorted by ascending member (addSet()
// keeps extension lists sorted and copy_list() preserves that order).
//
// A list node with a matching array element stays in the list and gets a
// pointer to that element appended to its `subsets` array; no capacity
// check is made here. Every other node is freed together with its
// `subsets` array.
//
// Returns the head of the filtered list, or NULL if nothing is left.
RestList *intersect(RestList *list, Extension_t *array, int count){

    // `link` is the pointer that currently points at the node under
    // inspection (the head pointer or some node's `next`), which lets the
    // head and an inner node be unlinked the same way.
    RestList **link = &list;
    int i = 0;

    while (i < count && *link){
        RestList *node = *link;

        if (array[i].member > node->member){
            // The array has already moved past this member: no match is
            // possible, so unlink and free the node.
            *link = node->next;
            delete [] node->subsets;
            delete node;
        }
        else if (array[i].member < node->member){
            // Advance array pointer.
            i++;
        }
        else{       // Both are equal
            // Record this Extension_t among the node's subsets.
            node->subsets[node->subsetcount++] = &array[i];

            // Advance both pointers.
            link = &node->next;
            i++;
        }
    }

    // The array ran out: every node still ahead is unmatched. Cut the tail
    // off and free it, including each node's subsets array (the original
    // code freed only the node here and leaked the array).
    RestList *tail = *link;
    *link = NULL;

    while (tail){
        RestList *dead = tail;
        tail = tail->next;

        delete [] dead->subsets;
        delete dead;
    }

    return list;
}

int CandPrune_t::addCandToList(int *set, Extension_t **list){
    
    Cand *cand = new Cand;
    
    int *myset = new int [length+2];
    
    for (int i = 0; i < length+2; i++)
	myset[i] = set[i];
    
    cand -> to_decrement =  list;
    cand -> set = myset;
    cand -> next = candlist;
    candlist = cand;
    
    return ++n_uncovered;
    
}

// Returns the generators that survived pruning, as a new[] array.
//
// We assume that the preprocessing candidate generation (getCands() with
// pruning on) has already taken place, so that numPrunedGens and
// generatorList hold the information read here.
//
//   numgens : out; receives the number of entries returned.
//   returns : array of *numgens entries. Each `itemset` points into one int
//             array allocated here (length+1 ints per generator). Nothing
//             in this file frees either array -- presumably the caller
//             owns them; confirm against the callers.
//
// generatorList is released by this call. A repeated call therefore
// reports zero generators instead of reading and deleting freed memory.
Cand_t *CandPrune_t::getPrunedGens(int *numgens){

    if (generatorList == NULL){
        // Already handed out by an earlier call: nothing left to copy.
        *numgens = 0;
        return new Cand_t[0];
    }

    *numgens = numPrunedGens;
    Cand_t *genarray = new Cand_t[numPrunedGens];

    int *tmpint = new int [numPrunedGens *(length+1)];

    // Entry i is the generator that PruneGenerators() numbered i.
    for (int i = 0, k = 0; i < numPrunedGens; i++){

        const Extension_t &e = generatorList[i];

        // Copy the itemset (prefix followed by the extension member) into
        // the snaked array.
        genarray[i].itemset = tmpint + k;
        for (int j = 0; j < length; j++)
            tmpint[k++] = e.prefix[j];
        tmpint[k++] = e.member;

        genarray[i].genID1 = e.i;
        genarray[i].genID2 = e.j;

        // Blank out the rest
        genarray[i].numparents = genarray[i].frequency = 0;
        genarray[i].DAGparents = NULL;
        genarray[i].DAGchildren = NULL;
        genarray[i].localID = i;
    }

    delete [] generatorList;
    generatorList = NULL;   // do not leave a dangling pointer behind
    return genarray;
}

int CandPrune_t::PruneGenerators(void){
    
    int numGens = 0;
    Cand *c = candlist;
    while(c){
	
	for (int i = 0; i < length+2; i++){
	    Extension_t *e = c->to_decrement[i];
	    if (e->genID == -1){
		e -> genID = numGens;
		generatorList[numGens++] = *e;
	    }
	}
	
	c = c -> next;
    }
    
    return numGens;
}

// Converts a prefix bucket's extension list into an array of numExt
// elements, to make later traversal much easier. The `next` pointers are
// kept valid: each element points at the following array slot and the last
// one at NULL. The original list nodes are freed and conv_flag is set.
void  toArray(Prefix_t *buc){

    const int count = buc->numExt;
    Extension_t *flat = new Extension_t[count];
    Extension_t *node = buc->extlist;

    for (int k = 0; k < count; k++){
        Extension_t *following = node->next;

        flat[k] = *node;
        if (k + 1 < count)
            flat[k].next = &flat[k+1];
        else
            flat[k].next = NULL;

        delete node;
        node = following;
    }

    buc->extlist = flat;
    buc->conv_flag = 1;
}

// Builds a RestList with one node per element of array[0..num-1], in the
// same order as the array.
// Each node gets a subsets array of length+2 slots whose first two entries
// are the array element itself and `Exhead`; `member` is copied from the
// element. Returns the head of the list (NULL when num <= 0).
RestList *CandPrune_t::copy_list(
  	Extension_t *array, int num, Extension_t *Exhead)
{

    RestList *head = NULL;
    RestList **tail = &head;    // link that the next node is hung on

    // Appending at the tail preserves the sorted order of the array.
    for (int k = 0; k < num; k++){

        RestList *node = new RestList;

        node->subsets = new Extension_t*[length+2];
        node->subsetcount = 0;
        node->subsets[node->subsetcount++] = &array[k];
        node->subsets[node->subsetcount++] = Exhead;
        node->member = array[k].member;
        node->next = NULL;

        *tail = node;
        tail = &node->next;
    }

    return head;
}

