/**********************************************************************
    Copyright (C) 2004 Database Systems Lab, Supercomputer Education and
    Research Centre, Indian Institute of Science, Bangalore, INDIA.
    http://dsl.serc.iisc.ernet.in

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
***********************************************************************/


/***********************************************************************
 AUTHOR: Vikram Pudi

 DESCRIPTION:

 Hashtree is a data-structure which contains itemsets.  An itemset is a
 set of items.  An item is an elementary entity that is not further
 defined, except that it comes from a predefined set of possible
 entities.

 A hashtree is a tree -- it has a root node which has children etc.
 until we reach leaf nodes.  Itemsets are stored only in leaf nodes. 
 The other (internal) nodes are merely used to search for a given
 itemset, or to find a location to insert one.

 It is assumed that the items in an itemset are in ascending order.  An
 itemset is implemented as a vector of items, as defined in the
 standard template library (stl) of C++.  An item could be of any type,
 as long as there is an ordering among items.  That is, the
 implementation of the type should define the < operator.  Since an item
 could be of any type, the implementation of hashtree takes the itemset
 type as a template parameter named "Key".

 To search for a particular itemset S = {i1,i2,...,ik}, we do the
 following --

 At the root node, we apply a hash function h() with i1 as argument. 
 This returns an integer, which determines the child (of the root),
 where S will be stored.  If the child N, is an internal node, then we
 apply the hash function with i2 as argument.  We now obtain the child
 of N where S will be stored.  This is repeated until we reach a
 leaf-node, or we have applied the hash function k times.

 In the former case, N is a leaf node.  A leaf node can have at most n
 itemsets.  If there is sufficient space in N, then we insert S into
 it.  Otherwise, we replace N with an internal node, create children
 (leaf nodes) for this internal node, and put the contents of N in
 these leaf nodes.  Then, there will be space to insert S also, in one
 of these leaf nodes.  The required leaf node, where each itemset will
 be inserted is again obtained by applying the hash function.
 
 In our implementation, a leaf node is a vector of itemsets.  Along with
 each itemset, we may want to store additional information about it
 (such as its count).  Therefore a leaf node is actually a vector of
 pair<Itemset,T>.  T could be of any type -- it is a template
 parameter.

 In the latter case, N is still an internal node.  Every internal node
 will have a special leaf node as a child, which will hold such
 itemsets.  In our implementation, an internal node is a vector of
 hashnodes (its children).  This special leaf node is present at the
 0th position of this vector.
 
 It can be shown that searching for an itemset will be most efficient,
 when the capacity of a leaf node (i.e. n) is equal to that of an
 internal node.  We use an ordinary mod hash function.  If there are p
 possible items, then it can be easily shown that the number of
 itemsets which will fall in one of the special nodes (of the previous
 para), can be at most p/n.  Thus requiring the capacities of leaf
 nodes and internal nodes to be equal means that p/n = n.
 That is, n = sqrt(p).  This is the way we calculate the node size.

 Care has been taken to stick to stl norms.  Reading Bjarne
 Stroustrup's book -- "The C++ Programming Language -3rd ed" will help
 to understand the code.  The arcane looking syntax can be bewildering
 at first sight, but is characteristic of programs that use stl and
 templates extensively.
***********************************************************************/

#ifndef HASHTREE_H_
#define HASHTREE_H_

#include "pair.h"
#include "vector.h"
#include "genPrint.h"
#include <math.h> //for ceil(), sqrt()

/***********************************************************************
 Define a function object to compare a pair<Key,T> and a Key.  This is
 required in functions which need to do a binary search for a Key in a
 leaf node.  Remember that a leaf node is a vector of pair<Key,T>.
***********************************************************************/
// Heterogeneous "less than" between a leaf entry (pair<Key,T>) and a bare
// Key.  Only the Key part of the entry takes part in the comparison, and
// the ordering is lexicographic over the items of the two keys.  Both
// argument orders are provided because binary_search() needs comp(elem,val)
// as well as comp(val,elem).
template<class Key, class T>
struct less_pair {
    // Nested typedefs formerly inherited from binary_function<>; spelled
    // out here so the functor no longer depends on that base class.
    typedef const pair<Key,T>& first_argument_type;
    typedef const Key& second_argument_type;
    typedef bool result_type;

    // true iff the key stored in entry e sorts strictly before key i
    bool operator()(const pair<Key,T>& e, const Key& i) const
    { return (lexicographical_compare(e.first.begin(),e.first.end(),
	    i.begin(),i.end())); }

    // true iff key i sorts strictly before the key stored in entry e
    bool operator()(const Key& i, const pair<Key,T>& e) const
    { return (lexicographical_compare(i.begin(),i.end(),e.first.begin(),
	    e.first.end())); }
};

/***************************** Hashnode *******************************/
struct Hashnode
{
    // Common base of Leaf_node and Internal_node.  It carries nothing but
    // the tag that tells the two kinds of node apart; the derived-class
    // constructors are the ones that set it.

    // Reports whether this node is a leaf.
    bool leaf() const { return type; }

    bool type; //true => leaf node, false => internal node
};

/***************************** Leaf Node *******************************/
template<class Key, class T>
struct Leaf_node : public Hashnode, public vector< pair<Key,T> >
{
    typedef Key key_type;
    typedef T data_type;
    typedef pair<Key, T> Entry;
    typedef typename vector<Entry>::iterator iterator;

  //    static int num;
    Leaf_node() { type = true;
    // num++;
    }
    ~Leaf_node() {
      // num--; 
    }

    //--- Moves the contents of an Entry e into the leaf node.
    //Note that it doesn't do any check to see if there is sufficient
    //space.  The leaf node is maintained sorted lexicographically.

    bool move(Entry& e)
    {
	iterator pos;
	pos = lower_bound(begin(),end(),e.first,less_pair<Key,T>());
		//binary search for e
	if ( pos == end() || !( pos->first == e.first )) {
	    pos=vector<Entry>::insert(pos,Entry());
	    (e.first).swap(pos->first);
	    pos->second = e.second;
	    return true;
	} else {
	    pos->second = e.second;
	    return false;
	}
    }

    //--- Removes i from the leaf node, if it is present. Returns true
    //if it actually removes i.
    bool remove(const Key& i)
    {
	iterator pos;
	pos = lower_bound(begin(),end(),i,less_pair<Key,T>());
		//binary search for i
	if ( pos != end() && pos->first == i )
	{
	    vector<Entry>::erase(pos);
	    return true;
	} else
	    return false;
    }

    //--- Searches for i in the leaf node and returns an iterator to it.
    iterator find(const Key& i)
    {
	iterator retVal = 
	    lower_bound(begin(),end(),i,less_pair<Key,T>());
	if ( retVal == end() || retVal->first != i )
	    return (end());
	else
	    return retVal;
    }

    //--- Searches for i in the leaf node and returns true if present.
    bool contains(const Key& i) const
        { return (binary_search(begin(),end(),i,less_pair<Key,T>())); }
};

/*************************** Internal Node ****************************/
template<class Key, class T>
struct Internal_node : public Hashnode, public vector<Hashnode*>
{
    int no; //no of entries in all children recursively
  //    static int num;

    int hashFunc(typename Key::T i) const { return (i.id % (capacity()-1) + 1); }
    Internal_node(size_t s,Hashnode* n) : vector<Hashnode*>(s,n)
  { type = false;
  //num++;
  }
    ~Internal_node()
    {
	for ( iterator i = begin(); i != end(); i++ ) {
	    if ( *i != 0 ) {
		if ( (*i)->leaf() )
		    delete ((Leaf_node<Key,T>*)(*i));
		else
		    delete ((Internal_node<Key,T>*)(*i));
	    }
	}

	//    num--;
    }
    size_t noSets() { return no; }
};

/***************************** HashTree *******************************/

// A hashtree mapping itemsets (Key) to associated data (T).  The tree owns
// all of its nodes.  Copy construction and assignment make a deep copy.
template<class Key, class T>
class HashTree
{
    Hashnode *root;   //0 when the tree holds no nodes
    size_t no_items;  //number of possible items, as given by the user
    size_t nodeSize;  //slots per internal node; also reserved per leaf

    //Node size for s possible items: ceil(sqrt(s))+1, but never below 2.
    //hashFunc() computes "% (capacity()-1)", so a node size of 1 (which
    //s == 0 would otherwise give) would be a division by zero.
    static size_t calcNodeSize(size_t s)
    {
	size_t n = (size_t)(ceil(sqrt((double)s)) + 1);
	return (n < 2) ? 2 : n;
    }

    //Returns a deep copy of the subtree rooted at n (0 for an empty one).
    static Hashnode *cloneNode(const Hashnode *n)
    {
	if ( n == 0 )
	    return 0;

	if ( n->leaf() ) {
	    const Leaf_node<Key,T> *src = (const Leaf_node<Key,T>*)n;
	    Leaf_node<Key,T> *dst = new Leaf_node<Key,T>(*src);
	    //move() treats a leaf as full once size() reaches capacity(),
	    //so give the copy at least the capacity of the original
	    dst->reserve(src->capacity());
	    return dst;
	}

	const Internal_node<Key,T> *src = (const Internal_node<Key,T>*)n;
	Internal_node<Key,T> *dst =
	    new Internal_node<Key,T>(src->size(),0);
	dst->no = src->no;
	for ( size_t k = 0; k < src->size(); k++ )
	    (*dst)[k] = cloneNode((*src)[k]);
	return dst;
    }

public:

    //Maps an item to a child slot in the range [1, capacity()-1]; slot 0
    //of every internal node is reserved (see the description at the top
    //of this file).
    int hashFunc(typename Key::T i) const { return (i.id % (capacity()-1) + 1); }
    typedef Key key_type;
    typedef T data_type;
//    typedef pair<Key, T> Entry;
    typedef Leaf_node<Key,T> leaf_type;
    typedef typename leaf_type::iterator iterator;
    typedef typename leaf_type::const_iterator const_iterator;
    typedef typename leaf_type::Entry Entry;
//---------------------- construct/destroy ---------------------
    HashTree() : root(0), no_items(1000), nodeSize(33) { }
    HashTree(size_t s) : root(0), no_items(s),
	    nodeSize(calcNodeSize(s)) { }
    HashTree(const HashTree<Key,T>& h) : root(cloneNode(h.root)),
	    no_items(h.no_items), nodeSize(h.nodeSize) { }
    HashTree<Key,T>& operator=(const HashTree<Key,T>& h)
    { //copy first, then swap, so that *this is untouched if copying fails
	if ( this != &h ) {
	    HashTree<Key,T> temp(h);
	    this->swap(temp);
	}
	return *this;
    }
    ~HashTree() { clear(); }

    //Deletes every node and leaves the tree empty; no_items and nodeSize
    //are kept.
    void clear()
    {
	if ( root != 0 ) {
	    if ( root->leaf() )
	      delete ( (Leaf_node<Key,T>*)root );
	    else
	      delete ( (Internal_node<Key,T>*)root );
	}
	root=0;
    }

//-------------------------- iterators -------------------------
    //A value-initialised iterator serves as the "not found" marker
    //returned by find().  Where vector iterators are plain pointers this
    //is the null pointer.
    iterator end() { return iterator(); }
    const_iterator end() const { return const_iterator(); }
/* NOTE: iterating on a hashtree is not supported as yet. However these
   functions are provided to maintain consistency with STL. */

//----------------- misc. functions to access members ----------
    //Records the number of possible items and recomputes the node size
    //from it.  The node size is left alone while the tree has internal
    //nodes: they were created with the old size, and hashFunc() would
    //otherwise produce slots that do not match them.
    void setNoItems(size_t s)
    {
	no_items = s;
	if ( root == 0 || root->leaf() )
	    nodeSize = calcNodeSize(s);
    }
    size_t capacity() const { return nodeSize; }
    size_t noItems() const { return no_items; }

    //Number of itemsets stored in the tree.
    size_t size() const
    {
	if ( root == 0 )
	    return 0;
	else if ( root->leaf() )
	    return (((Leaf_node<Key,T>*)root)->size());
	else
	    return (((Internal_node<Key,T>*)root)->noSets());
    }

    Hashnode *getRoot() { return root; }
    const Hashnode *getRoot() const { return root; }
    				//warning! Not for public use.
    				//required to implement traversals.

//------------------- move/remove itemsets ---------------------
    HashTree<Key,T>& move(Entry&);
    HashTree<Key,T>& remove(const Key&);
    HashTree<Key,T>& insert(const Entry& e)
    { //copy the entry to a new place and then move it in
	Entry temp = e;
	return (move(temp));
    }

    void swap(HashTree<Key,T>& h)
    { //swap contents of 2 hashtrees
	Hashnode *troot = root;
	size_t tnodeSize = nodeSize;
	size_t tno_items = no_items;
	root = h.root;
	nodeSize = h.nodeSize;
	no_items = h.no_items;
	h.root = troot;
	h.nodeSize = tnodeSize;
	h.no_items =  tno_items;
    }

//--------------------- find an itemset ------------------------
    iterator find(const Key&);
    const_iterator find(const Key&) const;
    bool contains(const Key&) const;
};

/********************* Helper functions ***********************
  The remaining part of this file need not be changed if the
  implementation of hashtree is changed, provided that the
  implementation provides the same functions as it does now.
**************************************************************/

#include "hashTraverse.h"
	//Functions for traversing the hashtree in various ways

/************************ operators **************************/

// h += e : inserts a copy of entry e into hashtree h and returns h.
// Entry is a type nested in a class that depends on the template
// parameters, hence the typename keyword.
template<class Key, class T>
inline HashTree<Key,T>& operator+=(HashTree<Key,T>& h,
	const typename HashTree<Key,T>::Entry& e)
	{ return (h.insert(e)); }
// tree -= itemset : forwards to HashTree::remove() and yields what it
// returns.
template<class Key, class T>
inline HashTree<Key,T>& operator-=(HashTree<Key,T>& tree, const Key& itemset)
{
    HashTree<Key,T>& result = tree.remove(itemset);
    return result;
}

/************************** i/o operations ****************************/

// Writes one entry as "<first> : <second>" followed by a newline and
// hands the stream back to the caller.
template<class Key, class T>
inline ostream& operator<<(ostream& out, const pair<Key,T>& entry)
{
    out << entry.first;
    out << " : ";
    out << entry.second;
    out << "\n";
    return out;
}

template<class Key, class T>
inline ostream& operator<<(ostream& s, const HashTree<Key,T>& h)
{
    s << h.noItems() << "\n";
    s << h.size() << "\n";
    for_each(h,print<HashTree<Key,T>::Entry>(s));
    return s;
}

template<class Key, class T>
istream& operator>>(istream& s, HashTree<Key,T>& h)
{
    int no_items;
    s >> no_items;

    if (! s)
	return s;

    int noItemsets;
    s >> noItemsets;

    if (! s)
	return s;

    h.setNoItems(no_items);
    HashTree<Key,T>::Entry e;
    for (int i = 0; i < noItemsets && s >> e.first; i++)
    {
	char skip;
	s >> skip; //skip :
	s >> e.second;
	h.move(e);
    }

    return s;
}

/************** Inserting an itemset into the hashtree *****************
 The following member function inserts an itemset into the hashtree. 
 It is called move() because it actually moves the contents of the
 itemset into the hashtree.  This means that the original contents of
 the itemset will be destroyed!  This is meant for efficiency reasons. 
 The alternative would be to copy the itemset, i.e. do a vector copy of
 all the items, etc.  This is done using a different function --
 insert().
***********************************************************************/
// Moves entry into the hashtree and returns *this.  If the itemset is
// already present only its data is overwritten; otherwise the itemset is
// swapped into the tree (entry.first is left holding a default-constructed
// Key) and the entry counts of all internal nodes on the path are
// incremented.
template<class Key, class T>
HashTree<Key,T>& HashTree<Key,T>::move(Entry& entry)
{
    //The begin iterator and the length are taken now, because the final
    //Leaf_node::move() swaps the items out of entry.first.  The counting
    //pass at the end still walks the items through ibegin, which relies on
    //Key::swap keeping iterators valid (as vector::swap does).
    typename Key::iterator ibegin = entry.first.begin();
    typename Key::iterator ipos = ibegin;
    size_t isize = entry.first.size();

    //------- hash until we reach a leaf node
    Internal_node<Key,T> *parent = 0;
    Hashnode *child = root;
    int level = 0; //level of child; root is at level 0
    size_t index = 0; //slot of child inside parent
    while ( child !=0 && child->leaf() == false ) //internal node
    {
	Internal_node<Key,T> *node=(Internal_node<Key,T>*)(child);
	//once the items are used up the itemset belongs in slot 0
	index = (level < (int)isize)?
		hashFunc(*ipos++) : 0;
	parent = node;
	child = (*node)[index];
	level++;
    }

    //------- if there is a leaf already there, then check for space
    Leaf_node<Key,T> *leaf;
    if ( child != 0 ) //child is an existing leaf
    {
	leaf=(Leaf_node<Key,T>*)(child);
	//a leaf is split only while the itemset still has an item left
	//to hash on at this level
	if ( leaf->size() >= leaf->capacity()
		&& level < (int)isize ) //leaf is full
	{ //convert to internal node
	    Internal_node<Key,T> *node=new Internal_node<Key,T>(nodeSize,0);
	    node->no = leaf->size();
	    typename leaf_type::iterator pos;

	    for (pos=leaf->begin(); pos!=leaf->end(); pos++)
	    { //reinsert all the contents of original leaf node
		size_t offset;
		offset = (level < (int)(pos->first).size())?
			hashFunc((pos->first)[level]) : 0;
		if ((*node)[offset] == 0) //uninitialized
		{
		    Leaf_node<Key,T> *temp = new Leaf_node<Key,T>;
		    temp->reserve(nodeSize);
		    (*node)[offset] = temp;
		}

		((Leaf_node<Key,T>*)((*node)[offset]))->move(*pos);
	    }

	    delete leaf;

	    //put the new internal node where the full leaf used to be
	    if ( parent != 0 )
		(*parent)[index] = node;
	    else
		root = node;

	    //set child to point to node where entry is to be inserted
	    index = (level < (int)isize)?
		    hashFunc(*ipos) : 0;
	    parent = node;
	    child = (*node)[index];
	}
    }

    //------- if there was no leaf there, then create one. This could be
    // either because there was no leaf originally, or because the leaf
    // was full and got converted to an internal node.

    if ( child == 0 )
    { //attach a new leaf
	leaf = new Leaf_node<Key,T>;
	leaf->reserve(nodeSize);
	if ( parent != 0 )
	    (*parent)[index]=leaf;
	else
	    root = leaf;
    } else
	leaf = (Leaf_node<Key,T>*)(child);

    //------- finally move entry into leaf
    if (leaf->move(entry)) //true only if a new itemset was added
    {
	//now change size fields in all ancestors
	child = root;
	level = 0;
	ipos = ibegin;
	while ( child !=0 && child->leaf() == false ) //internal node
	{
	    Internal_node<Key,T> *node=(Internal_node<Key,T>*)(child);
	    (node->no)++;
	    index = (level < (int)isize)?
		    hashFunc(*ipos++) : 0;
	    child = (*node)[index];
	    level++;
	}
    }

    return *this;
}

/*************** Removing an itemset from the hashtree *****************
 The following member function removes an itemset from the hashtree. 
 It takes a Key as an argument.  It searches for the position of
 itemset as in the move() function above.  If it is there in some node
 N, it removes it.  If the total number of itemsets that are present
 below the parent of N, can fit into one leaf node, then that is done,
 and the unnecessary nodes are deleted. This will ensure that deleting an
 itemset will leave the hashtree in a state as if it had never been
 inserted.
***********************************************************************/
template<class Key, class T>
HashTree<Key,T>& HashTree<Key,T>::remove(const Key& i)
{
    //------- Search for the leaf node where itemset is to be present
    Internal_node<Key,T> *grandParent = 0;
    Internal_node<Key,T> *parent = 0;
    Hashnode *child = root;
    int level = 0; //level of child; root is at level 0
    size_t index = 0;
    while ( child !=0 && child->leaf() == false ) //internal node
    {
	Internal_node<Key,T> *node=(Internal_node<Key,T>*)(child);
	index = (level < (int)i.size())?
		hashFunc(i[level]) : 0;
	grandParent = parent;
	parent = node;
	child = (*node)[index];
	level++;
    }

    //------- Remove the itemset if present from the leaf node
    if ( child != 0 && ((Leaf_node<Key,T>*)(child))->remove(i) )
    { //itemset i is removed from child
	if ( parent != 0 && parent->no - 1 < (int)parent->capacity() )
	{ //convert parent to leaf node
	    Leaf_node<Key,T> *node=new Leaf_node<Key,T>;
	    node->reserve(nodeSize);
	    Internal_node<Key,T>::iterator pos;
	    for (pos=parent->begin(); pos!=parent->end(); pos++)
	    {
		if (*pos != 0) {
		    Leaf_node<Key,T>* leaf = (Leaf_node<Key,T>*)(*pos);
		    Leaf_node<Key,T>::iterator j;
		    for (j=leaf->begin(); j!=leaf->end(); j++)
			node->move(*j);
		}
	    }

	    if ( grandParent != 0 )
		(*grandParent)[hashFunc(i[level-2])]=node;
	    else 
		root=node;

	    delete parent;
	}
	else if ( ((Leaf_node<Key,T>*)child)->size() == 0 )
	{
	    delete ((Leaf_node<Key,T>*)child);
	    if ( parent != 0 )
		(*parent)[index]=0;
	    else
		root=0;
	}

	//------- now change size fields in all ancestors
	child = root;
	level = 0;
	while ( child !=0 && child->leaf() == false ) //internal node
	{
	    Internal_node<Key,T> *node=(Internal_node<Key,T>*)(child);
	    (node->no)--;
	    index = (level < (int)i.size())?
		    hashFunc(i[level]) : 0;
	    child = (*node)[index];
	    level++;
	}
    }

    return *this;
}

//---- find itemset i in hashtree
template<class Key, class T>
HashTree<Key,T>::iterator HashTree<Key,T>::find(const Key& i)
{
    Hashnode *child = root;
    int level = 0; //level of child; root is at level 0
    while ( child !=0 && child->leaf() == false ) //internal node
    {
	Internal_node<Key,T> *node=(Internal_node<Key,T>*)(child);
	size_t index = (level < (int)i.size())?
		hashFunc(i[level]) : 0;
	child = (*node)[index];
	level++;
    }

    if ( child != 0 ) //child is an existing leaf
    {
	Leaf_node<Key,T>& leaf = *((Leaf_node<Key,T>*)child);
	Leaf_node<Key,T>::iterator pos = leaf.find(i);
	return ( (pos==leaf.end())? end() : pos );
    }
    else
	return end();
}

//---- find itemset i in hashtree... const version
template<class Key, class T>
HashTree<Key,T>::const_iterator HashTree<Key,T>::find(const Key& i) const
{
    Hashnode *child = root;
    int level = 0; //level of child; root is at level 0
    while ( child !=0 && child->leaf() == false ) //internal node
    {
	Internal_node<Key,T> *node=(Internal_node<Key,T>*)(child);
	size_t index = (level < (int)i.size())?
		hashFunc(i[level]) : 0;
	child = (*node)[index];
	level++;
    }

    if ( child != 0 ) //child is an existing leaf
    {
	Leaf_node<Key,T>& leaf = *((Leaf_node<Key,T>*)child);
	Leaf_node<Key,T>::iterator pos = leaf.find(i);
	return ( (pos==leaf.end())? end() : pos );
    }
    else
	return end();
}

//---- return true iff itemset i is present in hashtree
// Membership test: walks down from the root, hashing on successive items
// of i (slot 0 once the items are used up), and asks the leaf that is
// reached whether it holds i.  An empty slot on the way means "absent".
template<class Key, class T>
bool HashTree<Key,T>::contains(const Key& i) const
{
    const int keyLen = (int)i.size();
    Hashnode *cur = root;

    for ( int depth = 0; cur != 0 && !cur->leaf(); depth++ )
    {
	Internal_node<Key,T> *inner = (Internal_node<Key,T>*)(cur);
	size_t slot = 0;
	if ( depth < keyLen )
	    slot = hashFunc(i[depth]);
	cur = (*inner)[slot];
    }

    if ( cur == 0 ) //ran into an empty slot, or the tree is empty
	return false;

    return (((Leaf_node<Key,T>*)cur)->contains(i));
}

#endif
