/**********************************************************************
    Copyright (C) 2004 Database Systems Lab, Supercomputer Education and
    Research Centre, Indian Institute of Science, Bangalore, INDIA.
    http://dsl.serc.iisc.ernet.in

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
***********************************************************************/


/******************* Interface to the Taxonomy File ********************
 AUTHOR: Vikram Pudi

 DESCRIPTION:

 Generalized Association Rules were first proposed in --
 
    Mining Generalized Association Rules
    	By Ramakrishnan Srikant and Rakesh Agrawal
	In Proc. of 21st VLDB Conf., 1995
 
 The synthetic database generator by Agrawal generates a separate file
 which contains the hierarchy of items (also called taxonomy).  The
 routines in this file read this taxonomy file and convert the contents
 to a vector of Itemsets.  The ith element in the vector is for the ith
 item of the database.  The corresponding itemset contains all the
 ancestors of that item in sorted order (items with smaller ids come
 first).

 We follow the convention of Agrawal's code whereby leaf items have
 higher ids and their parents have smaller ids.  This means that all
 ids greater than a particular id represent leaf items.  Also, the
 children of a particular item have consecutive ids.  Code outside of
 this file doesn't depend on these particulars though.
 
 Structure of the Taxonomy File Generated by Agrawal:
 
 c1 p1
 c2 p2
 c3 p3
   .
   .
   .
 
 Where p1 is the parent of c1, p2 is the parent of c2 and so on.  The
 item whose id is 1 less than that of c1 represents the last item which
 has no parent (i.e. it is a root).
***********************************************************************/

#ifndef TAXONOMY_H_
#define TAXONOMY_H_

#include <fstream.h>
#include "item.h"

/*
 * In-memory form of the taxonomy file.
 *
 * parents[i] is the id of the parent of item i, or -1 when item i is a
 * root.  Items 0 .. no_roots-1 are the roots; the parent of every later
 * item is taken from the successive (child, parent) records of the file.
 */
struct Taxonomy
{
    vector<int> parents; //parents[i] = parent of item i, -1 for a root
    int no_roots; //no of roots (items with ids 0 .. no_roots-1)

    /*
     * Loads the taxonomy stored in the file `taxfilename`.
     *
     * The file is a sequence of (child, parent) pairs, each value being
     * a raw int as laid out in memory.  The child id of the first record
     * gives the number of roots; the child ids of the later records are
     * read but not used, the records are simply taken in file order.
     *
     * Prints a message on cout and calls exit(1) when the first value
     * cannot be read, when it is negative, or when a parent id does not
     * refer to an item of the taxonomy.
     */
    Taxonomy(const char *taxfilename)
    {
	// The records are raw ints, so no text-mode translation may be
	// applied to the bytes of the file.
	ifstream tf(taxfilename, ios::in | ios::binary);
	int parent, child;

	tf.read(reinterpret_cast<char*>(&child), sizeof(child));

	if (!tf)
	{
	    cout << "Couldn't read from Taxonomy file: " << 
		    taxfilename << endl;
	    exit(1);
	}

	// A negative count would be converted to a huge size by resize().
	if (child < 0)
	{
	    cout << "Invalid item id in Taxonomy file: " <<
		    taxfilename << endl;
	    exit(1);
	}

	// Every item that precedes the first child is a root.
	no_roots = child;
	parents.resize(no_roots, -1);

	// The child of the first record is already consumed, so each
	// round reads the pending parent and then the next child.  Once
	// a read fails the stream stays failed and the loop ends.
	while (tf.read(reinterpret_cast<char*>(&parent), sizeof(parent)))
	{
	    parents.push_back(parent);
	    tf.read(reinterpret_cast<char*>(&child), sizeof(child));
	}

	// Parent ids are used as indices into `parents`, so each one has
	// to be -1 or the id of an existing item.
	const int total = static_cast<int>(parents.size());
	for (int item = no_roots; item < total; item++)
	{
	    if (parents[item] < -1 || parents[item] >= total)
	    {
		cout << "Invalid parent id in Taxonomy file: " <<
			taxfilename << endl;
		exit(1);
	    }
	}
    }

    // Total number of items (roots included).
    int noItems() const { return static_cast<int>(parents.size()); }

    // Number of root items.
    int noRoots() const { return no_roots; }

    // Id of the first leaf: one past the parent of the last item.  With
    // no items at all there is no last item, so 0 is returned.
    int firstLeaf() const
    {
	if (parents.empty())
	    return 0;
	return (parents.back() + 1);
    }

    // Number of leaf items, i.e. items with ids from firstLeaf() onwards.
    int noLeaves() const { return (noItems() - firstLeaf()); }
};

struct AncesTable : public vector<Itemset>
{ //the vector contains ancestors of each item
    int no_items; //no of items
    int no_roots; //no of roots in the forest of items
    int no_leaves; //no of leaf nodes

    AncesTable() {}
    AncesTable(Taxonomy& tax) { construct(tax); }

    void construct(Taxonomy& tax)
    {
	no_items = tax.noItems();
	no_roots = tax.noRoots();
	no_leaves =  tax.noLeaves();

	resize(no_items);

	iterator i; int item;
	for (i = begin(), item = 0; i != end(); i++, item++)
	{
	    int parent = tax.parents[item];
	    while (parent != -1)
	    {
		i->insert(Item(parent));
		parent = tax.parents[parent];
	    }
	}
    }

    int noItems() const { return no_items; }
    int noRoots() const { return no_roots; }
    int noLeaves() const { return no_leaves; }
    int firstLeaf() const { return (no_items - no_leaves); }

    void swap(AncesTable& a)
    {
	int temp;
	temp = a.no_items; a.no_items = no_items; no_items = temp;
	temp = a.no_roots; a.no_roots = no_roots; no_roots = temp;
	temp = a.no_leaves; a.no_leaves = no_leaves; no_leaves = temp;
	vector<Itemset>::swap(a);
    }

    void addAncestors(Itemset& i) const
    {
	int resultSize = 0; //actually max result.size()
	Itemset::iterator next;
	for (next = i.begin(); next != i.end(); next++)
	    resultSize += (*this)[next->id].size();

	Itemset result;
	result.reserve(resultSize + i.size());
	for (next = i.begin(); next != i.end(); next++)
	{
	    const Itemset& anc = (*this)[next->id];

	    Itemset::const_iterator j;
	    for (j = anc.begin(); j != anc.end(); j++)
		result.insert(*j);
	}

	for (next = i.begin(); next != i.end(); next++)
	    result.insert(*next);

	i.swap(result);
    }

    bool isAncestor(Item item1, Item item2) const
    { //is item1 ancestor of item2?
	if (item1.id > item2.id)
	    return false;
	if (item1.id == item2.id)
	    return true;

	const Itemset& i = (*this)[item2.id];
	Itemset::const_iterator j;
	for (j = i.begin(); j != i.end(); j++)
	    if (item1 == *j)
		return true;

	return false;
    }

    bool isAncestor(const Itemset& i1, const Itemset& i2) const
    { //is i1 ancestor of i2?
	if (i1.size() != i2.size())
	    return false;

	Itemset::const_iterator j, k;
	for (j = i1.begin(), k = i2.begin(); j != i1.end(); j++, k++)
	    if (! isAncestor(*j, *k))
		return false;

	return true;
    }

    void pruneAncestors(vector<bool> items)
    {
	iterator i;
	for (i = begin() + firstLeaf(); i != end(); i++)
	{
	    Itemset::iterator j;
	    for (j = i->begin(); j != i->end(); )
	    {
		if (! items[j->id])
		    j = i->erase(j);
		else
		    j++;
	    }
	}
    }

    int depth(const Itemset& i) const
    { //get depth of i
	int max = 0;
	Itemset::const_iterator j;
	for (j = i.begin(); j != i.end(); j++)
	    if ((int)(*this)[j->id].size() > max)
		max = (*this)[j->id].size();

	return max;
    }
};

#endif
