/***********************************************************************
    Copyright (C) 2003 Database Systems Lab, SERC, IISc, Bangalore.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
***********************************************************************/

#ifndef TIDSETDAG_H_
#define TIDSETDAG_H_

#include "data.h" //this includes svector.h
#include "mymath.h" //for ceil()
#include <list.h>

/***************************** DAG-node *******************************/
class TidsetDag;
struct DagNode
{ // A DagNode stands for one itemset stored in the DAG.

    // Nodes that name this node as their lchild (llist) or rchild (rlist).
    // A node N is stored in N->lchild->llist at position N->lback and in
    // N->rchild->rlist at position N->rback (see erase_with_supersets).
    list<DagNode*> llist;
    list<DagNode*> rlist;

    DagNode* lchild; // 0 for singletons (see TidsetDag::setNoItems)
    list<DagNode*>::iterator lback; // this node's slot in lchild->llist
    int litem; // item appended to the itemset when output() descends llist

    DagNode* rchild; // 0 for singletons (see TidsetDag::setNoItems)
    list<DagNode*>::iterator rback; // this node's slot in rchild->rlist
    int ritem; // not referenced anywhere else in this header

    int count; // counter compared against the minsupp-derived threshold
    long ftid; // first TID: when this itemset was inserted

    vector<int> tidlist; // TIDs collected for this node (see crunch)
    vector<int> counts2; // slot for item i is at index i-litem-1
    bool expanded;

    // True when count is below ceil(minsupp * (tid - ftid + 1)).
    bool small(long tid, double minsupp) const
    {
	const int threshold = (int)ceil(minsupp*(tid-ftid+1));
	return count < threshold;
    }
    // Exact negation of small(tid, minsupp).
    bool large(long tid, double minsupp) const
    {
	return !small(tid, minsupp);
    }
    // True when count is below
    // ceil(minsupp * (dbsize + (tid - ftid) * psize)).
    bool small(long tid, double minsupp, int psize, int dbsize) const
    {
	const int threshold =
	    (int)ceil(minsupp*(dbsize+(tid-ftid)*psize));
	return count < threshold;
    }
    // Exact negation of small(tid, minsupp, psize, dbsize).
    bool large(long tid, double minsupp, int psize, int dbsize) const
    {
	return !small(tid, minsupp, psize, dbsize);
    }
    // Absolute value of the counts2 slot that belongs to item i.
    int supcount(int i)
    {
	const int stored = counts2[i-litem-1];
	return (stored < 0) ? -stored : stored;
    }
    // Overwrites the counts2 slot that belongs to item i with c.
    void setcount(int i, int c)
    {
	counts2[i-litem-1] = c;
    }

    inline void clear();
    inline int erase_supersets();
    inline int erase_with_supersets(bool right);
    int trigger(long pno, double minsupp, int psize, int dbsize,
	    TidsetDag&);
    int expand(long tid, double minsupp, int psize, int dbsize,
	    DagNode *i, TidsetDag&);
    void trigger(int mincount, TidsetDag&);
    void expand(int mincount, DagNode *i, TidsetDag&);
    int trigger_left_right(long pno, double minsupp, int psize,
	    int dbsize, TidsetDag&);
    inline void setParentCounts(list<DagNode*>::iterator,
	    list<DagNode*>::iterator, int dbsize, int no);
    inline void setMamaCounts(long tid, double minsupp, int psize,
	    int dbsize);
    inline void setPapaCounts(long tid, double minsupp, int psize,
	    int dbsize);
    inline void incrCount(int dbsize);
    int incrCount(ostream& s, Itemset& I, long pno, long mincount,
	    int dbsize);
    int removeSmall(ostream& s, Itemset& I, long pno, double minsupp,
	    int psize, int dbsize);
    int outputLNB(ostream& s, Itemset& I, long pno, double minsupp,
	    int psize, int dbsize);
    inline void output2sets(ostream& s) const;
    int incrCountNB(ostream& s, Itemset& I, long pno, long mincount,
	    int dbsize);

    // Zeroes count and ftid on this node and, recursively, on every node
    // reachable through llist. Nothing is deallocated.
    void cleanup()
    {
	ftid = 0;
	count = 0;

	for (list<DagNode*>::iterator child = llist.begin();
		child != llist.end(); ++child)
	{
	    (*child)->cleanup();
	}
    }

//----------------- i/o operations ---------------
    void output(ostream&, Itemset&, bool tid, long ltid, double minsupp)
	    const;

    // Accumulates statistics over this node and everything below it along
    // llist: internalcount gains count, fathercount gains
    // rlist.size() * count.
    void outputstat(long& internalcount, long& fathercount) const
    {
	// A leaf-only filter (llist.size() + rlist.size() == 0) used to
	// guard the next statement; it is disabled, so every node adds
	// its count.
	internalcount += count;
	fathercount += rlist.size() * count;

	for (list<DagNode*>::const_iterator child = llist.begin();
		child != llist.end(); ++child)
	{
	    (*child)->outputstat(internalcount, fathercount);
	}
    }
};

/*************************** Itemset-DAG ******************************/
class TidsetDag
{
    // Container for the itemset DAG. The 1-itemset nodes are held by value
    // in `singletons` (the node at index i has litem == i); every other
    // node hangs off them through the DagNode llist/rlist links.
    //
    // NOTE(review): the destructor releases the linked nodes through
    // clear(), but no copy constructor / assignment operator is declared,
    // so copying a populated TidsetDag would presumably lead to the same
    // nodes being deleted twice. Confirm that instances are never copied.
    friend struct DagNode;
    vector<DagNode> singletons;
    size_t no_nodes; // node counter: set by setNoItems, decremented as nodes go

public:

//--------------- construct/destroy --------------
    // Creates an empty DAG. no_nodes is initialised explicitly so that
    // size(), swap() and output() never read an indeterminate value.
    TidsetDag() : no_nodes(0) { }
    // Creates a DAG holding s singleton nodes (see setNoItems).
    TidsetDag(size_t s) { setNoItems(s); }
    ~TidsetDag() { clear(); }

    // Calls DagNode::clear() on every singleton and resets the node
    // counter to 0. The singleton vector itself keeps its size.
    // NOTE(review): setNoItems() counts the singletons in no_nodes, whereas
    // this leaves no_nodes at 0 while the singletons remain; confirm that
    // this is intended.
    void clear()
    {
	vector<DagNode>::iterator i;
	for (i = singletons.begin(); i != singletons.end(); i++)
	    i->clear();
	no_nodes = 0;
    }

//------------ functions to access members -------
    // Resizes the singleton vector to s entries and (re)initialises every
    // entry: litem is its index, both child pointers are null, count and
    // ftid are 0 and the node is marked as not expanded. no_nodes becomes s.
    void setNoItems(size_t s)
    {
	singletons.resize(s);
	no_nodes = s;
	for (size_t i = 0; i < singletons.size(); i++)
	{
	    singletons[i].litem = i;
	    singletons[i].lchild = singletons[i].rchild = 0;
	    singletons[i].count = 0;
	    singletons[i].expanded = false;
	    singletons[i].ftid = 0;
	}
    }

    // Number of singleton nodes.
    size_t noItems() const { return singletons.size(); }
    // Current value of the node counter.
    size_t size() const { return no_nodes; }

//----------------- misc. functions --------------
    void swap(TidsetDag& d) //swap contents of 2 TidsetDags
    {
	size_t tno_nodes = no_nodes;
	no_nodes = d.no_nodes;
	d.no_nodes =  tno_nodes;
	singletons.swap(d.singletons);
    }

//----------------- i/o operations ---------------
    DagNode* insert(Itemset&, int ftid, int count);
    void input(istream& s, bool tid, long ftid, int count);
    void inputNB(istream& s, bool tid, long ftid, int count);
    void inputNB(istream& s);
    inline void output(ostream&, bool tid, long ltid, double minsupp) const;
    inline bool outputLNB(ostream& s, long pno, double minsupp,
	    int psize, int dbsize);
    inline void output2sets(ostream& s) const;

    // Calls DagNode::trigger(mincount, *this) on every singleton.
    void generateNB(int mincount)
    {
	vector<DagNode>::iterator i;
	for (i = singletons.begin(); i != singletons.end(); i++)
	    i->trigger(mincount, *this); // non-const member: no cast needed
    }

    // Sums DagNode::outputstat() over all singletons and prints both
    // totals to cout.
    void outputstat() const
    {
	long internalcount = 0;
	long fathercount = 0;
	vector<DagNode>::const_iterator i;
	for (i = singletons.begin(); i != singletons.end(); i++)
	    i->outputstat(internalcount, fathercount);
	cout << "\ntotal count of internal nodes = " << internalcount;
	cout << "\ntotal count of fathers = " << fathercount << endl;
    }

//------------------ crunch tuples ---------------
    inline void crunch(const Itemset& I, long tid, double minsupp);
    inline void crunchNB(const Itemset& I, long tid, double minsupp);
    void update(long pno, int psize, const vector<Itemset>& partition,
	    double minsupp);
    inline void incrCount(int dbsize);
    inline void incrCount(ostream& s, long pno, long mincount, int dbsize);
    inline void releaseSpace();
    inline void removeSmall(ostream& s, long pno, double minsupp,
	    int psize, int dbsize);
    inline bool incrCountNB(ostream& s, long pno, long mincount, int dbsize);

    // Calls DagNode::cleanup() on every singleton (resets count and ftid
    // along the llist links; nothing is deallocated).
    void cleanup()
    {
	vector<DagNode>::iterator i;
	for (i = singletons.begin(); i != singletons.end(); i++)
	    i->cleanup();
    }
};

// Stream insertion: forwards to TidsetDag::output with tid = true,
// ltid = 0 and minsupp = 1, then hands the stream back for chaining.
inline ostream& operator<<(ostream& s, const TidsetDag& d)
{
    const bool with_tid = true;
    const long last_tid = 0;
    const double min_support = 1;

    d.output(s, with_tid, last_tid, min_support);
    return s;
}

// Stream extraction: forwards to TidsetDag::input with tid = true,
// ftid = 0 and count = -1, then hands the stream back for chaining.
inline istream& operator>>(istream& s, TidsetDag& d)
{
    const bool with_tid = true;
    const long first_tid = 0;
    const int initial_count = -1;

    d.input(s, with_tid, first_tid, initial_count);
    return s;
}

// Replaces the contents of `result` with the intersection of t1 and t2.
// set_intersection requires both inputs to be sorted; `result` must not
// alias either input because it is emptied before the inputs are read.
inline void intersect(vector<int>& t1, vector<int>& t2,
	vector<int>& result)
{
    // The intersection cannot be longer than the shorter input.
    size_t bound = t2.size();
    if (t1.size() <= bound)
	bound = t1.size();

    result.reserve(bound);
    result.clear();
    set_intersection(t1.begin(), t1.end(),
	    t2.begin(), t2.end(),
	    back_inserter(result));
}

// Bitmap variant: replaces the contents of `result` with those elements of
// t2 whose bit is set in t1, keeping t2's order. t1size is only used as a
// capacity hint (the number of set bits, as passed by the callers in this
// file). Every element of t2 is used as an index into t1 without a range
// check.
inline void intersect(vector<bool>& t1, size_t t1size, vector<int>& t2,
	vector<int>& result)
{
    size_t bound = t2.size();
    if (t1size <= bound)
	bound = t1size;

    result.reserve(bound);
    result.clear();

    for (vector<int>::iterator tid = t2.begin(); tid != t2.end(); ++tid)
    {
	if (!t1[*tid])
	    continue;
	result.push_back(*tid);
    }
}

inline void DagNode::incrCount(int dbsize)
{
    setParentCounts(llist.begin(), llist.end(), dbsize, llist.size());

    //trigger supersets
    list<DagNode*>::iterator i;
    for (i = llist.begin(); i != llist.end(); i++)
	(*i)->incrCount(dbsize);
}

inline void TidsetDag::incrCount(int dbsize)
{
    vector<DagNode>::iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	i->count += i->tidlist.size();
	i->incrCount(dbsize);
	i->tidlist.resize(0);
    }
}

inline void TidsetDag::releaseSpace()
{
    vector<DagNode>::iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	i->count += i->tidlist.size();
	i->tidlist.resize(0);
    }
}

inline void TidsetDag::incrCount(ostream& s, long pno, long mincount, int
	dbsize)
{
    Itemset I;
    I.reserve(singletons.size());
    vector<DagNode>::iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	if (! i->expanded)
	{
	    i->tidlist.resize(0);
	    continue;
	}

	no_nodes -= i->incrCount(s, I, pno, mincount, dbsize);
	i->tidlist.resize(0);
    }
}

inline void TidsetDag::removeSmall(ostream& s, long pno, double minsupp,
	int psize, int dbsize)
{
    Itemset I;
    I.reserve(1);
    vector<DagNode>::iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	if (! i->expanded)
	    continue;
	i->counts2 = vector<int>();
	I.push_back(i->litem);
	s << I << " : " << i->count << "\n";
	no_nodes -= i->removeSmall(s, I, pno, minsupp, psize, dbsize);
	I.pop_back();
    }
}

void incrHist(int);

inline bool TidsetDag::outputLNB(ostream& s, long pno, double minsupp,
	int psize, int dbsize)
{ //returns if there are any 2-itemsets in NB
    Itemset I;
    I.reserve(1);
    vector<DagNode>::iterator i;
    bool retval = false;
    long mincount = (long)ceil(minsupp * (pno*psize + dbsize));
    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	I.push_back(i->litem);
	if (i->count >= mincount)
	{
	    s << I << "(" << i->count << ")\n";
	    incrHist(1);
	}

	if (! i->expanded)
	{
	    I.pop_back();
	    continue;
	}

	if (i->ftid == 0)
	{
//	    i->output2sets(s);
	    i->counts2 = vector<int>();
	}

	if (i->counts2.size() > 0)
	    retval = true;

	no_nodes -= i->outputLNB(s, I, pno, minsupp, psize, dbsize);
	I.pop_back();
    }

    return retval;
}

// Writes one line "-<n> : <litem> : c0 c1 ..." holding the n entries of
// counts2. Nothing is written when counts2 is empty.
inline void DagNode::output2sets(ostream& s) const
{
    if (counts2.empty())
	return;

    s << -static_cast<int>(counts2.size()) << " : " << litem << " :";

    for (vector<int>::const_iterator c = counts2.begin();
	    c != counts2.end(); ++c)
    {
	s << " " << *c;
    }

    s << "\n";
}

inline void TidsetDag::output2sets(ostream& s) const
{
    vector<DagNode>::const_iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
	i->output2sets(s);
}

inline void TidsetDag::crunch(const Itemset& I, long tid, double minsupp)
{
    Itemset::const_iterator item;
    for (item = I.begin(); item != I.end(); item++)
	singletons[*item].tidlist.push_back(tid);
}

inline void TidsetDag::crunchNB(const Itemset& I, long tid, double minsupp)
{
    Itemset::const_iterator i1, i2;
    for (i1 = I.begin(); i1 != I.end(); i1++)
    {
	singletons[*i1].tidlist.push_back(tid);
	if (singletons[*i1].counts2.size() == 0)
	    continue;
	for (i2 = i1+1; i2 != I.end(); i2++)
	    singletons[*i1].counts2[*i2 - *i1 - 1]++;
    }
}

// For every node n in [i, j): intersects this node's tidlist with
// n->rchild->tidlist, adds the size of the intersection to n->count and,
// when n is expanded, stores the intersection as n->tidlist.
//
//   i, j    range of nodes to update (the caller in this file passes
//           llist.begin() / llist.end())
//   dbsize  required capacity of the TID bitmap
//   no      number of nodes in the range; exactly 1 selects the merge-based
//           intersection, anything else the bitmap-based one
//
// The scratch buffers are function-local statics, so the function is not
// reentrant.
inline void DagNode::setParentCounts(list<DagNode*>::iterator i,
	list<DagNode*>::iterator j, int dbsize, int no)
{
    static vector<int> templist(dbsize);
    static vector<bool> tlist(dbsize, false);

    // Nothing to update: skip the mark/unmark passes over tidlist.
    if (i == j)
	return;

    if (no == 1)
    {
	intersect(tidlist, (*i)->rchild->tidlist, templist);
	(*i)->count += templist.size();

	if ((*i)->expanded)
	    (*i)->tidlist = templist; //templist.swap((*i)->tidlist);

	return;
    }

    // The static bitmap is constructed only once, with the dbsize of the
    // very first call. Grow it when a later call needs more room so that
    // the indexing below stays inside the bitmap.
    if (dbsize > 0 && tlist.size() < static_cast<size_t>(dbsize))
	tlist.resize(dbsize, false);

    // mark this node's TIDs in the bitmap
    vector<int>::iterator vi;
    for (vi = tidlist.begin(); vi != tidlist.end(); vi++)
	tlist[*vi] = true;

    for (; i != j; i++)
    {
	intersect(tlist, tidlist.size(), (*i)->rchild->tidlist,
		templist);
	(*i)->count += templist.size();

	if ((*i)->expanded)
	    (*i)->tidlist = templist; //templist.swap((*i)->tidlist);
    }

    // leave the bitmap all-false for the next call
    for (vi = tidlist.begin(); vi != tidlist.end(); vi++)
	tlist[*vi] = false;
}

// Updates every node n in llist. The intersection of this node's tidlist
// with n->rchild->tidlist is computed; then
//   - if this node has an lchild (i.e. it is not a 1-itemset) the size of
//     the intersection is added to n->count,
//   - otherwise n->count is overwritten with counts2[n->litem - litem - 1];
// and n->tidlist receives the intersection when
// n->large(tid, minsupp, psize, dbsize) holds.
//
// A single entry in llist uses the merge-based intersection, several
// entries share one TID bitmap. The scratch buffers are function-local
// statics, so the function is not reentrant.
inline void DagNode::setMamaCounts(long tid, double minsupp, int psize,
	int dbsize)
{
    static vector<int> templist;
    static vector<bool> tlist(dbsize, false);
    list<DagNode*>::iterator i = llist.begin(), j = llist.end();

    // Nothing to update: skip the mark/unmark passes over tidlist.
    if (i == j)
	return;

    if (llist.size() == 1)
    {
	intersect(tidlist, (*i)->rchild->tidlist, templist);
	if (lchild != 0) //if this is not a 1-itemset
	    (*i)->count += templist.size();
	else
	    (*i)->count = counts2[(*i)->litem - litem - 1];

	// NOTE: the tidlist is copy-assigned on purpose. The previous form,
	// templist.swap((*i)->tidlist), was replaced here and in the other
	// places that used to swap tidlists: copying reduces memory
	// consumption drastically, at the price of a slightly higher
	// response time.
	if ((*i)->large(tid, minsupp, psize, dbsize))
	    (*i)->tidlist = templist;

	return;
    }

    // The static bitmap is constructed only once, with the dbsize of the
    // very first call. Grow it when a later call needs more room so that
    // the indexing below stays inside the bitmap.
    if (dbsize > 0 && tlist.size() < static_cast<size_t>(dbsize))
	tlist.resize(dbsize, false);

    // mark this node's TIDs in the bitmap
    vector<int>::iterator vi;
    for (vi = tidlist.begin(); vi != tidlist.end(); vi++)
	tlist[*vi] = true;

    for (; i != j; i++)
    {
	intersect(tlist, tidlist.size(), (*i)->rchild->tidlist,
		templist);

	if (lchild != 0) //if this is not a 1-itemset
	    (*i)->count += templist.size();
	else
	    (*i)->count = counts2[(*i)->litem - litem - 1];

	if ((*i)->large(tid, minsupp, psize, dbsize))
	    (*i)->tidlist = templist; //templist.swap((*i)->tidlist);
    }

    // leave the bitmap all-false for the next call
    for (vi = tidlist.begin(); vi != tidlist.end(); vi++)
	tlist[*vi] = false;
}

// Counterpart of setMamaCounts for rlist. For every node n in rlist the
// intersection of this node's tidlist with n->lchild->tidlist is computed;
// then
//   - if this node has an lchild (i.e. it is not a 1-itemset) the size of
//     the intersection is added to n->count,
//   - otherwise n->count is overwritten with
//     n->lchild->counts2[n->litem - n->lchild->litem - 1];
// and n->tidlist receives the intersection when
// n->large(tid, minsupp, psize, dbsize) holds.
//
// A single entry in rlist uses the merge-based intersection, several
// entries share one TID bitmap. The scratch buffers are function-local
// statics, so the function is not reentrant.
inline void DagNode::setPapaCounts(long tid, double minsupp, int psize,
	int dbsize)
{
    static vector<int> templist;
    static vector<bool> tlist(dbsize, false);
    list<DagNode*>::iterator i = rlist.begin(), j = rlist.end();

    // Nothing to update: skip the mark/unmark passes over tidlist.
    if (i == j)
	return;

    if (rlist.size() == 1)
    {
	intersect(tidlist, (*i)->lchild->tidlist, templist);
	if (lchild != 0) //if this is not a 1-itemset
	    (*i)->count += templist.size();
	else
	    (*i)->count = (*i)->lchild->counts2[(*i)->litem -
		    (*i)->lchild->litem - 1];

	if ((*i)->large(tid, minsupp, psize, dbsize))
	    (*i)->tidlist = templist; //templist.swap((*i)->tidlist);

	return;
    }

    // The static bitmap is constructed only once, with the dbsize of the
    // very first call. Grow it when a later call needs more room so that
    // the indexing below stays inside the bitmap.
    if (dbsize > 0 && tlist.size() < static_cast<size_t>(dbsize))
	tlist.resize(dbsize, false);

    // mark this node's TIDs in the bitmap
    vector<int>::iterator vi;
    for (vi = tidlist.begin(); vi != tidlist.end(); vi++)
	tlist[*vi] = true;

    for (; i != j; i++)
    {
	intersect(tlist, tidlist.size(), (*i)->lchild->tidlist,
		templist);

	if (lchild != 0) //if this is not a 1-itemset
	    (*i)->count += templist.size();
	else
	    (*i)->count = (*i)->lchild->counts2[(*i)->litem -
		    (*i)->lchild->litem - 1];

	if ((*i)->large(tid, minsupp, psize, dbsize))
	    (*i)->tidlist = templist; //templist.swap((*i)->tidlist);
    }

    // leave the bitmap all-false for the next call
    for (vi = tidlist.begin(); vi != tidlist.end(); vi++)
	tlist[*vi] = false;
}

inline bool TidsetDag::incrCountNB(ostream& s, long pno, long mincount, int
	dbsize)
{ //returns if there are any 2-itemsets in NB
    Itemset I;
    I.reserve(singletons.size());
    bool retval = false;
    vector<DagNode>::iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	if (! i->expanded)
	{
	    i->tidlist.resize(0);
	    continue;
	}

	if (i->ftid == pno+1)
	{
//	    i->output2sets(s);
	    i->counts2 = vector<int>();
	}

	if (i->counts2.size() > 0)
	    retval = true;

	no_nodes -= i->incrCountNB(s, I, pno, mincount, dbsize);
	i->tidlist.resize(0);
    }

    return retval;
}

inline int DagNode::erase_supersets()
{ //returns no of nodes deleted
    int no_nodes = llist.size() + rlist.size();

    list<DagNode*>::iterator i;
    for (i = llist.begin(); i != llist.end(); i++)
    {
	no_nodes += (*i)->erase_with_supersets(false);
	delete (*i);
    }

    llist.clear();

    for (i = rlist.begin(); i != rlist.end(); i++)
    {
	no_nodes += (*i)->erase_with_supersets(true);
	delete (*i);
    }

    rlist.clear();
    return no_nodes;
}

// Detaches this node from ONE of its two owners and deletes everything
// reachable through its own llist and rlist.
//
//   right == true   the node is erased from lchild->llist (using lback);
//   right == false  the node is erased from rchild->rlist (using rback).
//
// The list on the other side is not touched here: every caller in this
// file is iterating over that list, deletes the node right after the call
// and clears or discards the list afterwards. This function neither
// deletes `this` nor clears llist / rlist.
//
// Returns llist.size() + rlist.size() as measured on entry plus the totals
// reported by the recursive calls.
//
// NOTE(review): lchild / rchild are dereferenced unconditionally, and
// singletons have both set to 0 (TidsetDag::setNoItems), so this is
// presumably never called on a singleton -- confirm.
inline int DagNode::erase_with_supersets(bool right)
{ //returns no of nodes deleted
    int no_nodes = llist.size()+rlist.size();

    // unlink from the owner that is NOT currently iterating over us
    if (right)
	lchild->llist.erase(lback);
    else
	rchild->rlist.erase(rback);

    list<DagNode*>::iterator i;

    // entries of llist have this node as lchild: they must unlink from
    // their rchild (right == false)
    for (i = llist.begin(); i != llist.end(); i++)
    {
	no_nodes += (*i)->erase_with_supersets(false);
	delete (*i);
    }
    // entries of rlist have this node as rchild: they must unlink from
    // their lchild (right == true)
    for (i = rlist.begin(); i != rlist.end(); i++)
    {
	no_nodes += (*i)->erase_with_supersets(true);
	delete (*i);
    }

    return no_nodes;
}

inline void DagNode::clear()
{
    count = 0;
    ftid = 0;

    list<DagNode*>::iterator i;

    for (i = llist.begin(); i != llist.end(); i++)
    {
	(*i)->erase_with_supersets(false);
	delete (*i);
    }

    llist.clear();

    for (i = rlist.begin(); i != rlist.end(); i++)
    {
	(*i)->erase_with_supersets(true);
	delete (*i);
    }

    rlist.clear();
}

//--------------- Output Operations -----------------
inline void DagNode::output(ostream& s, Itemset& I, bool tid, long ltid,
	double minsupp) const
{
    if (small(ltid, minsupp))
	    return;

    I.push_back(litem);
    s << I << " : " << count;

    if (tid)
	s << " : " << ftid;

    s << "\n";

    list<DagNode*>::const_iterator i;
    for (i = llist.begin(); i != llist.end(); i++)
	(*i)->output(s,I,tid,ltid,minsupp);

    I.pop_back();
}

inline void TidsetDag::output(ostream& s, bool tid, long ltid, double
	minsupp) const
{
    s << singletons.size() << "\n";
    s << no_nodes << "\n";

    Itemset I;
    I.reserve(singletons.size());

    vector<DagNode>::const_iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
	i->output(s,I,tid,ltid,minsupp);
}

// Declarations only; neither function is defined or called in this header.
// NOTE(review): initHist presumably sets up the histogram that incrHist()
// updates, and printNoItemsets presumably reports the collected itemset
// counts -- confirm against the definitions.
void initHist(int x);
void printNoItemsets();

#endif
