/***********************************************************************
 AUTHOR: Vikram Pudi
 DESCRIPTION: Data structure to hold counters and tidsets of itemsets.

    Copyright (C) 2003 Database Systems Lab, SERC, IISc, Bangalore.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
***********************************************************************/
#include "tidsetdag.h"

// Histogram of reported itemsets: setHist[k] is the number of k-itemsets
// that have been counted so far.
std::vector<int> setHist;

// Length of the longest itemset counted so far.
int maxK = 0;

// Sizes the histogram to exactly x slots (indices 0 .. x-1).
// A negative x is treated as 0 instead of being converted to a huge
// unsigned size.
void initHist(int x)
{
    setHist.resize(x > 0 ? x : 0);
}

// Adds one to the number of k-itemsets.
// The histogram grows on demand, so a k beyond the size passed to
// initHist() no longer writes out of bounds. Negative k is ignored.
void incrHist(int k)
{
    if (k < 0)
        return;

    const std::vector<int>::size_type slot = k;
    if (slot >= setHist.size())
        setHist.resize(slot + 1, 0);

    ++setHist[slot];
}

// Prints setHist[0] .. setHist[maxK] to standard output, one per line.
// Slot 0 is forced to 1 before printing, as in the original code.
void printNoItemsets()
{
    // Make sure every slot that is about to be touched exists; the
    // histogram may never have been initialised or may be shorter than
    // maxK + 1.
    const std::vector<int>::size_type needed = (maxK > 0 ? maxK : 0) + 1;
    if (setHist.size() < needed)
        setHist.resize(needed, 0);

    setHist[0] = 1;
    for (int i = 0; i <= maxK; i++)
        std::cout << setHist[i] << "\n";
}

// Walks the sub-DAG below this node after partition `pno` has been
// processed.
//   s        - stream that receives "<itemset>(<count>)" lines
//   I        - itemset prefix; this node's litem is appended for the
//              duration of the call and removed again before returning
//   mincount - minimum count for a node first seen at tid pno+1 to be
//              reported
// Returns the number of nodes deleted.
int DagNode::incrCount(ostream& s, Itemset& I, long pno, long mincount,
        int dbsize)
{
    int deleted = 0;
    const long newTid = pno + 1;

    // setParentCounts is defined elsewhere; it is handed the whole
    // llist range.
    setParentCounts(llist.begin(), llist.end(), dbsize, llist.size());

    // Descend into the supersets first.
    I.push_back(litem);
    for (list<DagNode*>::iterator sub = llist.begin();
            sub != llist.end(); ++sub)
        deleted += (*sub)->incrCount(s, I, pno, mincount, dbsize);

    list<DagNode*>::iterator pos = llist.begin();
    while (pos != llist.end())
    {
        DagNode *node = *pos;

        if (node->ftid == newTid)
        {
            I.push_back(node->litem);
            if (node->count >= mincount)
            {
                // Report the itemset and record it in the histogram.
                s << I << "(" << node->count << ")\n";
                if (I.size() > maxK)
                    maxK = I.size();
                setHist[I.size()]++;
            }
            else
                deleted += node->erase_supersets();

            I.pop_back();
        }

        // A node with no supersets on either side is dropped once its
        // ftid is not beyond pno+1.
        const bool noSupersets = node->llist.empty() && node->rlist.empty();
        if (node->ftid <= newTid && noSupersets)
        {
            pos = llist.erase(pos);
            node->rchild->rlist.erase(node->rback);
            delete node;
            ++deleted;
        }
        else
            ++pos;
    }

    I.pop_back();
    return deleted;
}

// Deletes every child in llist for which small() holds, reports the
// remaining children whose ftid is 0, recurses into the remaining
// children, and finally drops ftid-0 children that have no supersets.
//   s - stream that receives "<itemset>(<count>)" lines
//   I - itemset prefix; restored to its incoming contents on return
// Returns the number of nodes deleted.
int DagNode::removeSmall(ostream& s, Itemset& I, long pno,
        double minsupp, int psize, int dbsize)
{
    int deleted = 0;

    // Pass 1: prune small children, report and descend into the rest.
    list<DagNode*>::iterator pos = llist.begin();
    while (pos != llist.end())
    {
        DagNode *node = *pos;

        if (! node->small(pno, minsupp, psize, dbsize))
        {
            I.push_back(node->litem);

            if (node->ftid == 0)
            {
                s << I << "(" << node->count << ")\n";
                if (I.size() > maxK)
                    maxK = I.size();
                setHist[I.size()]++;
            }

            deleted += node->removeSmall(s, I, pno, minsupp, psize,
                    dbsize);
            I.pop_back();
            ++pos;
            continue;
        }

        // Unlink from both parents' lists, then free the node.
        pos = llist.erase(pos);
        node->rchild->rlist.erase(node->rback);
        delete node;
        ++deleted;
    }

    // Pass 2: drop ftid-0 children that have no supersets left.
    pos = llist.begin();
    while (pos != llist.end())
    {
        DagNode *node = *pos;
        const bool noSupersets = node->llist.empty() && node->rlist.empty();

        if (node->ftid == 0 && noSupersets)
        {
            pos = llist.erase(pos);
            node->rchild->rlist.erase(node->rback);
            delete node;
            ++deleted;
        }
        else
            ++pos;
    }

    return deleted;
}

// Reports every ftid-0 child in llist for which small() does not hold,
// recurses into those children, and afterwards drops ftid-0 children
// that have no supersets.
//   s - stream that receives "<itemset>(<count>)" lines
//   I - itemset prefix; restored to its incoming contents on return
// Returns the number of nodes deleted.
int DagNode::outputLNB(ostream& s, Itemset& I, long pno,
        double minsupp, int psize, int dbsize)
{
    int deleted = 0;

    for (list<DagNode*>::iterator sub = llist.begin();
            sub != llist.end(); ++sub)
    {
        DagNode *node = *sub;
        if (node->ftid != 0)
            continue;

        I.push_back(node->litem);

        // Small itemsets are skipped without output. The original source
        // carries a disabled variant that printed them when this node is
        // not a singleton (lchild != 0).
        if (! node->small(pno, minsupp, psize, dbsize))
        {
            s << I << "(" << node->count << ")\n";
            if (I.size() > maxK)
                maxK = I.size();
            setHist[I.size()]++;

            deleted += node->outputLNB(s, I, pno, minsupp, psize, dbsize);
        }

        I.pop_back();
    }

    // Drop ftid-0 children that have no supersets on either side.
    list<DagNode*>::iterator pos = llist.begin();
    while (pos != llist.end())
    {
        DagNode *node = *pos;
        const bool noSupersets = node->llist.empty() && node->rlist.empty();

        if (node->ftid == 0 && noSupersets)
        {
            pos = llist.erase(pos);
            node->rchild->rlist.erase(node->rback);
            delete node;
            ++deleted;
        }
        else
            ++pos;
    }

    return deleted;
}

// Same traversal as incrCount(), but recursing through incrCountNB().
//   s        - stream that receives "<itemset>(<count>)" lines
//   I        - itemset prefix; this node's litem is appended for the
//              duration of the call and removed again before returning
//   mincount - minimum count for a node first seen at tid pno+1 to be
//              reported
// Returns the number of nodes deleted.
int DagNode::incrCountNB(ostream& s, Itemset& I, long pno, long
        mincount, int dbsize)
{
    int deleted = 0;
    const long newTid = pno + 1;

    // setParentCounts is defined elsewhere; it is handed the whole
    // llist range.
    setParentCounts(llist.begin(), llist.end(), dbsize, llist.size());

    // Descend into the supersets first.
    I.push_back(litem);
    for (list<DagNode*>::iterator sub = llist.begin();
            sub != llist.end(); ++sub)
        deleted += (*sub)->incrCountNB(s, I, pno, mincount, dbsize);

    list<DagNode*>::iterator pos = llist.begin();
    while (pos != llist.end())
    {
        DagNode *node = *pos;

        if (node->ftid == newTid)
        {
            I.push_back(node->litem);
            if (node->count >= mincount)
            {
                s << I << "(" << node->count << ")\n";
                if (I.size() > maxK)
                    maxK = I.size();
                setHist[I.size()]++;
            }
            else
            {
                // Below the minimum count: not printed. The original
                // source carries a disabled variant that printed such
                // itemsets when this node is not a singleton.
                deleted += node->erase_supersets();
            }

            I.pop_back();
        }

        const bool noSupersets = node->llist.empty() && node->rlist.empty();
        if (node->ftid <= newTid && noSupersets)
        {
            pos = llist.erase(pos);
            node->rchild->rlist.erase(node->rback);
            delete node;
            ++deleted;
        }
        else
            ++pos;
    }

    I.pop_back();
    return deleted;
}

int DagNode::expand(long tid, double minsupp, int psize, int dbsize,
	DagNode *i, TidsetDag& D)
{ //returns no of nodes added-deleted
    int no_nodes = 0;

    list<DagNode*>::iterator j;
    for (j = llist.begin(); j != llist.end(); j++)
    {
	if (! (*j)->expanded)
	    continue;

	//try to prune newnode if it is a 3-itemset
	if (lchild == 0)
	{
	    DagNode *d;
	    int itm;
	    if ((*j)->litem < i->litem)
	    {
		d = &(D.singletons[(*j)->litem]);
		itm = i->litem;
	    }
	    else
	    {
		d = &(D.singletons[i->litem]);
		itm = (*j)->litem;
	    }

	    if (! d->expanded || d->supcount(itm) < (int)ceil(minsupp*
		    (dbsize + (tid-d->ftid)*psize)))
		continue;
	}

	DagNode* newnode = new DagNode;
	no_nodes++;
	newnode->ftid = tid;
	newnode->count = 0;
	newnode->expanded = false;

	if ((*j)->litem < i->litem)
	{
	    newnode->lchild = *j;
	    newnode->rchild = i;
	    newnode->lback = (*j)->llist.insert((*j)->llist.end(),
		    newnode);
	    newnode->rback = i->rlist.insert(i->rlist.end(),
		    newnode);
	}
	else
	{
	    newnode->lchild = i;
	    newnode->rchild = *j;
	    newnode->lback = i->llist.insert(i->llist.end(),
		    newnode);
	    newnode->rback = (*j)->rlist.insert((*j)->rlist.end(),
		    newnode);
	}

	newnode->litem = newnode->rchild->litem;
	newnode->ritem = newnode->lchild->litem;
    }

    if (lchild == 0) //if node being expanded is a 2-itemset
    {
	int itm1 = litem;
	int itm2 = i->litem;
	vector<DagNode>::iterator k;
	for (k = D.singletons.begin(); k->litem != itm1; k++)
	{
	    if (! k->expanded ||
    	    	k->supcount(itm1) < (int)ceil(minsupp*(dbsize +
	    	    	(tid-k->ftid)*psize)) ||
		k->supcount(itm2) < (int)ceil(minsupp*(dbsize +
	    	    	(tid-k->ftid)*psize)))
		continue;

	    //find itm1 and itm2 in supersets of k
	    DagNode *d1 = 0, *d2 = 0;
	    for (j = k->llist.begin(); j != k->llist.end(); j++)
	    {
		if ((*j)->litem == itm1)
		{
		    d1 = *j;
		    if (d2 != 0)
			break;
		}
		else if ((*j)->litem == itm2)
		{
		    d2 = *j;
		    if (d1 != 0)
			break;
		}
	    }

	    bool present = false;
	    for (j = d1->llist.begin(); j != d1->llist.end(); j++)
	    {
		if ((*j)->litem == itm2)
		{
		    present = true;
		    break;
		}
	    }

	    if (present)
		continue;

	    no_nodes++;
	    DagNode* newnode = new DagNode;
	    newnode->ftid = tid;
	    newnode->expanded = false;
	    newnode->lchild = d1;
	    newnode->rchild = d2;
	    newnode->lback = d1->llist.insert(d1->llist.end(),
		    newnode);
	    newnode->rback = d2->rlist.insert(d2->rlist.end(),
		    newnode);
	    newnode->litem = itm2;
	    newnode->ritem = itm1;
	    intersect(d1->tidlist, d2->tidlist, newnode->tidlist);
	    newnode->count = newnode->tidlist.size();
	    if (! newnode->small(tid, minsupp, psize, dbsize))
	    {
		no_nodes += d1->expand(tid, minsupp, psize, dbsize,
			newnode, D);
		no_nodes += newnode->trigger_left_right(tid, minsupp,
			psize, dbsize, D);
	    }
	}
    }

    i->expanded = true;
    return no_nodes;
}

int DagNode::trigger(long tid, double minsupp, int psize, int dbsize,
	TidsetDag& D)
{ //returns no of nodes added-deleted
    int no_nodes = 0;

    if (llist.size() == 0)
	return 0;

    setMamaCounts(tid, minsupp, psize, dbsize);

    list<DagNode*>::iterator i;
    for (i = llist.begin(); i != llist.end(); i++)
    {
	if ((*i)->small(tid, minsupp, psize, dbsize))
	{
	    if ((*i)->expanded)
	    {
		no_nodes -= (*i)->erase_supersets();
		(*i)->expanded = false;
	    }

	    continue;
	}
	else if (! (*i)->expanded)
	    no_nodes += expand(tid, minsupp, psize, dbsize, *i, D);
    }

    //trigger supersets
    for (i = llist.begin(); i != llist.end(); i++)
	no_nodes += (*i)->trigger(tid, minsupp, psize, dbsize, D);

    return no_nodes;
}

void DagNode::expand(int mincount, DagNode *i, TidsetDag& D)
{
    list<DagNode*>::iterator j;
    for (j = llist.begin(); j != llist.end(); j++)
    {
	if (! (*j)->expanded)
	    continue;

	DagNode* newnode = new DagNode;
	D.no_nodes++;
	newnode->ftid = 0;
	newnode->count = 0;
	newnode->expanded = false;

	if ((*j)->litem < i->litem)
	{
	    newnode->lchild = *j;
	    newnode->rchild = i;
	    newnode->lback = (*j)->llist.insert((*j)->llist.end(),
		    newnode);
	    newnode->rback = i->rlist.insert(i->rlist.end(),
		    newnode);
	}
	else
	{
	    newnode->lchild = i;
	    newnode->rchild = *j;
	    newnode->lback = i->llist.insert(i->llist.end(),
		    newnode);
	    newnode->rback = (*j)->rlist.insert((*j)->rlist.end(),
		    newnode);
	}

	newnode->litem = newnode->rchild->litem;
	newnode->ritem = newnode->lchild->litem;
    }

    i->expanded = true;
}

void DagNode::trigger(int mincount, TidsetDag& D)
{
    if (count < mincount || llist.size() == 0)
	return;

    list<DagNode*>::iterator i;
    for (i = llist.begin(); i != llist.end(); i++)
	if ((*i)->count >= mincount && ! (*i)->expanded)
	    expand(mincount, *i, D);

    //trigger supersets
    for (i = llist.begin(); i != llist.end(); i++)
	(*i)->trigger(mincount, D);
}

int DagNode::trigger_left_right(long tid, double minsupp, int psize, int
	dbsize, TidsetDag& D)
{ //returns no of nodes added-deleted
    int no_nodes = 0;
    list<DagNode*>::iterator i;
    setMamaCounts(tid, minsupp, psize, dbsize);
    setPapaCounts(tid, minsupp, psize, dbsize);

    for (i = llist.begin(); i != llist.end(); i++)
    {
	if ((*i)->small(tid, minsupp, psize, dbsize))
	{
	    if ((*i)->expanded)
	    {
		no_nodes -= (*i)->erase_supersets();
		(*i)->expanded = false;
	    }

	    continue;
	}
	else if (! (*i)->expanded)
	    no_nodes += expand(tid, minsupp, psize, dbsize, *i, D);
    }

    for (i = rlist.begin(); i != rlist.end(); i++)
    {
	if ((*i)->small(tid, minsupp, psize, dbsize))
	{
	    if ((*i)->expanded)
	    {
		no_nodes -= (*i)->erase_supersets();
		(*i)->expanded = false;
	    }

	    continue;
	}
	else if (! (*i)->expanded)
	    no_nodes += (*i)->lchild->expand(tid, minsupp, psize,
	    	    	    dbsize, *i, D);
    }

    //trigger supersets
    for (i = llist.begin(); i != llist.end(); i++)
	no_nodes += (*i)->trigger(tid, minsupp, psize, dbsize, D);
    for (i = rlist.begin(); i != rlist.end(); i++)
	no_nodes += (*i)->trigger_left_right(tid, minsupp, psize,
		dbsize, D);

    return no_nodes;
}

void TidsetDag::update(long pno, int psize, const vector<Itemset>& db,
	double minsupp)
{
    vector<int>::iterator vi;
    list<DagNode*>::iterator li;

    vector<DagNode>::iterator i;
    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	i->count += i->tidlist.size();

	if (i->count < (int)ceil(minsupp*(db.size() + pno*psize)))
	{
	    if (i->expanded)
	    {
		no_nodes -= i->erase_supersets();
		i->counts2 = vector<int>();
		i->expanded = false;
	    }

	    continue;
	}
	else if (! i->expanded)
	{
	    i->ftid = pno;
	    i->counts2.resize(singletons.size() - i->litem - 1);
	    for (vi = i->counts2.begin(); vi != i->counts2.end(); vi++)
		*vi = 0;
	    i->expanded = true;
	}
    }

    for (int tuple=0; tuple < (int)db.size(); tuple++)
    {
	for (Itemset::const_iterator j = db[tuple].begin(); 
		j != db[tuple].end(); j++)
	    if (singletons[*j].counts2.size() > 0)
		for (Itemset::const_iterator k = j+1;
		    	k != db[tuple].end(); k++)
		    singletons[*j].counts2[*k - *j - 1]++;
    }

    for (i = singletons.begin(); i != singletons.end(); i++)
    {
	if (! i->expanded)
	    continue;

	//mark the counts of existing large 2-itemsets
	for (li = i->llist.begin(); li != i->llist.end(); li++)
	    i->counts2[(*li)->litem - i->litem - 1] *= -1;

	for (vi = i->counts2.begin(); vi != i->counts2.end(); vi++)
	{
	    if (*vi < 0)
	    {
		*vi *= -1; //unmark the count of large 2-itemset
		continue;
	    }

	    if (*vi < (int)ceil(minsupp*
		    (db.size() + (pno-i->ftid)*psize)) ||
		singletons[i->litem+1+vi-i->counts2.begin()].count < 
		    (int)ceil(minsupp*(db.size() + pno*psize)))
		continue;

	    DagNode *newnode = new DagNode;

	    int pi = i->litem;
	    int qi = i->litem + 1 + (vi - i->counts2.begin());
	    DagNode& pd = *i;
	    DagNode& qd = singletons[qi];

	    newnode->lchild = &(*i);
	    newnode->lback = pd.llist.insert(pd.llist.end(),
			    	    	    	newnode);
	    newnode->litem = qi;

	    newnode->rchild = &singletons[qi];
	    newnode->rback = qd.rlist.insert(qd.rlist.end(),
			    	    	    	newnode);
	    newnode->ritem = pi;

	    newnode->ftid = i->ftid;
	    newnode->count = *vi;
	    newnode->expanded = false;

	    no_nodes++;
	}

	no_nodes += i->trigger(pno, minsupp, psize, db.size(),
		const_cast<TidsetDag&>(*this));
    }

    for (i = singletons.begin(); i != singletons.end(); i++)
	i->tidlist.resize(0);
}

//--------------- Input Operations ------------------
// Returns the node for itemset I, creating it (and, recursively, its two
// parents) when it does not exist yet.
//   I     - the itemset; modified temporarily during the call and
//           restored before returning
//   ftid  - when different from -1, ftid and count are stored on the
//           node that is found; a newly created node always receives
//           both values as given
//   count - see ftid
// Returns 0 for an empty itemset.
DagNode* TidsetDag::insert(Itemset& I, int ftid, int count)
{
    if (I.size() < 1)
        return 0;

    const bool overwrite = (ftid != -1);

    if (I.size() == 1)
    {
        DagNode& single = singletons[I.front()];

        if (overwrite)
        {
            single.ftid = ftid;
            single.count = count;
        }

        return &single;
    }

    // Split off the last two items: the left parent is I without its
    // last item, the right parent is I without its second-to-last item.
    const int tail = I.back();
    I.pop_back();
    const int penult = I.back();

    DagNode* left = insert(I, -1, 0);
    left->expanded = true;

    I.pop_back();
    I.push_back(tail);
    DagNode* right = insert(I, -1, 0);
    right->expanded = true;

    // Put I back the way the caller passed it.
    I.pop_back();
    I.push_back(penult);
    I.push_back(tail);

    // Look for an existing child of left whose right parent is right.
    list<DagNode*>::iterator pos = left->llist.begin();
    while (pos != left->llist.end() && (*pos)->rchild != right)
        ++pos;

    if (pos != left->llist.end())
    {
        DagNode* found = *pos;

        if (overwrite)
        {
            found->ftid = ftid;
            found->count = count;
        }

        return found;
    }

    DagNode* fresh = new DagNode;
    fresh->lback = left->llist.insert(left->llist.end(), fresh);
    fresh->rback = right->rlist.insert(right->rlist.end(), fresh);
    fresh->lchild = left;
    fresh->rchild = right;
    fresh->litem = tail;
    fresh->ritem = penult;
    fresh->count = count;
    fresh->ftid = ftid;
    fresh->expanded = false;
    no_nodes++;
    return fresh;
}

// Reads a DAG description from s and inserts every itemset it contains.
// The stream starts with the number of items and the number of itemsets,
// followed by one record per itemset: the itemset, a separator
// character, its count and, when `tid` is true, another separator
// character and its ftid.
//   tid       - whether each record carries its own ftid
//   ftid      - ftid used for all records when `tid` is false
//   tempcount - when >= 0, replaces the count read from the stream
// Reading stops silently at the first record that cannot be read
// completely; such a record is not inserted.
void TidsetDag::input(istream& s, bool tid, long ftid, int tempcount)
{
    int no_items;
    s >> no_items;

    if (! s)
        return;

    int noItemsets;
    s >> noItemsets;

    if (! s)
        return;

    setNoItems(no_items);
    Itemset I;
    I.reserve(no_items);

    for (int i = 0; i < noItemsets && s >> I; i++)
    {
        int count = 0;
        char skip;
        s >> skip; //skip :
        s >> count;

        if (tid)
        {
            s >> skip; //skip :
            s >> ftid;
        }

        // A truncated record used to be inserted with whatever happened
        // to be in count/ftid; stop instead.
        if (! s)
            return;

        if (tempcount >= 0)
            count = tempcount;

        insert(I, ftid, count);
        I.clear();
    }
}

// Reads a DAG description from s until the stream is exhausted or a
// record cannot be read. After the number of items and the number of
// itemsets (the latter is read but not used to bound the loop), each
// record starts with a size field:
//   size > 0  : separator, `size` items, separator, count and, when
//               `tid` is true, separator and ftid. The itemset is
//               inserted.
//   size <= 0 : separator, a singleton index, separator and -size
//               values. The counts2 vector of that singleton is sized to
//               -size entries, all zero; the values in the stream are
//               consumed and discarded.
//               NOTE(review): discarding the stored values matches the
//               original code, which overwrote each one with 0 right
//               after reading it; confirm that this is intended.
//   tid       - whether each itemset record carries its own ftid
//   ftid      - ftid used for itemset records when `tid` is false
//   tempcount - when >= 0, replaces the count read from the stream
void TidsetDag::inputNB(istream& s, bool tid, long ftid, int tempcount)
{
    int no_items;
    s >> no_items;

    if (! s)
        return;

    int noItemsets;
    s >> noItemsets; // consumed only; a failure is caught by the loop

    setNoItems(no_items);
    Itemset I;
    I.reserve(no_items);

    for (;;)
    {
        int count = 0;
        int setsize;
        char skip;

        s >> setsize;
        if (! s)
            return;

        if (setsize > 0)
        {
            I.reserve(setsize);
            s >> skip; //skip :
            for (int j = 0; j < setsize; j++)
            {
                int item;
                s >> item;
                if (! s)
                    return; // never hand an unread item to the itemset
                I.insert(item);
            }

            s >> skip; //skip :
            s >> count;

            if (tid)
            {
                s >> skip; //skip :
                s >> ftid;
            }

            // Do not insert a record whose count or ftid is missing.
            if (! s)
                return;

            if (tempcount >= 0)
                count = tempcount;

            insert(I, ftid, count);
            I.clear();
        }
        else
        {
            setsize *= -1;
            s >> skip; //skip :
            s >> count; // here: index of the singleton

            // The index used to be applied before the stream state was
            // checked and without any range check.
            if (! s || count < 0 || count >= (int)singletons.size())
                return;

            vector<int>& v = singletons[count].counts2;
            v.assign(setsize, 0);

            s >> skip; //skip :
            for (int j = 0; j < setsize; j++)
            {
                int discarded;
                s >> discarded;
            }

            if (! s)
                return;
        }
    }
}

// Reads two consecutive itemset sections from s. Each section starts
// with the number of items and the number of itemsets, followed by one
// record per itemset: the itemset, a separator character and its count.
// Every itemset is inserted with ftid 0. In the first section each
// 1-itemset additionally gets its counts2 vector resized to one entry
// per item that follows it (new entries are zero). The item count of the
// second section is read but not applied.
// Reading of a section stops at the first record that cannot be read
// completely; such a record is not inserted.
void TidsetDag::inputNB(istream& s)
{
    int no_items;
    s >> no_items;

    if (! s)
        return;

    int noItemsets;
    s >> noItemsets;

    setNoItems(no_items);
    Itemset I;
    I.reserve(no_items);

    for (int i = 0; i < noItemsets && s >> I; i++)
    {
        int count = 0;
        char skip;

        s >> skip; //skip :
        s >> count;

        // A truncated record used to be inserted with an indeterminate
        // count; stop instead.
        if (! s)
            return;

        insert(I, 0, count);
        if (I.size() == 1)
        {
            vector<int>& v = singletons[I.front()].counts2;
            // Clamp at zero: a negative difference would otherwise be
            // converted to a huge unsigned size.
            const int following = no_items - I.front() - 1;
            v.resize(following > 0 ? following : 0, 0);
        }

        I.clear();
    }

    s >> no_items;

    if (! s)
        return;

    s >> noItemsets;

    for (int i = 0; i < noItemsets && s >> I; i++)
    {
        int count = 0;
        char skip;

        s >> skip; //skip :
        s >> count;

        if (! s)
            return;

        insert(I, 0, count);
        I.clear();
    }
}
