/***********************************************************************
 AUTHOR: Srikanta Bedathur
 DESCRIPTION: 
    Construct Persistent Suffix Tree over a sequence of DNA alphabets
    using Ukkonen's linear time algorithm.

    Copyright (C) 2004 Database Systems Lab, Supercomputer Education and
    Research Centre, Indian Institute of Science, Bangalore, INDIA.
                     http://dsl.serc.iisc.ernet.in

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
***********************************************************************/
#include "smartref.h"
#include "sfxnode.h"
#include "allocator.h"
#include "defines.h"

#include <string>
using namespace std;

// Progress counters advanced by constructSuffixTree():
//   curindex -- bumped together with i each time the phase loop steps.
//   curjndex -- bumped together with j each time the extension loop steps.
// constructSuffixTree() itself never resets them.
long curindex = 0L;
long curjndex = 0L;




///////////////////////////////////////////////////////////////////////////
//
// Function: constructSuffixTree (leaf_alloc, internal_alloc, root,
//                                sequence, M)
// Synopsis: Builds the suffix tree for the sequence of length M, using
//           Ukkonen's linear time algorithm. The impl. is based on
//           Gusfield's "strmat" package. Currently, the algorithm
//           constructs only implicit suffix trees: adding a last pass
//           for the $ symbol just made the code more complicated without
//           adding any benefit when we are looking at huge strings.
// Params:   leaf_alloc     -- allocator every leaf node is taken from.
//           internal_alloc -- allocator the root and every internal
//                             (edge-splitting) node is taken from.
//           root           -- out: overwritten with the newly allocated
//                             root of the tree.
//           sequence       -- the input text, indexed by position.
//           M              -- number of characters in sequence.
// Globals:  curindex -- incremented each time the phase loop advances i.
//           curjndex -- incremented each time the extension loop
//                       advances j.
// Notes:    printstats() is called every 10000 phases and once more,
//           with i-1, after the phase loop has finished.
///////////////////////////////////////////////////////////////////////////
void constructSuffixTree (Allocator<SfxNode, true>* leaf_alloc,
			  Allocator<SfxInternalNode, false> *internal_alloc,
			  Ref& root, 
			  char *sequence, 
			  long M) {

  // i       : current phase (index of the character being added).
  // j       : current extension (start index of the suffix being extended).
  // g       : number of characters matched on the edge leading into `node`.
  // h       : index into sequence used while re-descending (skip/count).
  // gprime  : length of the edge just stepped onto during the re-descent.
  // edgestr : start of the label of the edge leading into `node`.
  // edgelen : length of that label (0 is used when `node` is the root).
  long i, j, g, h, gprime, edgelen;
  char *edgestr;
  Ref parent(0);
  Ref child (0);
  Ref leaf (0);
  
  root = internal_alloc->newnode(0);// 0

#ifdef LINKEDLIST
  root.update() -> setNextParentFlag (true);
  root.update() -> setNext ((SFXULONG)root);
#endif

  // node     : current position in the tree (together with g).
  // lastnode : internal node created by the previous extension that may
  //            still need its suffix link set; root doubles as "none".
  Ref node(root);
  Ref lastnode(root);

  g = 0;
  edgelen = 0;
  edgestr = NULL;
  
  // Set when the current extension only hung a leaf off an existing node
  // (i.e. no edge was split and no internal node was created).
  bool nointernalnode = false;
  

  // Phase i adds sequence[i]. j is NOT reset between phases: the
  // extension loop below carries on from where the previous phase stopped.
  for (i = 0, j=0; i < M; i++, curindex++) {
    if (i % 10000 == 0 && i != 0) {
      printstats(i);
    }
    // Extension j of phase i. Leaving via `break` skips the j++ of this
    // loop, so the same j is retried in the next phase; `continue` and a
    // normal fall-through both advance j.
    for (; j <= i && j < M; j++, curjndex++) {
      nointernalnode = false;

      // Case 1: the current position is at a node (nothing matched yet, or
      // the whole edge into `node` has been matched).
      if (g == 0 || g == edgelen) {
	child = (*node)->locateChild (sequence[i]); 
	
	// An edge starting with sequence[i] already exists: step onto it
	// (one character matched) and end this phase.
	if ( ((SFXULONG)child) != 0){
	  node = child;
	  g = 1;
	  edgestr = sequence + node->getbegin (); 
	  edgelen = node->getlength ();
	  lastnode = root; 
	  break;
	}
	
	// No such edge: hang a new leaf labelled sequence[i .. M-1] off node.
	// NOTE(review): the meaning of the newnode() argument is defined by
	// Allocator, which is not visible here -- confirm.
	leaf = leaf_alloc->newnode(M-i);
	
	leaf.update()->setbegin (i);
	leaf.update()->setlength (M - i);
	leaf.update()->setch(sequence[i]);
	
#ifdef LINKEDLIST
	(*(node.update()))->addChild ( (SFXULONG)leaf, (SFXULONG)node);
#else
	(*(node.update()))->addChild ( (SFXULONG)leaf, leaf->getch());
	leaf.update()->setParent ( (SFXULONG)node);
#endif

	nointernalnode = true;
      }
      
      // Case 2: the current position is in the middle of the edge into node.
      else {

	// The next character on the edge already is sequence[i]: just
	// advance along the edge and end this phase.
	if (sequence[i] == edgestr[g]) { 
	  lastnode = root;
	  g++;
	  break;
	}

	nointernalnode = false;

	// Mismatch: split the edge after its first g characters. newnode
	// takes over the first g characters of node's label ...
	Ref newnode = internal_alloc->newnode(i-j); 

	ASSERT ( (SFXULONG)newnode != (SFXULONG)root);

	newnode.update()->setbegin (node->getbegin());
	newnode.update()->setlength (g); 
	newnode.update()->setch (sequence[newnode->getbegin ()]);

	(*(newnode.update()))->setSuffixLink(0);

	// ... and replaces node as the child of node's parent.
	parent = node->getParent();

	SFXULONG remchild = (*(parent.update()))->removeChild (node->getch()); 

	ASSERT ((SFXULONG)node == remchild);
#ifdef LINKEDLIST
	(*(parent.update()))->addChild ((SFXULONG)newnode, parent);
#else
	node.update()->setParent (0); /*99999999 Node is an Internal node */

	(*(parent.update()))->addChild ((SFXULONG)newnode, newnode->getch ());
	newnode.update()->setParent ((SFXULONG)parent);
#endif

	// node keeps the remainder of the old label (everything after the
	// first g characters) and becomes a child of newnode.
	node.update()->setbegin (node->getbegin() + g);
	node.update()->setlength (node->getlength() - g);
	node.update()->setch (sequence[node->getbegin()]);

#ifdef LINKEDLIST
	(*(newnode.update()))->addChild ((SFXULONG)node, (SFXULONG)newnode);
#else
	(*(newnode.update()))->addChild((SFXULONG)node, node->getch ());
	node.update()->setParent ( (SFXULONG)newnode);
#endif

	edgestr = sequence + newnode->getbegin ();
	edgelen = newnode->getlength();

	// The second child of newnode is a new leaf labelled
	// sequence[i .. M-1].
	leaf = leaf_alloc->newnode(M-i);

	leaf.update()->setbegin (i);
	leaf.update()->setlength (M - i);
	leaf.update()->setch (sequence[i]);

#ifdef LINKEDLIST
	(*(newnode.update()))->addChild ((SFXULONG)leaf, (SFXULONG)newnode);
#else
	(*(newnode.update()))->addChild ((SFXULONG)leaf, leaf->getch());
	leaf.update()->setParent ( (SFXULONG)newnode);
#endif

	// From here on the current position is the freshly created node.
	node = newnode;

	// If the previous extension left an internal node without a suffix
	// link, point it at the node just created.
	if ( (SFXULONG)lastnode != (SFXULONG)root) {

	  if ((*lastnode)->getSuffixLink () == 0) {
#ifdef STATS
		suffixlinks++;
#endif
	    (*(lastnode.update()))->setSuffixLink ( (SFXULONG)node);
	  }

	}

	lastnode = node; //retain the latest node in lastnode.
      }

      // Move to the starting position for extension j+1.
      // At the root there is nothing to walk; only a trace is emitted.
      if ( (SFXULONG) node == (SFXULONG) root) {
	DBG_TRACE ("skip" << endl);
	;
      }

      else {

	if (nointernalnode) {

	  ASSERT (g == edgelen);

	  
	  // A leaf was added directly below node. If node has a suffix
	  // link, follow it, position at the end of the edge into the
	  // target and go straight to the next extension.
	  if ( (*node)->getSuffixLink () != 0) {
	    node = (*node)->getSuffixLink ();
	    edgestr = sequence + node->getbegin ();
	    if ( (SFXULONG)node == (SFXULONG)root)
	      edgelen = 0;
	    else
	      edgelen = node->getlength();

	    ASSERT(edgelen > 0 || (SFXULONG)node == (SFXULONG)root);

	    g = edgelen;
	    continue;
	  }

	  // No suffix link: restart at the root and re-descend the
	  // i - j - 1 characters sequence[j+1 .. i-1].
	  node = root;
	  g = i - j - 1;
	}

	else {
	  // node was created by a split in this extension, so it has no
	  // suffix link yet; go through its parent instead.
	  parent = node->getParent ();


	  if ( (SFXULONG) parent == (SFXULONG) root) {
	    // Parent is the root: restart there with one character fewer
	    // to re-descend.
	    node = root;
	    g--;
	  }
	  else {
	    if ((*parent)->getSuffixLink() != 0) {
	      // Jump across the parent's suffix link; g (the length of
	      // the edge into the split node) is still to be re-descended.
	      node = (*parent)->getSuffixLink();
	    }
	    else {
	      // Parent has no suffix link either: restart at the root and
	      // re-descend sequence[j+1 .. i-1].
	      node = root;
	      g = i - j - 1;
	    }
	  }
	    
	}
#ifdef STATS
	skipcountpasses++; 
#endif
	// Re-descend the g characters sequence[h .. i-1] below node.
	h = i - g;
	edgestr = sequence + h;
	edgelen = g;

	// Only the first character of each edge is looked up; whole edge
	// lengths are then subtracted from g without comparing the rest
	// of the label.
	while (g > 0) {
	  Ref temp = (*node)->locateChild (sequence[h]);
	  ASSERT ( (SFXULONG) temp != 0);
#ifdef STATS
	  edgestaken++;
#endif
	  node = temp;

	  gprime = node->getlength();

	  ASSERT (gprime < M && gprime > 0);

	  // The target lies inside this edge: stop with g characters
	  // matched on it.
	  if (gprime > g) break;
	  g -= gprime;
	  h += gprime;
	}
	

	// Refresh the edge description for the node the descent ended on.
	if ((SFXULONG)node == (SFXULONG)root) 
	  edgelen = 0;
	else
	  edgelen = node->getlength();

	ASSERT (edgelen > 0 || (SFXULONG) node == (SFXULONG) root);
	edgestr = sequence + node->getbegin ();
	// g == 0 means the descent ended exactly at a node.
	if (g == 0) {
	  // That node is the suffix-link target of a still-unlinked
	  // lastnode, provided it is not a leaf.
	  if ( (SFXULONG)lastnode != (SFXULONG)root &&
	       !node.isleaf() &&
	       (*lastnode)->getSuffixLink () == 0) {

#ifdef STATS
	    suffixlinks++;
#endif
	    (*(lastnode.update()))->setSuffixLink ( (SFXULONG)node);
	    lastnode = node;
	  }
	  // Express "at the node" as "whole incoming edge matched", which
	  // is what the g == edgelen test at the top of the loop expects.
	  if (node != root)
	    g = edgelen;
	}
      }
    } //End of Extension j
  } //End of phase i
  printstats(i-1);
}

