// File:  db.C 
// Created by: Pradeep Shenoy(purdy@cse.iitb.ernet.in)
// Last modified: 12 Aug 1999
//
// Description: 
// 	File that reads in the database tuple-by-tuple.
// This particular file is written for i/o with the synthetic database
// generator provided by IBM -- used for testing the Apriori algorithm
// (refer to the appropriate SIGMOD paper)
// 
// To use the mining code, the routine $get_tuple()$ will have to be
// implemented as specified below.
//    To use the rule generation and pruning code, the routine $IsCause()$ 
// will have to be implemented as specified below.

#define INT_SIZE 4
#include <iostream.h>
#include <fstream.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

// Splits the item universe into "causes" and "effects" for rule generation,
// where every generated rule has the shape
//      (causes) => (effects)
// Returns 1 when $item$ is a cause (item number below 500), 0 otherwise.

int IsCause(int item){
    if (item >= 500)
	return 0;   // at or above the cut-off: treated as an effect

    return 1;
}

// Buffered single-byte reader on top of read(2).
//      $inp_fd$ is the file descriptor to read from.
//      return value is the next byte (0..255), or EOF once read() reports
//      end of input or an error.
// The buffer and its cursor are static, so they are shared by every call
// regardless of which descriptor is passed in: bytes already buffered from
// one descriptor are handed out before a different descriptor is touched.
inline int myfgetc(int inp_fd){

    static unsigned char buf[8192];
    static int ptr = 0;   // index of the next unread byte in buf
    static int max = 0;   // number of valid bytes currently in buf

    if (ptr == max){ // We've finished the buf.

	// read in one more.
	max = read(inp_fd, buf, sizeof(buf));
	if (max <= 0){
	    // 0 is end of file, -1 is an error. Reset both counters so
	    // that ptr == max still holds: the next call then retries
	    // read() instead of indexing past the valid part of buf.
	    ptr = 0;
	    max = 0;
	    return EOF;
	}

	ptr = 0;
    }

    return buf[ptr++];
}

// The tuple reader is for the synthetic database generator that is given
// out free by IBM -- used in the Apriori paper. The format of data for this
// generator is esoteric --
// first there are two numbers (ignored), and then the number of items in
// the txn, followed by the item numbers themselves. All of them are read
// as raw binary ints in the byte order of the machine running this code.
// The function template is:
// 	$inp_fd$ is an input file stream positioned at the start of a record,
//      $items$  is a pointer to an array of integers (assumed large enough;
//               the size of the array is not known here and is not checked)
//      return value is number of items in current tuple, or 0 when no
//      complete record could be read (end of file, a truncated record, or
//      a negative item count).

int get_tuple(ifstream& inp_fd, int *items){

    int j,noItems;

    //read in binary mode: SPARCompiler Library Reference Manual
    inp_fd.read((char*)&j,sizeof(j)); //skip transaction number
    if ( ! inp_fd )
	return 0;

    inp_fd.read((char*)&j,sizeof(j)); //skip customer number
    if ( ! inp_fd )
	return 0;

    // noItems must not be used unless this read succeeded: a failed read
    // leaves it uninitialized.
    inp_fd.read((char*)&noItems,sizeof(noItems));
    if ( ! inp_fd )
	return 0;

    // A negative count would turn into a huge unsigned byte count below.
    if (noItems < 0)
	return 0;

    inp_fd.read((char*)items, noItems*sizeof(int));
    if ( ! inp_fd )
	return 0;   // item list cut short: do not report a partial tuple

    return noItems;
}

// original get_tuple function: commented out by Vikram
// The new get_tuple function is to enable code to work on Linux systems
// int get_tuple(int inp_fd, int *items){
// 
//     int ch;
//     int i, j;
//     // static int trans = 1;
// 
//     ch = myfgetc(inp_fd);
//     if (ch == EOF) return -1;
// 
//    for(i=0;i<INT_SIZE-1;i++) myfgetc(inp_fd );
//    for(i=0;i<INT_SIZE;i++) myfgetc(inp_fd);
// 
//    int count=0;
//    for(i=0;i<INT_SIZE;i++){
//       ch = myfgetc(inp_fd);
//       count <<= 8; count += ch;
//    }
// 
//    for (i=0;i<count;i++){
//       items[i] = 0 ;
//       for ( j = 0 ; j < INT_SIZE ; j++ ){
//          ch = myfgetc ( inp_fd );
//          items[i] <<= 8;  
// 	 items[i] += ch;
//       }
//     }
// 
//     return count;
// }
