///////////////////////////////////////////////////
// Calculating the probability of getting a given signature
// Author: Pawel Pralat
// Last modification: November 9, 2015
///////////////////////////////////////////////////

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// Configuration: exactly one initializer line must be active for n and one for m.
// Pick the n line and the m line that carry the same dataset label.

const int k = 4;             // k = cliques size in the 2-section
                             // (p[k] and q[k] are read, and m, p, q have 6 entries, so k must be at most 5)

const double n               // n = number of vertices
= 4919;                          // Enron at K=4
//= 5014;                          // Enron at K=5
//= 1146130;                       // DBLP at K=4
//= 1264602;                       // DBLP at K=5

const double m[6]            // mi = number of hyperedges of size i
                             // (only m[2]..m[5] are read; m[0] and m[1] are placeholders)
= {0, 0, 5975, 2128, 1034, 0};              // Enron at K=4
//= {0, 0, 5975, 2128, 1034, 561};            // Enron at K=5
//= {0, 0, 473560, 373262, 211011, 0};        // DBLP at K=4
//= {0, 0, 473560, 373262, 211011, 1152001};  // DBLP at K=5

///////////////////////////////////////////////////
// x to power of y
//
// Delegates to the library pow() rather than exp(y*log(x)):
//  - pw(0,0) is 1 (the exp/log form gives NaN because 0 * -inf is NaN),
//    which matters when a probability is exactly 0 and its exponent is 0;
//  - exact cases such as pw(2,10) come out exact.
///////////////////////////////////////////////////
static inline double pw(double x, double y)
{
  return pow(x, y);
}

// p[i] = m[i] divided by the number of i-subsets of the n vertices.
// Entries 2..5 are filled in by calculate_ps(); entries 0 and 1 stay at zero.
double p[6];
// q[i] = product over j = i..5 of (1-p[j]) raised to the number of (j-i)-subsets
// of the n-k remaining vertices. Entries 2..5 are filled in by calculate_ps().
// NOTE(review): presumably the probability that a fixed i-subset of the k-set is
// not the intersection of any hyperedge with the k-set -- confirm against the paper.
long double q[6];

///////////////////////////////////////////////////
// log of (1-prob)^count, computed without ever forming 1-prob
//
// For prob far below the double epsilon (about 1e-16), 1-prob rounds to
// exactly 1 and the factor would silently disappear even when count is
// large enough for prob*count to matter. log1p keeps the full precision.
///////////////////////////////////////////////////
static long double log_pow_complement(double prob, double count)
{
  if (count == 0.0) return 0.0L; // empty product, also avoids 0 * -inf when prob == 1
  return (long double) count * log1pl(-(long double) prob);
}

///////////////////////////////////////////////////
// Calculating p[2..5] and q[2..5]
//
// p[i] = m[i] / bin(n,i)
// q[i] = product over j = i..5 of (1-p[j])^bin(n-k, j-i)
// Each q[i] is assembled as a sum of logarithms and exponentiated once.
///////////////////////////////////////////////////
void calculate_ps()
{
  p[2] = m[2] / (n*(n-1.0)/2.0);
  p[3] = m[3] / (n*(n-1.0)*(n-2.0)/6.0);
  p[4] = m[4] / (n*(n-1.0)*(n-2.0)*(n-3.0)/24.0);
  p[5] = m[5] / (n*(n-1.0)*(n-2.0)*(n-3.0)*(n-4.0)/120.0);

  // number of ways to pick 1, 2 or 3 vertices outside the k-set
  const double pick1 = n-k;
  const double pick2 = (n-k)*(n-k-1)/2.0;
  const double pick3 = (n-k)*(n-k-1)*(n-k-2)/6.0;

  q[2] = expl(log_pow_complement(p[2], 1.0)
              + log_pow_complement(p[3], pick1)
              + log_pow_complement(p[4], pick2)
              + log_pow_complement(p[5], pick3));
  q[3] = expl(log_pow_complement(p[3], 1.0)
              + log_pow_complement(p[4], pick1)
              + log_pow_complement(p[5], pick2));
  q[4] = expl(log_pow_complement(p[4], 1.0)
              + log_pow_complement(p[5], pick1));
  q[5] = expl(log_pow_complement(p[5], 1.0));
}

// Working state shared by the generators below. Indices 0 and 1 of v and l are
// never used for hyperedges: main allocates l[i] only for i = 2..k.
int v[k+1];                  // vector/signature; v[i] is the number of edges of size i
int ** l[k+1];               // l is a table of pointers
                             // l[i] is a pointer to a table of size bin(k,i) of pointers (potential edges of size i)
                             // l[i][j] is a pointer to a table of size [i] containing vertices that belong to an edge
int g[k][k];                 // g keeps the graph in the 2-section
                             // (g[a][b] == 1 once some selected hyperedge contains both a and b)
double factor;               // normalizing factor
                             // (sum of the unnormalized probabilities, accumulated by generate_vectors in mode 0)

///////////////////////////////////////////////////
// Binomial coefficient
//
// Returns the number of k-subsets of an n-set, and 0 when k < 0 or k > n.
// The product is built one factor at a time: after step i the running
// value equals bin(n-k+i, i), so every division is exact and the
// intermediate never grows far beyond the final result (the former
// "full falling factorial first" form overflowed int, e.g. for bin(20,10)).
// The result must still fit in an int.
///////////////////////////////////////////////////
int bin(int n, int k)
{
  if (k < 0 || k > n) return 0;
  if (k > n-k) k = n-k;        // symmetry: fewer steps, smaller intermediates

  long long a = 1;             // wide intermediate for the multiply-before-divide step
  for (int i = 1 ; i <= k ; i++)
    a = a * (n-k+i) / i;
  return (int) a;
}

///////////////////////////////////////////////////
// Generating hypergraphs
///////////////////////////////////////////////////
void generate_hypergraphs(int i, int j, int x, int & nr)
// currently selecting hyperedges of size i
// j of them are already selected; we need v[i] in total
// the first available i-set has label x
// nr counts the number of hypergraphs that induce K_k in 2-section
{
  if (i > k) // we are done; let's check if we have K_k in 2-section
    {
      for (int a = 0 ; a < k ; a++)
	for (int b = a+1 ; b < k ; b++)
	  if (g[a][b] == 0) return; // oops, some edge is not covered
      
      nr++;     // we got the next copy we like 
      return;
    }

  if (j == v[i]) // we selected enough i-sets; let's move to the next coordinate of the vector (size of hyperedges)
    {
      generate_hypergraphs(i+1, 0, 0, nr);
      return;
    }

  for (int z = x ; z <= bin(k,i)-v[i]+j ; z++) // i-set with label z is added 
    {
      // let's keep the current graph so that we can reverse the process
      int c[k][k];
      for (int a = 0 ; a < k ; a++)
	for (int b = 0 ; b < k ; b++)
	  c[a][b] = g[a][b];

      // let's add this i-set (which gives K_i in the 2-section)
      for (int a = 0 ; a < i ; a++)
	for (int b = a+1 ; b < i ; b++)
	  {
	    g[ l[i][z][a] ][ l[i][z][b] ] = 1;
	    g[ l[i][z][b] ][ l[i][z][a] ] = 1;
	  }
      
      generate_hypergraphs(i, j+1, z+1, nr);

      // time to reverse the process
      for (int a = 0 ; a < k ; a++)
	for (int b = 0 ; b < k ; b++)
	  g[a][b] = c[a][b];
    }
}

///////////////////////////////////////////////////
// Enumerating all signatures
///////////////////////////////////////////////////
void generate_vectors(int s, int mode)
// s    : coordinate of v that is set next
// mode = 0 : accumulate the normalizing factor
// mode = 1 : the normalizing factor is known; print the signatures
{
  if (s != k+1) // coordinate s is still open: try every value from 0 to bin(k,s)
    {
      const int most = bin(k,s);
      for (int count = 0 ; count <= most ; count++)
        {
          v[s] = count;
          generate_vectors(s+1, mode);
        }
      return;
    }

  // the vector is complete

  // number of hypergraphs with this signature, regardless of their 2-section
  int total = 1;
  for (int size = 2 ; size <= k ; size++) total *= bin(bin(k,size),v[size]);

  // number of those whose 2-section is K_k
  int good = 0;
  generate_hypergraphs(2, 0, 0, good);

  double prob = ((double) good);
  for (int size = 2 ; size < k ; size++)
    {
      prob *= pw(1-q[size],v[size]);
      prob *= pw(q[size],bin(k,size)-v[size]);
    }

  // the k-set itself is treated separately
  prob *= pw(p[k],v[k]);
  prob *= pw(q[k],1-v[k]);

  if (mode == 0)
    {
      factor += prob;
      return;
    }

  if (good > 0) // drop this test to list every signature, including those with probability zero
    {
      for (int size = 2 ; size <= k ; size++)
        printf("%d ", v[size]);
      printf("| %d | %d | %.10e\n", good, total, prob/factor);
    }
}

///////////////////////////////////////////////////
// Listing all i-subsets of the k vertices
///////////////////////////////////////////////////
void generating_edges(int * p, int i, int & j, int x, int y)
// p : scratch buffer holding the partially built hyperedge
// i : size of the hyperedges being listed
// j : label of the next free slot l[i][j]; advanced after each stored hyperedge
// x : smallest vertex that may be placed next
// y : number of vertices already placed in p
{
  if (y != i) // still incomplete: extend by every admissible vertex
    {
      const int last = k-i+y; // larger vertices would leave too few for the remaining positions
      for (int vertex = x ; vertex <= last ; vertex++)
        {
          p[y] = vertex;
          generating_edges(p, i, j, vertex+1, y+1);
        }
      return;
    }

  // a full hyperedge sits in p: store it under label j
  int pos = 0;
  while (pos < i)
    {
      l[i][j][pos] = p[pos];
      pos++;
    }
  j++;
}

///////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////
int main(int argc, char * argv[])
{
  for (int i = 2 ; i <= k ; i++) 
    {
      l[i] = new int * [bin(k,i)];
      for (int j = 0 ; j < bin(k,i) ; j++)
	{
	  l[i][j] = new int [i];
	}
      
      int tmp_array[k];
      int tmp_j = 0;
      generating_edges(tmp_array, i, tmp_j, 0, 0);
    }  

  for (int i = 0 ; i < k ; i++) // we start with the empty graph in 2-section
    for (int j = 0 ; j < k ; j++)
      g[i][j] = 0;

  calculate_ps();
  for (int i = 2 ; i <=k ; i++)  
    printf("p[%d] = %.10e, p'[%d] = %.10Le\n", i, p[i], i, 1.0-q[i]);
  printf("\n\n");

  printf("1 - %d : The first columns correspond to the signature (in particular, column 1 is the number of edges, column %d is the number of hyperedges of size %d)\n", k-1, k-1, k); 
  printf("%d : the number of hypergraphs with this signature that generate K_k in 2-section\n", k);
  printf("%d : the number of hypergraphs with this signature\n", k+1);
  printf("%d : the probability of getting this signature\n\n", k+2);

  for (int i = 0 ; i <= k ; i++) v[i] = 0; // we start with the zero vector
  factor = 0;
  generate_vectors(2,0);
  generate_vectors(2,1);

  for (int i = 2 ; i <= k ; i++) 
    {
      for (int j = 0 ; j < bin(k,i) ; j++)
	delete [] l[i][j];
      delete [] l[i];
    }

  return 0;
}
