forked from andreacarrara/pagerank
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpagerank.h
123 lines (94 loc) · 3.83 KB
/
pagerank.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#include <cstdio>
#include <cmath>
#include <vector>
#ifndef PAGERANK_H
#define PAGERANK_H
namespace pagerank {
namespace matrix {
typedef std::vector<double> column_t;
typedef std::vector<std::vector<double>> matrix_t;
// Multiply matrix by scalar
// Product stored in the matrix
inline void scalar_multiplication(matrix_t &matrix, int num_rows, int num_cols, double scalar) {
for (int i = 0; i < num_rows; i++)
for (int j = 0; j < num_cols; j++)
matrix[i][j] *= scalar;
}
inline void scalar_multiplication(column_t &matrix, int num_rows, double scalar) {
for (int i = 0; i < num_rows; i++)
matrix[i] *= scalar;
}
// Multiply matrix by column
// Product stored in the column
// Column must have num_cols rows
inline void column_multiplication(matrix_t &matrix, int num_rows, int num_cols, column_t &column) {
std::vector<double> product(num_rows);
for (int i = 0; i < num_rows; i++) {
double sum = 0;
for (int j = 0; j < num_cols; j++)
sum += matrix[i][j] * column[j];
product[i] = sum;
}
// Copy entries in the column
column = product;
}
// Sum two matrices
// Sum stored in the first matrix
// Matrices must have the same dimensions
inline void addition(column_t &matrix1, column_t &matrix2, int num_rows) {
for (int i = 0; i < num_rows; i++)
matrix1[i] += matrix2[i];
}
// Return Euclidean norm of column
inline double norm(column_t &column, int num_rows) {
double sum = 0;
for (int i = 0; i < num_rows; i++)
sum += std::pow(column[i], 2);
return std::sqrt(sum);
}
}
//weight(d, damping factor) - real in (0, 1), best at 0.15
//error(min score) - real in (0, +inf), best at 0.0001
matrix::column_t compute(matrix::matrix_t &link_matrix, const double weight = 0.15, const double error = 0.0001) {
using namespace matrix;
auto size = (int)link_matrix.size();
for (int i = 0; i < size; ++i) {
uint count = 0;
for (int j = 0; j < size; ++j)
if (link_matrix[j][i] == 1)
count++;
if (count <= 1) continue;
for (int j = 0; j < size; ++j)
if (link_matrix[j][i] == 1)
link_matrix[j][i] = 1 / (double)count;
}
// Initialize mean column and score column
column_t mean_column(size);
column_t score_column(size);
for (int i = 0; i < size; i++) {
auto entry = 1 / (double)size;
mean_column[i] = entry;
score_column[i] = entry;
}
// Weigh link matrix and mean column
scalar_multiplication(link_matrix, size, size, 1 - weight);
scalar_multiplication(mean_column, size, weight);
double score_norm;
do {
// Store score column before operations
column_t score_diff = score_column;
// Multiply score column by weighted link matrix
column_multiplication(link_matrix, size, size, score_column);
// Add weighted mean column to score column
addition(score_column, mean_column, size);
// Subtract previous score column from score column
scalar_multiplication(score_diff, size, -1);
addition(score_diff, score_column, size);
// Calculate norm of the difference
score_norm = norm(score_diff, size);
// Repeat until score norm is smaller than error
} while (score_norm > error);
return score_column;
}
}
#endif