/[drupal]/contributions/sandbox/aronnovak/graph_creating/explore.php
ViewVC logotype

Contents of /contributions/sandbox/aronnovak/graph_creating/explore.php

Parent Directory | Revision Log | View Revision Graph


Revision 1.18 - (show annotations) (download) (as text)
Tue Aug 15 19:18:43 2006 UTC (3 years, 3 months ago) by aronnovak
Branch: MAIN
CVS Tags: HEAD
Changes since 1.17: +36 -25 lines
File MIME type: text/x-php
Beautified code and fixed bugs
1 <?php
2 // $Id$
3 /**
4 * All the jobs that should be done in cron-time
5 *
6 * @author Aron Novak <aaron@szentimre.hu>
7 * @version 0.1
8 * @package sna
9 */
10
11 /**
12 * Get sql access and important functions
13 */
14 require_once './includes/bootstrap.inc';
15 drupal_bootstrap(DRUPAL_BOOTSTRAP_LATE_PAGE_CACHE);
16 require_once 'modules/sna/common.php';
/**
 * Create a graph from the node and comment tables.
 *
 * Two kinds of interaction are collected, each as a directed edge u1 -> u2
 * whose value is the number of times it occurred:
 *  - a top-level comment (pid = 0): the commenter (u1) replied to the node
 *    author (u2);
 *  - a reply to a comment: the reply's author (u1) replied to the parent
 *    comment's author (u2).
 * Users with an empty name are excluded by the queries, and self-replies are
 * not stored as edges.
 *
 * @param array $edges The adjacency list of the graph. Passed by reference;
 *                     any previous content is discarded.
 * @return integer The number of rows returned by both queries. Self-replies
 *                 are included in this count even though they add no edge.
 */
function build_edges_from_nodes(&$edges) {
  $edges = array();

  $node_replies_q = "SELECT users.uid as u2, users_1.uid as u1
    FROM {node} node, {users} users, {comments} comments, {users} users_1
    WHERE comments.nid = node.nid AND node.uid = users.uid AND users_1.uid = comments.uid
    AND users.name <> '' AND users_1.name <> ''
    AND comments.pid = 0";

  // Column aliases are left unquoted (as in the query above): single-quoted
  // aliases are a MySQL extension and are rejected by other databases.
  $comment_replies_q = "SELECT users_1.uid as u2, users.uid as u1
    FROM {users} users, {comments} comments, {comments} comments_1, {users} users_1
    WHERE comments_1.cid = comments.pid
    AND users.uid = comments.uid
    AND users_1.uid = comments_1.uid
    AND users_1.name <> ''
    AND users.name <> ''";

  $node_replies = db_query($node_replies_q);
  if (!$node_replies) {
    die("Database problem\n");
  }
  $comment_replies = db_query($comment_replies_q);
  if (!$comment_replies) {
    die("Database problem\n");
  }

  // Comment replies are processed first, then node replies, so the key order
  // of $edges stays the same as before.
  foreach (array($comment_replies, $node_replies) as $result) {
    while ($line = db_fetch_array($result)) {
      $from = $line["u1"];
      $to = $line["u2"];
      if ($from == $to) {
        // Do not create loops (a user replying to himself).
        continue;
      }
      // Start the counter explicitly instead of incrementing an undefined
      // array element.
      if (!isset($edges[$from][$to])) {
        $edges[$from][$to] = 0;
      }
      $edges[$from][$to]++;
    }
  }

  return db_num_rows($node_replies) + db_num_rows($comment_replies);
}
54
/**
 * Create a graph from the buddylist module data.
 *
 * Every row of the {buddylist} table adds 1 to the directed edge
 * uid -> buddy.
 *
 * @param array $edges The adjacency list of the graph. Passed by reference;
 *                     any previous content is discarded.
 * @return integer The number of connections (rows in {buddylist})
 */
function build_edges_from_buddy(&$edges) {
  $edges = array();
  $buddy_q = "SELECT uid, buddy FROM {buddylist}";
  $buddies = db_query($buddy_q);
  if (!$buddies) {
    die("Database problem\n");
  }
  while ($line = db_fetch_array($buddies)) {
    $from = $line["uid"];
    $to = $line["buddy"];
    // Start the counter explicitly instead of incrementing an undefined
    // array element.
    if (!isset($edges[$from][$to])) {
      $edges[$from][$to] = 0;
    }
    $edges[$from][$to]++;
  }
  return db_num_rows($buddies);
}
72
/**
 * Create a graph from the accesslog table. A connection is made when a user
 * views another user's profile page.
 *
 * Only paths of the exact form "user/<digits>" are counted. Other paths that
 * merely start with "user/" (e.g. "user/12/edit", "user/register") are
 * ignored, and so are views of one's own profile.
 *
 * @param array $edges The adjacency list of the graph. Passed by reference;
 *                     any previous content is discarded.
 * @return integer The number of accesslog rows matched by the query (this
 *                 includes rows that did not produce an edge)
 */
function build_edges_from_stats(&$edges) {
  $edges = array();
  $stats_q = "SELECT uid, path FROM {accesslog} WHERE path LIKE 'user/%'";
  $stats = db_query($stats_q);
  if (!$stats) {
    die("Database problem\n");
  }
  while ($line = db_fetch_array($stats)) {
    // Stripping "user/" and testing is_numeric() would also accept values
    // such as "1e3", "-4" or "12.5", and would turn "user/user/7" into "7".
    // Match the profile path explicitly instead.
    if (!preg_match('@^user/(\d+)$@', $line["path"], $matches)) {
      continue;
    }
    // Cast so that e.g. "007" and "7" address the same vertex.
    $dest = (int) $matches[1];
    $viewer = $line["uid"];
    if ($dest == $viewer) {
      continue;
    }
    // Start the counter explicitly instead of incrementing an undefined
    // array element.
    if (!isset($edges[$viewer][$dest])) {
      $edges[$viewer][$dest] = 0;
    }
    $edges[$viewer][$dest]++;
  }
  return db_num_rows($stats);
}
93
/**
 * Decide the threshold used for throwing away edges.
 *
 * Below 1000 connections nothing is thrown away and FALSE is returned.
 * From 1000 connections on, the threshold grows logarithmically with the
 * number of connections.
 *
 * @param integer $total Number of connections
 * @return mixed FALSE when no threshold applies, otherwise the threshold
 *               as a float
 */
function get_limit($total) {
  // The formula is an ad-hoc computation based on some experiments.
  return ($total < 1000) ? FALSE : 3 * log($total - 999);
}
108
/**
 * Create a dot file from the graph for Graphviz.
 * Graphviz is a graph visualization tool.
 *
 * Edges starting from or pointing to the anonymous user (uid 0) are left
 * out. When get_limit() yields a threshold, only edges whose value is above
 * it are written; when it yields FALSE, every edge is written.
 *
 * @param array $edges The adjacency list of the graph
 * @param integer $num_interactions Number of interactions; passed to
 *                                  get_limit() to derive the threshold
 * @return boolean The success of writing out the file
 */
function generate_graphviz_input($edges, $num_interactions) {
  $dot_graph = "digraph G {\n";

  // Keep the FALSE sentinel intact: converting it with intval() would turn
  // it into 0 and the "no threshold" case could never be detected.
  $raw_limit = get_limit($num_interactions);
  $limit = ($raw_limit === FALSE) ? FALSE : intval($raw_limit);

  foreach ($edges as $u1 => $sub_arr) {
    if ($u1 === 0) {
      // Anonymous - skip this source only, keep processing the other users.
      continue;
    }
    foreach ($sub_arr as $u2 => $num) {
      if ($u2 === 0) {
        // Anonymous - skip this target only.
        continue;
      }
      if (($limit === FALSE) || ($num > $limit)) {
        // A double quote inside a DOT quoted string has to be escaped.
        $from_name = str_replace('"', '\\"', get_real_name($u1));
        $to_name = str_replace('"', '\\"', get_real_name($u2));
        $dot_graph .= "\t\"". $from_name .
          "\" -> \"". $to_name .
          "\" [label=". round($num, 2) ."];\n";
      }
    }
  }

  $dot_graph .= "}";
  if (!$fp = fopen(DOT_PATH, "w")) {
    return FALSE;
  }
  // Write out the DOT file and report a failed write to the caller.
  $written = fwrite($fp, $dot_graph);
  fclose($fp);
  return $written !== FALSE;
}
145
/**
 * Create a net file from the graph for Pajek.
 * Pajek is a graph analyzer and visualization tool.
 * http://vlado.fmf.uni-lj.si/pub/networks/pajek/
 *
 * The vertex list comes from get_all_vertices(). Edges starting from or
 * pointing to the anonymous user (uid 0) are left out, and only edges whose
 * value is above 20 are written.
 *
 * @param array $edges The adjacency list of the graph
 * @return boolean The success of writing out the file
 */
function generate_pajek_input($edges) {
  $min_edge_value = 20; // Edges at or below this value are not exported
  $vert = '';
  $edg = "*Edges\n";
  $real_id = array();

  // In NET files the points have to be indexed in a strict order starting
  // from 1, so the uid is not suitable; map each uid to its position.
  $vertex = get_all_vertices();
  $num_vertex = count($vertex);
  for ($i = 0; $i < $num_vertex; $i++) {
    $vert .= ($i + 1) ." \"". get_real_name($vertex[$i]) ."\"\n";
    $real_id[$vertex[$i]] = $i + 1;
  }

  foreach ($edges as $vx_from => $next) {
    if ($vx_from === 0) {
      // Anonymous - skip this source only, keep processing the other users.
      continue;
    }
    foreach ($next as $vx_to => $value) {
      if ($vx_to === 0) {
        // Anonymous - skip this target only.
        continue;
      }
      if (!isset($real_id[$vx_from]) || !isset($real_id[$vx_to])) {
        // An endpoint without a vertex index would produce a malformed line.
        continue;
      }
      if ($value > $min_edge_value) {
        $edg .= $real_id[$vx_from] ." ". $real_id[$vx_to] ." ". $value ."\n";
      }
    }
  }

  $net = "*Vertices ". $num_vertex ."\n". $vert . $edg;
  if (!$fp = fopen(NET_PATH, "w")) {
    return FALSE;
  }
  // Report a failed write to the caller instead of always returning TRUE.
  $written = fwrite($fp, $net);
  fclose($fp);
  return $written !== FALSE;
}
187
/**
 * Write out the includable graph data.
 *
 * The graph is stored in DATA_PATH as a PHP file that assigns the exported
 * array to $edges, so it can be loaded back with include.
 *
 * @param array $edges The adjacency list of the graph
 * @return boolean The success of writing out
 */
function put_graph($edges) {
  if (!$file_s = fopen(DATA_PATH, "w")) {
    return FALSE;
  }
  // NOTE(review): lock() is defined outside this file; presumably it takes
  // a lock on the handle - confirm.
  if (!lock($file_s)) {
    // Do not leak the open handle when the lock cannot be taken.
    fclose($file_s);
    return FALSE;
  }
  $written = fwrite($file_s, '<?php $edges = ' . var_export($edges, TRUE) . ';?>');
  fclose($file_s);
  // Report a failed write to the caller instead of always returning TRUE.
  return $written !== FALSE;
}
205
/**
 * Convert all the edges cost<->length.
 *
 * Every edge A -> B of the input gets the value computed by
 * get_edge_weight() from the original graph and the pair returned by
 * get_min_and_max_degree().
 *
 * @param array $edges The adjacency list of the graph
 * @return array The adjacency list of the transformed graph
 */
function transform_edges($edges) {
  $bounds = get_min_and_max_degree($edges);
  $result = array();
  foreach ($edges as $source => $targets) {
    foreach (array_keys($targets) as $target) {
      $result[$source][$target] = get_edge_weight($edges, $source, $target, $bounds[0], $bounds[1]);
    }
  }
  return $result;
}
// NOTE(review): this measurement is overwritten by the second res_start()
// call below, so the report at the end does not cover building the graph.
$at_start = res_start();

if (!function_exists('dba_open')) {
  die('Dba support is not available.');
}

clear_cache();
$edges = array();

// Pick the graph builder that belongs to the configured data source.
$graph_source = variable_get('sna_data_source', GRAPH_SOURCE);
$build_edges = 'build_edges_from_' . $graph_source;
if (!function_exists($build_edges)) {
  // A misconfigured source would otherwise end in a fatal error.
  die('Unknown graph data source: ' . $graph_source);
}
$num_interactions = $build_edges($edges);
$edges = transform_edges($edges);
if (!put_graph($edges)) {
  die('Cannot save the network into file');
}

/* Cache the most popular vertex's minimal tree */
$most_popular = sort_by_popularity($edges);
$most_popular_size = sizeof($most_popular);
$size_cache = variable_get('sna_size_cache', NUM_CACHE);
$limit = $most_popular_size < $size_cache ? $most_popular_size : $size_cache;
$at_start = res_start();
for ($i = 0; $i < $limit; $i++) {
  a_to_any($edges, $most_popular[$i][1], TRUE);
  print $most_popular[$i][1] . " cached\n";
}

// The exports are optional extras: report a failure but keep going.
if (!generate_graphviz_input($edges, $num_interactions)) {
  print "Cannot write the Graphviz file\n";
}
if (!generate_pajek_input($edges)) {
  print "Cannot write the Pajek file\n";
}
print_r(res_stop($at_start));
252
253 ?>

  ViewVC Help
Powered by ViewVC 1.1.2