/[pdpsoft]/trunk/nl.nikhef.ndpf.tools/network-stats-mashup/includes/cricket_spider.php
ViewVC logotype

Contents of /trunk/nl.nikhef.ndpf.tools/network-stats-mashup/includes/cricket_spider.php

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1271 - (show annotations) (download) (as text)
Thu Dec 10 15:08:52 2009 UTC (12 years, 8 months ago) by aramv
File MIME type: text/x-php
File size: 6079 byte(s)
Added path fix
1 <?php
/* Directory (relative path) in which the functions of this file keep their
   serialized cache files; they reach it through `global $cache_dir`.
   The `global` statement here only has an effect when this file is included
   from inside a function scope. */
global $cache_dir;
$cache_dir = "cache";
4
/* Builds a display name from a Cricket url: the first two "%2F"-separated
   components of code_from_url($url), joined by " &harr; " (an HTML entity).

   $url     string  Cricket page url
   returns  string  "first &harr; second"; the second part is empty when the
                    code contains no "%2F" (for instance when code_from_url()
                    fell back to an md5 hash) */
function name_from_url($url){
    /* explode() instead of split(): split() was deprecated in PHP 5.3 and
       removed in PHP 7, and the delimiter is a plain string anyway */
    $name_parts = explode("%2F", code_from_url($url));
    $second_part = isset($name_parts[1]) ? $name_parts[1] : '';
    return $name_parts[0] . " &harr; " . $second_part;
}
10
/* Derives a short code for a url.

   When the url contains "target=%2F...;" the code is the text captured
   between "target=%2F" and the last ";" on that line (the capture group is
   greedy). Otherwise, or when that capture is empty (or the string "0"),
   the md5 hash of the whole url is used instead.

   $url     string  url to derive the code from
   returns  string  captured target path or md5($url) */
function code_from_url($url){
    $name_string = array();
    /* only read the capture group when the pattern actually matched, so a
       url without a target no longer triggers an undefined-index warning */
    $matched = preg_match('/target=%2F(.*);/', $url, $name_string);
    if($matched === 1 && !empty($name_string[1])){
        return $name_string[1];
    }
    return md5($url);
}
20
/* Checks whether a cache file is still fresh.

   A file counts as fresh when it was last modified at or after 00:00 of
   yesterday (start of today minus one day), so depending on the time of day
   it may be up to almost two days old.

   $file    string  path of the file to check
   returns  bool    true when the file exists and is fresh, false otherwise */
function probe_cache($file){
    /* explicit existence check instead of @-suppressing filemtime() and
       relying on how PHP compares false with an integer */
    if(!file_exists($file)){
        return false;
    }
    $modified = filemtime($file);
    if($modified === false){
        return false;
    }
    /* mktime(0, 0, 0) is midnight at the start of the current day */
    $oldest_allowed = strtotime("-1 day", mktime(0, 0, 0));
    return $modified >= $oldest_allowed;
}
31
/* Returns the highlight colour as a six-digit hex string (no leading '#').
   The $data argument is accepted but currently not used: the colour is fixed. */
function highlight_color($data){
    $hex = '00ff00';
    return $hex;
}
35
/* Returns the border colour as a six-digit hex string (no leading '#').
   The $data argument is accepted but currently not used: the colour is fixed. */
function border_color($data){
    $hex = '008000';
    return $hex;
}
39
/* Lightweight mobile-device detection, based on:
   http://mobiforge.com/developing/story/lightweight-device-detection-php

   The request is treated as coming from a mobile device when a WAP profile
   header (HTTP_X_WAP_PROFILE or HTTP_PROFILE) is present, or when the
   User-Agent header matches one of the known handset/browser fragments.

   returns  bool  true for a mobile client, false otherwise (including when
                  no User-Agent header was sent) */
function is_mobile(){
    if(isset($_SERVER['HTTP_X_WAP_PROFILE']) || isset($_SERVER['HTTP_PROFILE'])){
        return true;
    }
    /* no User-Agent header: nothing to match against */
    if(!isset($_SERVER['HTTP_USER_AGENT'])){
        return false;
    }

    $regex_match  = '/(nokia|iphone|android|motorola|^mot\-|softbank|foma|docomo|kddi|up\.browser|up\.link|';
    $regex_match .= 'htc|dopod|blazer|netfront|helio|hosin|huawei|novarra|CoolPad|webos|techfaith|palmsource|';
    $regex_match .= 'blackberry|alcatel|amoi|ktouch|nexian|samsung|^sam\-|s[cg]h|^lge|ericsson|philips|sagem|wellcom|bunjalloo|maui|';
    $regex_match .= 'symbian|smartphone|midp|wap|phone|windows ce|iemobile|^spice|^bird|^zte\-|longcos|pantech|gionee|^sie\-|portalmmm|';
    /* "opera\s*mini" replaces the upstream "opera\*mini", which only matched
       a literal asterisk and therefore never matched "Opera Mini" */
    $regex_match .= 'jig\s browser|hiptop|^ucweb|^benq|haier|^lct|opera\s*mobi|opera\s*mini|320x320|240x320|176x220';
    $regex_match .= ')/i';

    /* the /i modifier makes the match case-insensitive, so the agent string
       does not need to be lower-cased first */
    return preg_match($regex_match, $_SERVER['HTTP_USER_AGENT']) === 1;
}
50
/* Collects the href targets found in a local html file (e.g. image_map.html).

   The result has the shape produced by preg_match_all(): index 0 holds the
   full 'href="..."' matches, index 1 holds the urls themselves. The result
   is cached in $cache_dir under md5($file) and reused while probe_cache()
   considers that cache file fresh.

   $file    string  path of the html file, relative to the current directory
   returns  array   array(0 => full matches, 1 => urls); both lists are empty
                    when the file cannot be read */
function get_urls_from_html_file($file){
    global $cache_dir;
    $urls_cache = $cache_dir . '/' . md5($file);

    if(probe_cache($urls_cache)){
        /* load cache; an unreadable or corrupt cache falls through to a rebuild */
        $raw_cache = file_get_contents($urls_cache);
        $cached = ($raw_cache === false) ? false : unserialize($raw_cache);
        if(is_array($cached)){
            return $cached;
        }
    }

    $page_urls = array(array(), array());
    $html = file_get_contents('./' . $file);
    if($html === false){
        /* do not cache a failed read, so the next call can retry */
        return $page_urls;
    }

    /* [^"]+ stops at the closing quote; the former greedy (.+) also swallowed
       any attributes following href on the same line */
    preg_match_all('/href="([^"]+)"/', $html, $page_urls);
    /* create cache */
    file_put_contents($urls_cache, serialize($page_urls));
    return $page_urls;
}
68
/* Resolves a mixed list of absolute Cricket urls and local html files to
   graph image data, using a per-url cache file in $cache_dir (named after
   code_from_url($url)).

   - An absolute url (starting with http:// or https://) maps to the 4-element
     array returned by get_cricket_images_from_url().
   - Anything else is treated as a local html file: every href found in it by
     get_urls_from_html_file() is resolved, giving a list of such arrays.

   $urls    array  urls and/or html file names
   returns  array  keyed by the original url / file name */
function get_images_from_absolute_and_relative_urls($urls){
    global $cache_dir;
    $image_urls = array();

    foreach($urls as $url){
        $graph_url_cache = $cache_dir . '/' . code_from_url($url);

        /* try the cache first; anything that is not an array (missing,
           unreadable or corrupt cache) triggers a rebuild below */
        $images = false;
        if(probe_cache($graph_url_cache)){
            $raw_cache = file_get_contents($graph_url_cache);
            if($raw_cache !== false){
                $images = unserialize($raw_cache);
            }
        }

        if(!is_array($images)){
            /* anchored scheme test: the previous stristr($url, 'http://')
               matched "http://" anywhere in the string and treated https
               urls as local files */
            if(preg_match('#^https?://#i', $url)){
                /* absolute url */
                $images = get_cricket_images_from_url($url);
            } else {
                /* relative url: an html file containing links to Cricket pages */
                $images = array();
                $relative_urls = get_urls_from_html_file($url);
                if(isset($relative_urls[1]) && is_array($relative_urls[1])){
                    foreach($relative_urls[1] as $relative_url){
                        $images[] = get_cricket_images_from_url($relative_url);
                    }
                }
            }
            /* create cache */
            file_put_contents($graph_url_cache, serialize($images));
        }

        /* add to list */
        $image_urls[$url] = $images;
    }
    return $image_urls;
}
101
/* Renders one graph entry as html: a heading with the graph name followed by
   the daily and the weekly graph image, each linking to the graph page.

   $data    array   [0] page url, [1] daily image url, [2] weekly image url,
                    [3] graph name
   $token   string  appended verbatim to both image urls
   returns  string  html fragment (values are inserted as-is, not escaped) */
function make_html_snippet($data, $token){
    $pieces = array(
        '<h2 id="graph_name">', $data[3], '</h2>',
        '<p><a id="daily_graph_link" href="', $data[0],
        '"><img id="daily_graph" alt="', $data[3],
        '" src="', $data[1], $token, '"/></a><br/>',
        '<a id="weekly_graph_link" href="', $data[0],
        '"><img id="weekly_graph" alt="', $data[3],
        '" src="', $data[2], $token, '"/></a></p>',
    );
    return implode('', $pieces);
}
108
/* Renders a list of graph entries by concatenating the output of
   make_html_snippet() for each of them, in order.

   $data    array   list of graph entries as accepted by make_html_snippet()
   $token   string  passed through to make_html_snippet()
   returns  string  combined html; empty string for an empty list */
function make_html_snippets($data, $token){
    $rendered = array();
    foreach($data as $trunk_data){
        $rendered[] = make_html_snippet($trunk_data, $token);
    }
    return implode('', $rendered);
}
116
/* Maps a list of Cricket page urls to their image data.

   $urls    array  Cricket page urls
   returns  array  one get_cricket_images_from_url() result per url, in the
                   same order and re-indexed from 0 */
function get_images_from_urls($urls){
    $collected = array();
    foreach($urls as $page_url){
        array_push($collected, get_cricket_images_from_url($page_url));
    }
    return $collected;
}
126
/* Fetches a Cricket page with wget and extracts its graph images.

   $url       string  Cricket page url
   $base_url  string  prefix put in front of the image paths found in the
                      page; defaults to the NDPF Cricket installation
   returns    array   [0]: the url itself
                      [1]: url to daily graph image, ending in ";rand="
                      [2]: url to weekly graph image, ending in ";rand="
                      [3]: trunk name (see name_from_url())
              When the page cannot be fetched or contains fewer than two
              matching images, the missing image paths are empty, so [1]/[2]
              are just $base_url . ";rand=". */
function get_cricket_images_from_url($url, $base_url = "http://www.dutchgrid.nl/ndpf/cricket/"){
    /* apparently cricket checks the UA string and tries to display GIFs to
       wget, which don't work for some reason; hence the Firefox user agent */
    $page = shell_exec("wget -O- --user-agent=Firefox " . escapeshellarg($url));

    /* shell_exec() yields null when the command fails or prints nothing;
       cast so preg_match_all() always receives a string */
    $local_urls = array();
    preg_match_all('/<img src="(.*);rand=\d+".+/', (string)$page, $local_urls);

    /* $local_urls[1] holds the part matched between ()s:
       [1][0] is the daily graph, [1][1] is the weekly graph */
    $daily_path = isset($local_urls[1][0]) ? $local_urls[1][0] : '';
    $weekly_path = isset($local_urls[1][1]) ? $local_urls[1][1] : '';

    return array(
        $url,
        $base_url . $daily_path . ";rand=",
        $base_url . $weekly_path . ";rand=",
        name_from_url($url),
    );
}
149
/* Reconstructs the absolute url of the current request from $_SERVER.

   The path and query come from REQUEST_URI when available, otherwise from
   PHP_SELF plus QUERY_STRING. The scheme is https when $_SERVER['HTTPS'] is
   'on'. The server port is appended only when it is not the default port of
   the scheme (80 for http, 443 for https) and HTTP_HOST does not already
   carry a port, which avoids results such as "host:8080:8080".

   returns  string  scheme://host[:port]/path[?query] */
function get_absolute_url(){
    if(isset($_SERVER['REQUEST_URI'])){
        $script_name = $_SERVER['REQUEST_URI'];
    } else {
        $script_name = isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '';
        if(isset($_SERVER['QUERY_STRING']) && $_SERVER['QUERY_STRING'] !== ''){
            $script_name .= '?' . $_SERVER['QUERY_STRING'];
        }
    }

    $is_https = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on';
    $scheme = $is_https ? 'https' : 'http';
    $default_port = $is_https ? '443' : '80';

    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
    $port = isset($_SERVER['SERVER_PORT']) ? (string)$_SERVER['SERVER_PORT'] : '';

    /* a trailing ":digits" means the Host header already names the port
       (this also works for bracketed IPv6 literals such as "[::1]:8080") */
    $host_has_port = preg_match('/:\d+$/', $host) === 1;
    if($port !== '' && $port !== $default_port && !$host_has_port){
        $host .= ':' . $port;
    }

    return $scheme . '://' . $host . $script_name;
}
178
179
180
181 ?>

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28