/[drupal]/drupal/rdf.php
ViewVC logotype

Contents of /drupal/rdf.php

Parent Directory | Revision Log | View Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (as text) (vendor branch)
Thu May 18 19:51:59 2000 UTC (9 years, 8 months ago) by dries
Branch: drop
CVS Tags: start
Changes since 1.1: +0 -0 lines
File MIME type: text/x-php
Imported sources
1 <?
2
3 include "functions.inc";
4 include "theme.inc";
5
/**
 * Headline grabber: downloads a channel's RDF file, stores the headlines it
 * contains in the "headlines" table and renders them inside a theme box.
 *
 * All methods talk to the database through dbconnect() and the mysql_*
 * functions; channel rows live in the "channel" table.  The "timestamp"
 * column of a channel is treated as Unix seconds by every method here.
 */
class rdf {
  // Contains the raw rdf file:
  // (declared for callers; the methods below work on local variables only)
  var $data;

  // Contains the parsed rdf file:
  var $title;  // website name
  var $items;  // latest headlines

  /**
   * Fetches the RDF file of channel $site over HTTP and replaces the
   * channel's rows in the "headlines" table with the items found in it.
   *
   * $site   - channel name as stored in channel.site (raw, NOT pre-escaped).
   * $timout - connect timeout in seconds, handed to fsockopen().
   *
   * Prints a diagnostic block when the server does not answer with status
   * 200; does nothing when the channel is unknown or the connection fails.
   */
  function url2sql($site, $timout = 10) {
    // Connect to database:
    dbconnect();

    // Get channel info:
    $result = mysql_query("SELECT * FROM channel WHERE site = '". addslashes($site) ."'");
    if (!$result) return;

    $channel = mysql_fetch_object($result);
    if (!$channel) return;

    // The id is only ever used as a number; force it to be one:
    $id = intval($channel->id);

    // Decode URL (quoted keys: bare words are constant lookups):
    $url  = parse_url($channel->rdf);
    $host = empty($url["host"]) ? "" : $url["host"];
    $port = empty($url["port"]) ? 80 : $url["port"];
    $path = empty($url["path"]) ? "/" : $url["path"];
    if (!empty($url["query"])) $path .= "?". $url["query"];

    // Nothing to connect to:
    if ($host == "") return;

    // Retrieve data from website.  $errno/$errstr are filled in by
    // fsockopen() itself; call-time "&$var" is rejected by later PHP versions.
    $fp = fsockopen($host, $port, $errno, $errstr, $timout);
    if (!$fp) return;

    // HTTP wants CRLF line endings and a blank line after the headers:
    fputs($fp, "GET $path HTTP/1.0\r\n");
    fputs($fp, "User-Agent: headline grabber\r\n");
    fputs($fp, "Host: ". $host ."\r\n");
    fputs($fp, "Accept: */*\r\n\r\n");

    // Read the whole response; stop on a read error so a dead connection
    // cannot keep the loop spinning:
    $data = "";
    while (!feof($fp)) {
      $chunk = fgets($fp, 128);
      if (!is_string($chunk)) break;
      $data .= $chunk;
    }
    fclose($fp);

    // Only the status line decides success, not text further down the body:
    $eol    = strpos($data, "\n");
    $status = $eol ? substr($data, 0, $eol) : $data;

    if (strstr($status, " 200")) {
      // Remove existing entries:
      mysql_query("DELETE FROM headlines WHERE id = $id");

      // Strip all 'junk' (everything up to the channel image, and the
      // closing rdf element with whatever follows it):
      $data = ereg_replace("<?xml.*/image>", "", $data);
      $data = ereg_replace("</rdf.*", "", $data);
      $data = chop($data);

      // Iterate through our data processing each entry/item:
      $items  = explode("</item>", $data);
      $number = 0;

      for ($i = 0; $i < count($items); $i++) {
        $item = $items[$i];

        // Whatever trails the last </item> is not a headline:
        if (!strstr($item, "<link>") || !strstr($item, "<title>")) continue;

        // Extract data:
        $link  = ereg_replace(".*<link>", "", $item);
        $link  = ereg_replace("</link>.*", "", $link);
        $title = ereg_replace(".*<title>", "", $item);
        $title = ereg_replace("</title>.*", "", $title);

        // Clean headlines:
        // NOTE(review): fixquotes() is defined outside this file; $title is
        // interpolated exactly as before -- confirm fixquotes() leaves it
        // safe inside a single-quoted SQL literal.
        $title = stripslashes(fixquotes($title));

        // Count the number of stories:
        $number += 1;

        // Insert item in database ($link comes from the remote site):
        mysql_query("INSERT INTO headlines (id, title, link, number) VALUES('$id', '$title', '". addslashes($link) ."', '$number')");
      }

      // Mark channel as being updated:
      mysql_query("UPDATE channel SET timestamp = '". time() ."' WHERE id = $id");
    }
    else {
      // Show the remote response as text, never as markup:
      print "<HR>RDF parser: 404 error?<BR><BR><PRE>". htmlspecialchars($data) ."</PRE><HR>";
    }
  }

  /**
   * Renders the cached headlines of channel $site in a theme box, refreshing
   * them through url2sql() first when they are older than $timout seconds.
   *
   * $site   - channel name as stored in channel.site (raw, NOT pre-escaped).
   * $timout - maximum cache age in seconds (default 1800).
   */
  function displayHeadlines($site, $timout = 1800) {
    global $theme;

    // Connect to database:
    dbconnect();

    // Get channel info:
    $result  = mysql_query("SELECT * FROM channel WHERE site = '". addslashes($site) ."'");
    $channel = $result ? mysql_fetch_object($result) : 0;

    if ($channel) {
      $id = intval($channel->id);

      // Check to see whether we have to update our headlines first:
      if (time() - $channel->timestamp > $timout) {
        $this->url2sql($site);

        // Re-read the row so the age shown below reflects the refresh:
        $result = mysql_query("SELECT * FROM channel WHERE id = $id");
        if ($result && ($fresh = mysql_fetch_object($result))) $channel = $fresh;
      }

      // Grab headlines from database:
      // NOTE(review): link and title are emitted as stored; they originate
      // from the remote feed.
      $content = "";
      $result  = mysql_query("SELECT * FROM headlines WHERE id = $id ORDER BY number");
      while ($result && ($headline = mysql_fetch_object($result))) {
        $content .= "<LI><A HREF=\"$headline->link\">$headline->title</A></LI>";
      }

      // Add timestamp:
      $update   = round((time() - $channel->timestamp) / 60);
      $content .= "<P ALIGN=\"right\">[ <A HREF=\"rdf.php?op=reset&id=$id\"><FONT COLOR=\"$theme->hlcolor2\">reset</FONT></A> | updated $update min. ago ]</P>";

      // Display box:
      $theme->box("$channel->site", $content);
    }
    else print "<P>Warning: something whiched happened: specified channel could not be found in database.</P>";
  }

  /**
   * Registers a new channel.
   *
   * $site - display name, $url - site address, $rdf - address of the RDF file.
   *
   * The values are expected to come straight from the request: they are
   * escaped here only when magic_quotes_gpc has not already done so.
   */
  function addChannel($site, $url, $rdf) {
    // Connect to database:
    dbconnect();

    if (!get_magic_quotes_gpc()) {
      $site = addslashes($site);
      $url  = addslashes($url);
      $rdf  = addslashes($rdf);
    }

    // Add channel.  The timestamp is Unix seconds everywhere else in this
    // class, so start with the same "stale" marker resetChannel() uses;
    // the first display then fetches the headlines.
    mysql_query("INSERT INTO channel (site, url, rdf, timestamp) VALUES ('$site', '$url', '$rdf', 42)");
  }

  /**
   * Drops the cached headlines of channel $id and marks the channel stale,
   * so the next displayHeadlines() call fetches them again.
   */
  function resetChannel($id) {
    // Connect to database:
    dbconnect();

    // $id arrives from the request; only a number may reach the query:
    $id = intval($id);

    // Delete headlines:
    mysql_query("DELETE FROM headlines WHERE id = $id");

    // Mark channel as invalid to enforce an update:
    mysql_query("UPDATE channel SET timestamp = 42 WHERE id = $id");
  }
}
131
/**
 * Prints the "add channel" form.  The form posts site, url and rdf back to
 * rdf.php with op set to "Add RDF channel".
 */
function adminAddChannel() {
  // One print per line of markup, in the order it appears on the page:
  print "<HR>\n";
  print "<FORM ACTION=\"rdf.php\" METHOD=\"post\">\n";
  print "<P>\n";
  print "<B>Site name:</B><BR>\n";
  print "<INPUT TYPE=\"text\" NAME=\"site\" SIZE=\"50\">\n";
  print "</P>\n";
  print "\n";
  print "<P>\n";
  print "<B>URL:</B><BR>\n";
  print "<INPUT TYPE=\"text\" NAME=\"url\" SIZE=\"50\">\n";
  print "</P>\n";
  print "\n";
  print "<P>\n";
  print "<B>RDF file:</B><BR>\n";
  print "<INPUT TYPE=\"text\" NAME=\"rdf\" SIZE=\"50\">\n";
  print "</P>\n";
  print "<INPUT TYPE=\"submit\" NAME=\"op\" VALUE=\"Add RDF channel\">\n";
  print "</FORM>\n";
}
154
/**
 * Prints every channel in the database as a three-column table, one
 * headline box (rdf::displayHeadlines) per cell, ordered by channel id.
 */
function adminDisplayAll() {
  // Connect to database:
  dbconnect();

  // Get channel info:
  $result = mysql_query("SELECT * FROM channel ORDER BY id");

  // Number of cells printed so far; decides where rows open and close:
  $state = 0;

  print "<TABLE BORDER=\"0\">";
  while ($result && ($channel = mysql_fetch_object($result))) {
    if ($state % 3 == 0) print " <TR>";

    print " <TD ALIGN=\"center\" VALIGN=\"top\" WIDTH=\"33%\">";
    $rdf = new rdf();
    $rdf->displayHeadlines($channel->site);
    print " </TD>";

    if ($state % 3 == 2) print " </TR>";

    $state += 1;
  }

  // Close a last row that holds fewer than three cells:
  if ($state % 3 != 0) print " </TR>";

  print "</TABLE>";
}
177
/**
 * Prints the static help text shown above the channel overview: concept,
 * features and current status of the headline grabber.
 */
function adminDisplayInfo() {
?>
<H1>Headlines</H1>
<H3>Concept</H3>
<P>
RDF support can change a portal in a significant way: third party websites
can become <I>channels</I> in our portal without having to make 'real' deals
and with a minimum of extra work. All they need to do is to publish an RDF,
so we can include their latest updates in our portal. Yet another easy way
to add content.
</P>
<P>
That in and of itself is interesting, but it's not half so interesting as
the fact that other sites can include our headlines as well. Anyone can
grab our RDF, anyone can parse it, and anyone can put a list of our
headlines. Yet another way to generate more traffic.
</P>

<H3>Features</H3>
<P>
One of the most important features (if not the most important) is
caching support. To avoid bogging down other portals with a continuous
stream of headline grabbing, all headlines are cached and refreshed once
in a while. The 'while' can be customized but is set to 30 minutes by
default.
</P>
<P>
You can reset a channel, that is, force an update of a channel's headlines,
and you can add new channels. If you don't know what channel to add,
check <A HREF="http://www.xmltree.com/">http://www.xmltree.com/</A>.
Make sure you don't add anything except valid RDF files!
</P>

<H3>Status</H3>
<P>
The RDF parser is still in beta and needs proper integration in the engine.
Until then this test page generates nothing more than an overview of all
subscribed channels along with their headlines: handy for news squatting. ;)
</P>
<P>
RDF files are non-proprietary and publicly available. Unfortunately,
RDF is not the only standard: another commonly used format is RSS which
would be nice to support as well.
</P>
<HR>
<?php
}
225
// Page controller: $op selects the action; $id, $site, $url and $rdf are
// the request values the actions work on.
$theme->header();

if ($op == "reset") {
  // Throw away the cached headlines of one channel and confirm:
  $channel = new rdf();
  $channel->resetChannel($id);
  print "<H2>channel has been reset</H2>";
  print "<A HREF=\"rdf.php\">back</A>";
}
else {
  // A submitted "add channel" form is stored first, then the overview
  // is shown exactly as for a plain page view:
  if ($op == "Add RDF channel") {
    $channel = new rdf();
    $channel->addChannel($site, $url, $rdf);
  }

  adminDisplayInfo();
  adminDisplayAll();
  adminAddChannel();
}

$theme->footer();
246
247 ?>

  ViewVC Help
Powered by ViewVC 1.1.3