| 1 |
<pre>
|
| 2 |
<?php
|
| 3 |
if (module_exist("aggregator2")) {
|
| 4 |
|
| 5 |
/**
 * Finds the feed URL a page advertises through <link> autodiscovery.
 *
 * Scans $html for <link> tags and picks the first one whose rel attribute is
 * "alternate", whose type is application/rss+xml, text/xml or
 * application/atom+xml, and which carries a non-empty href. A relative href
 * is resolved against $location.
 *
 * @param $html
 *   HTML source of the page to inspect.
 * @param $location
 *   URL the page was fetched from; used as the base for relative hrefs.
 *
 * @return
 *   The absolute feed URL as a string, or false when either argument is
 *   empty or no usable feed link is found.
 */
function getRSSLocation($html, $location){
  if (!$html or !$location) {
    return false;
  }

  // text/xml is kept for backwards compatibility with the first version.
  $feed_types = array('application/rss+xml', 'text/xml', 'application/atom+xml');
  $url_parts = parse_url($location);

  // Collect the attribute text of every <link> tag in document order.
  preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $matches);

  foreach ($matches[1] as $tag) {
    // Start from a clean attribute set for every tag so that nothing leaks
    // over from a previous <link>, and so the keys read below always exist.
    $attributes = array('rel' => '', 'type' => '', 'href' => '');

    // name=value pairs; the value may be double-quoted, single-quoted or bare,
    // and quoted values may contain whitespace.
    preg_match_all('/([^\s=\/"\'<>]+)\s*=\s*("[^"]*"|\'[^\']*\'|[^\s"\'>]+)/s', $tag, $pairs, PREG_SET_ORDER);
    foreach ($pairs as $pair) {
      $value = $pair[2];
      if ($value[0] == '"' || $value[0] == "'") {
        // Strip the surrounding quotes. The cast covers PHP versions where
        // substr() returns false instead of '' for an empty result.
        $value = (string) substr($value, 1, -1);
      }
      $attributes[strtolower($pair[1])] = $value;
    }

    $href = $attributes['href'];
    if (strtolower($attributes['rel']) != 'alternate'
        || !in_array(strtolower($attributes['type']), $feed_types, true)
        || $href === '') {
      continue;
    }

    // Already absolute (any scheme, e.g. http:// or https://): use it as is.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:\/\//i', $href)) {
      return $href;
    }

    // A relative href can only be resolved when the base URL has a host.
    if (!is_array($url_parts) || !isset($url_parts['host'])) {
      continue;
    }

    $scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : 'http';

    // Protocol-relative href ("//host/path"): only the scheme is missing.
    if (substr($href, 0, 2) == '//') {
      return $scheme . ':' . $href;
    }

    $full_url = $scheme . '://' . $url_parts['host'];
    if (isset($url_parts['port'])) {
      $full_url .= ':' . $url_parts['port'];
    }

    if ($href[0] != '/') {
      // Relative to the directory of the current page: keep everything up to
      // and including the last '/' of the base path. Unlike dirname(), this
      // leaves a directory URL such as "/blog/" intact.
      $path = isset($url_parts['path']) ? $url_parts['path'] : '/';
      $slash = strrpos($path, '/');
      $full_url .= ($slash === false) ? '/' : substr($path, 0, $slash + 1);
    }

    return $full_url . $href;
  }

  return false;
}
|
| 63 |
|
| 64 |
|
| 65 |
// Extra SQL condition. It stays empty unless the third path argument is
// numeric, in which case the run is limited to that single node id. It must
// always be defined because it is interpolated into the query below.
$cond = '';
if (is_numeric(arg(2))) {
  $cond = " and nid=".arg(2);
}

// field_id used to select rows from flexinode_data.
$ffid = 2;
// Names of the node properties the update loop reads and writes.
$feed_id = 'flexinode_2';        // feed URL
$link_id = 'flexinode_1';        // site link
$image_id = 'flexinode_8';       // channel image
$description_id = 'flexinode_4'; // channel description
$time_id = 'flexinode_5';        // timestamp of the newest item

// Node ids to process, newest first.
$feeds = db_query("select nid from flexinode_data where field_id=$ffid $cond order by nid desc");
|
| 77 |
|
| 78 |
// For every selected node: discover the feed URL when none is stored, fetch
// and parse the feed, and copy changed channel metadata (date of the first
// item, title, link, description, image) back onto the node.
while ($feed = db_fetch_array($feeds)) {
  $node = node_load($feed['nid']);

  $changed = false;

  // No feed URL stored yet: try autodiscovery on the page behind the link.
  if (!$node->$feed_id && $node->$link_id) {
    $html = aggregator2_http_request($node->$link_id, array(), 30);
    $node->$feed_id = getRSSLocation($html->data, $node->$link_id);
    $changed = true;
  }

  if ($node->$feed_id) {
    $result = aggregator2_http_request($node->$feed_id, array(), 30);
    if ($result->code == 200 || $result->code == 302 || $result->code == 307) {

      $xml_tree = aggregator2_parse_xml($result->data);
      if (empty($xml_tree['parser_error'])) {

        // The variables below are bound by reference into $xml_tree. Break
        // the bindings left by the previous iteration and start from null,
        // so a format that does not set one of them (RSS 1.1 has no date)
        // cannot pick up the previous feed's value.
        unset($root, $channel, $image, $description, $link, $title, $item, $date);
        $image = $description = $link = $title = $date = null;

        if (!empty($xml_tree['RSS'])) { // RSS 0.91, 0.92, 2.0
          $root = &$xml_tree['RSS'][0];
          $channel = &$root['CHANNEL'][0];
          $image = &$channel['IMAGE'][0]['URL'][0]['VALUE'];
          $description = &$channel['DESCRIPTION'][0]['VALUE'];
          $link = &$channel['LINK'][0]['VALUE'];
          $title = &$channel['TITLE'][0]['VALUE'];
          $item = &$channel['ITEM'][0];
          $date = &$item['PUBDATE'][0]['VALUE'];
        }
        else if (!empty($xml_tree['RDF:RDF'])) {
          $root = &$xml_tree['RDF:RDF'][0];
          $channel = &$root['CHANNEL'][0];
          $image = &$root['IMAGE'][0]['URL'][0]['VALUE'];
          $description = &$channel['DESCRIPTION'][0]['VALUE'];
          $link = &$channel['LINK'][0]['VALUE'];
          $title = &$channel['TITLE'][0]['VALUE'];
          $item = &$root['ITEM'][0];
          $date = &$item['DC:DATE'][0]['VALUE'];
          // Turn the 'T' separator into a space and drop the last three
          // characters before handing the value to strtotime().
          $date = strtr($date, 'T', ' ');
          $date = substr($date, 0, -3);
        }
        else if (!empty($xml_tree['FEED'])) { // Atom 0.3, 1.0
          $root = &$xml_tree['FEED'][0];
          $channel = &$root;
          $image = &$channel['LOGO'][0]['VALUE'];
          $description = ($channel['TAGLINE'][0]['VALUE'] ? $channel['TAGLINE'][0]['VALUE'] : '');
          $title = &$channel['TITLE'][0]['VALUE'];
          $item = &$channel['ENTRY'][0];
          $date = &$item['CREATED'][0]['VALUE'];
          $date = strtr($date , 'TZ', ' ');
          // TODO: remove this Atom hack when we have field mapping or at least specialized parsers in place
          if (count($channel['LINK']) > 1) {
            // Fall back to the link already stored on the node; $feed is an
            // array row from the query and has no link of its own.
            $link = $node->$link_id;
            foreach ($channel['LINK'] as $l) {
              if ($l['REL'] == 'alternate') {
                $link = $l['HREF'];
              }
            }
          }
          else {
            $link = $channel['LINK'][0]['HREF'];
          }
        }
        else if (!empty($xml_tree['CHANNEL'])) { // RSS 1.1
          $root = &$xml_tree['CHANNEL'][0];
          $channel = &$root;
          $image = &$channel['IMAGE'][0];
          $description = &$channel['DESCRIPTION'][0]['VALUE'];
          $link = &$channel['LINK'][0]['VALUE'];
          $title = &$channel['TITLE'][0]['VALUE'];
        }
        else {
          // unsupported format: skip this node entirely
          echo "<h2>format error</h2>";
          continue;
        }

        if ($date) {
          $time = strtotime($date);
          // strtotime() reports failure as false (as -1 on very old PHP).
          if ($time !== false && $time != -1 && $time != $node->$time_id) {
            $node->$time_id = $time;
            $changed = true;
            echo "date\n";
          }
        }

        if ($title && $title != $node->title) {
          $node->title = $title;
          $changed = true;
          echo "title\n";
        }
        if ($link && $link != $node->$link_id) {
          $node->$link_id = $link;
          $changed = true;
          echo "link\n";
        }
        if ($description && $description != $node->$description_id) {
          $node->$description_id = $description;
          $changed = true;
          echo "description\n";
        }
        if ($image && $image != $node->$image_id) {
          $node->$image_id = $image;
          $changed = true;
          echo "image\n";
        }

        if ($changed) {
          // Reattach the node's current term ids before saving.
          $node->taxonomy = array_keys(taxonomy_node_get_terms($node->nid,'tid'));

          node_save($node);
          echo "updated ".$node->$link_id."\n";
        }

      } else {
        echo "parse error\n";
      }
    } else {
      echo "http error \n";
      var_dump($result);
    }

  } else {
    echo "no feed\n";
  }
}
|
| 201 |
} // no parser
|
| 202 |
|
| 203 |
?>
|
| 204 |
</pre>
|