1 | <?php |
---|
2 | /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
---|
3 | |
---|
4 | /** |
---|
5 | * Abstract class providing common methods for XML_Feed_Parser feeds. |
---|
6 | * |
---|
7 | * PHP version 5 |
---|
8 | * |
---|
9 | * LICENSE: This source file is subject to version 3.0 of the PHP license |
---|
10 | * that is available through the world-wide-web at the following URI: |
---|
11 | * http://www.php.net/license/3_0.txt. If you did not receive a copy of |
---|
12 | * the PHP License and are unable to obtain it through the web, please |
---|
13 | * send a note to license@php.net so we can mail you a copy immediately. |
---|
14 | * |
---|
15 | * @category XML |
---|
16 | * @package XML_Feed_Parser |
---|
17 | * @author James Stewart <james@jystewart.net> |
---|
18 | * @copyright 2005 James Stewart <james@jystewart.net> |
---|
19 | * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
---|
20 | * @version CVS: $Id: Type.php,v 1.22 2006/08/15 13:02:36 jystewart Exp $ |
---|
21 | * @link http://pear.php.net/package/XML_Feed_Parser/ |
---|
22 | */ |
---|
23 | |
---|
24 | /** |
---|
25 | * This abstract class provides some general methods that are likely to be |
---|
26 | * implemented exactly the same way for all feed types. |
---|
27 | * |
---|
28 | * @package XML_Feed_Parser |
---|
29 | * @author James Stewart <james@jystewart.net> |
---|
30 | * @version Release: 1.0.2 |
---|
31 | */ |
---|
abstract class XML_Feed_Parser_Type
{
    /**
     * Where we store our DOM object for this feed
     * @var DOMDocument
     */
    public $model;

    /**
     * For iteration we'll want a count of the number of entries
     * @var int
     */
    public $numberEntries;

    /**
     * Where we store our entry objects once instantiated
     * @var array
     */
    public $entries = array();

    /**
     * Proxy to allow use of element names as method names
     *
     * We are not going to provide methods for every entry type so this
     * function will allow for a lot of mapping. We rely pretty heavily
     * on this to handle our mappings between other feed types and atom.
     *
     * Resolution order:
     *  1. If $this->compatMap has an entry for $call, its last element is the
     *     real name to look up and any preceding elements are appended to
     *     $arguments.
     *  2. The name is looked up in $this->map, falling back to a
     *     case-insensitive match on the map keys.
     *  3. The first element of the map entry, prefixed with 'get', names the
     *     method that is invoked as $method($call, $arguments). 'getLink' is
     *     special-cased and receives (offset, attribute, params).
     *
     * NOTE(review): $map, $compatMap and getLink() are not declared in this
     * class; they are presumably provided by the concrete feed classes.
     *
     * @param   string  $call - the method attempted
     * @param   array   $arguments - arguments to that method
     * @return  mixed   the getter's result, or false if nothing is mapped
     */
    public function __call($call, $arguments = array())
    {
        if (! is_array($arguments)) {
            $arguments = array();
        }

        if (isset($this->compatMap[$call])) {
            /* Work on a copy so the compatibility map itself is never altered */
            $bound  = $this->compatMap[$call];
            $target = array_pop($bound);
            /* Whatever is left after removing the target name are extra arguments */
            if (! empty($bound)) {
                $arguments = array_merge($arguments, $bound);
            }
            $call = $target;
        }

        /* To be helpful, we allow a case-insensitive search for this method */
        if (! isset($this->map[$call])) {
            foreach (array_keys($this->map) as $key) {
                if (strcasecmp($key, $call) === 0) {
                    $call = $key;
                    break;
                }
            }
        }

        if (empty($this->map[$call])) {
            return false;
        }

        $method = 'get' . $this->map[$call][0];
        if ($method === 'getLink') {
            $offset    = empty($arguments[0]) ? 0 : $arguments[0];
            $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
            $params    = isset($arguments[2]) ? $arguments[2] : array();
            return $this->getLink($offset, $attribute, $params);
        }
        if (method_exists($this, $method)) {
            return $this->$method($call, $arguments);
        }

        return false;
    }

    /**
     * Proxy to allow use of element names as attribute names
     *
     * For many elements variable-style access will be desirable. This function
     * provides for that by forwarding to __call() with no arguments.
     *
     * @param   string  $value - the variable required
     * @return  mixed
     */
    public function __get($value)
    {
        return $this->__call($value, array());
    }

    /**
     * Utility function to help us resolve xml:base values
     *
     * Combines a base URI with a link found in the feed:
     *  - a link that starts with a protocol is returned untouched;
     *  - a link that starts with / replaces the whole path of the base, so
     *    only the scheme and host of the base are kept;
     *  - a link that starts with one or more ../ steps up from the base's
     *    directory, never climbing above the scheme and host;
     *  - anything else replaces the last path segment of the base.
     *
     * Any query string or fragment on the base is ignored, as it plays no
     * part in resolving a relative path.
     *
     * @param   string  $base - the base to add the link to
     * @param   string  $link
     * @return  string  the combined URI
     */
    public function combineBases($base, $link)
    {
        if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
            return $link;
        }

        /* A missing base is treated as empty; drop any ?query or #fragment */
        $base = (string) $base;
        $base = substr($base, 0, strcspn($base, '?#'));

        if (preg_match('/^\//', $link)) {
            /* Extract scheme and host only, and suffix the link to that */
            if (preg_match('/^[A-Za-z]+:\/\/[^\/]*/', $base, $results)) {
                return $results[0] . $link;
            }
            return $link;
        }

        if (preg_match('/^((\.\.\/)+)(.*)$/', $link, $bases)) {
            /* Step up from the base to find the place to be */
            $suffix = $bases[3];
            $count  = substr_count($bases[1], '../');
            $url    = explode('/', $base);
            /* "scheme:", "" and "host" are the first three pieces of an
               absolute base and must survive however many ../ we are given */
            $floor  = preg_match('/^[A-Za-z]+:\/\//', $base) ? 3 : 0;
            /* One extra pop removes the document name itself */
            for ($i = 0; $i <= $count && count($url) > $floor; $i++) {
                array_pop($url);
            }
            return implode('/', $url) . '/' . $suffix;
        }

        /* A base without any path: the link hangs directly off the root */
        if (preg_match('/^[A-Za-z]+:\/\/[^\/]*$/', $base)) {
            return $base . '/' . $link;
        }

        /* Replace everything after the last slash of the base with the link */
        $slash = strrpos($base, '/');
        if ($slash !== false) {
            $base = substr($base, 0, $slash + 1);
        }
        return $base . $link;
    }

    /**
     * Determine whether we need to apply our xml:base rules
     *
     * Links that already start with a protocol are returned as they are.
     * Otherwise the node's baseURI is combined with the link.
     *
     * @param   string      $link
     * @param   DOMNode     $element the node whose baseURI applies to the link
     * @return  string
     */
    public function addBase($link, $element)
    {
        if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
            return $link;
        }

        return $this->combineBases($element->baseURI, $link);
    }

    /**
     * Get an entry by its position in the feed, starting from zero
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by ID.
     *
     * Entries are instantiated on first access and cached in $this->entries.
     * If the new entry reports an id it is also recorded in $this->idMappings.
     *
     * NOTE(review): $itemElement, $itemClass and $idMappings are not declared
     * in this class; they are presumably provided by the concrete feed classes.
     *
     * @param   int $offset
     * @return  object  an instance of $this->itemClass
     * @throws  XML_Feed_Parser_Exception if there is no entry at $offset
     */
    public function getEntryByOffset($offset)
    {
        if (! isset($this->entries[$offset])) {
            $entries = $this->model->getElementsByTagName($this->itemElement);
            /* Negative or non-numeric offsets can never address an entry */
            if (! is_numeric($offset) || $offset < 0
                || $entries->length <= $offset) {
                throw new XML_Feed_Parser_Exception('No entries found');
            }

            $node  = $entries->item((int) $offset);
            $entry = new $this->itemClass($node, $this, $node->baseURI);
            $this->entries[$offset] = $entry;
            if ($id = $entry->id) {
                $this->idMappings[$id] = $entry;
            }
        }

        return $this->entries[$offset];
    }

    /**
     * Return a date in seconds since epoch.
     *
     * Get a date construct. We use PHP's strtotime to return it as a unix datetime, which
     * is the number of seconds since 1970-01-01 00:00:00.
     *
     * @link    http://php.net/strtotime
     * @param   string  $method     The name of the date construct we want
     * @param   array   $arguments  Included for compatibility with our __call usage
     * @return  int|false datetime, or false if the element is absent or unparseable
     */
    protected function getDate($method, $arguments)
    {
        $time = $this->model->getElementsByTagName($method);
        if ($time->length == 0) {
            return false;
        }
        return strtotime($time->item(0)->nodeValue);
    }

    /**
     * Get a text construct.
     *
     * @param   string  $method     The name of the text construct we want
     * @param   array   $arguments  Included for compatibility with our __call usage
     * @return  string|false    value of the first matching element, false if none
     */
    protected function getText($method, $arguments = array())
    {
        $tags = $this->model->getElementsByTagName($method);
        if ($tags->length > 0) {
            return $tags->item(0)->nodeValue;
        }
        return false;
    }

    /**
     * Apply various rules to retrieve category data.
     *
     * There is no single way of declaring a category in RSS1/1.1 as there is in RSS2
     * and Atom. Instead the usual approach is to use the dublin core namespace to
     * declare categories. For example delicious use both:
     * <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
     * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
     * to declare a categorisation of 'PEAR'.
     *
     * Only the 'subject' elements are read here.
     *
     * @param   string  $call       for compatibility with our overloading
     * @param   array   $arguments  arg 0 is the offset, arg 1 is whether to return as array
     * @return  string|array|false  false when there is no category at the offset
     */
    protected function getCategory($call, $arguments)
    {
        $categories = $this->model->getElementsByTagName('subject');
        $offset     = empty($arguments[0]) ? 0 : $arguments[0];
        $array      = empty($arguments[1]) ? false : true;
        if ($categories->length <= $offset) {
            return false;
        }
        if ($array) {
            $list = array();
            foreach ($categories as $category) {
                array_push($list, $category->nodeValue);
            }
            return $list;
        }
        return $categories->item($offset)->nodeValue;
    }

    /**
     * Count occurrences of an element
     *
     * This function will tell us how many times the element $type
     * appears beneath our model node.
     *
     * @param   string  $type   the element we want to get a count of
     * @return  int
     */
    protected function count($type)
    {
        if ($tags = $this->model->getElementsByTagName($type)) {
            return $tags->length;
        }
        return 0;
    }

    /**
     * Part of our xml:base processing code
     *
     * We need a couple of methods to access XHTML content stored in feeds.
     * This is because we dereference all xml:base references before returning
     * the element. This method handles the attributes: 'src' and 'href' values
     * are resolved against the applicable base, 'base' attributes are dropped,
     * and every value is escaped so that the result is safe to place inside
     * double quotes. The DOM itself is left untouched.
     *
     * @param   DOMElement $node    The DOM node we are iterating over
     * @return  string  attributes with a leading space, or '' if there are none
     */
    public function processXHTMLAttributes($node)
    {
        if (! $node->hasAttributes()) {
            return '';
        }

        $return = '';
        foreach ($node->attributes as $attribute) {
            if ($attribute->name == 'base') {
                continue;
            }
            $value = $attribute->value;
            if ($attribute->name == 'src' or $attribute->name == 'href') {
                $value = $this->addBase($value, $attribute);
            }
            /* Values come from the feed; escape them before quoting */
            $return .= $attribute->name . '="'
                . htmlspecialchars($value, ENT_COMPAT, 'UTF-8') . '" ';
        }
        if (! empty($return)) {
            return ' ' . trim($return);
        }
        return '';
    }

    /**
     * Part of our xml:base processing code
     *
     * We need a couple of methods to access XHTML content stored in feeds.
     * This is because we dereference all xml:base references before returning
     * the element. This method recurs through the tree descending from the node
     * and builds our string. Elements are written as explicit open and close
     * tags, text nodes are entity-encoded, and other node types contribute
     * only their children (if any).
     *
     * @param   DOMNode $node   The DOM node we are processing
     * @return  string
     */
    public function traverseNode($node)
    {
        /* Text has no children; DOM always hands us UTF-8 */
        if ($node instanceof DOMText) {
            return htmlentities($node->nodeValue, ENT_COMPAT, 'UTF-8');
        }

        $content = '';

        /* Process children */
        if ($node->hasChildNodes()) {
            foreach ($node->childNodes as $child) {
                $content .= $this->traverseNode($child);
            }
        }

        /* Wrap the children in this node's own tags */
        if ($node instanceof DOMElement) {
            $content = '<' . $node->tagName
                . $this->processXHTMLAttributes($node) . '>'
                . $content
                . '</' . $node->tagName . '>';
        }

        return $content;
    }

    /**
     * Get content from RSS feeds (atom has its own implementation)
     *
     * The official way to include full content in an RSS1 entry is to use
     * the content module's element 'encoded', and RSS2 feeds often duplicate that.
     * Often, however, the 'description' element is used instead. We will offer that
     * as a fallback. Atom uses its own approach and overrides this method.
     *
     * Text children are returned as their raw value; any other child is
     * returned as serialized XML.
     *
     * @return string|false     false if neither element is present
     */
    protected function getContent()
    {
        foreach (array('encoded', 'description') as $element) {
            $found = $this->model->getElementsByTagName($element);
            if ($found->length == 0) {
                continue;
            }

            $node = $found->item(0);
            if (! $node->hasChildNodes()) {
                return $node->nodeValue;
            }

            $value = '';
            foreach ($node->childNodes as $child) {
                if ($child instanceof DOMText) {
                    $value .= $child->nodeValue;
                } else if ($child instanceof DOMElement) {
                    $simple = simplexml_import_dom($child);
                    $value .= $simple->asXML();
                } else {
                    /* Comments, processing instructions and the like cannot
                       be imported into SimpleXML; let DOM serialize them */
                    $value .= $child->ownerDocument->saveXML($child);
                }
            }
            return $value;
        }
        return false;
    }

    /**
     * Checks if this element has a particular child element.
     *
     * @param   string  $name   element name to look for
     * @param   int     $offset zero-based occurrence that must exist
     * @return  bool
     **/
    public function hasKey($name, $offset = 0)
    {
        if ($offset < 0) {
            return false;
        }
        $search = $this->model->getElementsByTagName($name);
        return $search->length > $offset;
    }

    /**
     * Return an XML serialization of the feed, should it be required. Most
     * users however, will already have a serialization that they used when
     * instantiating the object.
     *
     * An empty string is returned when there is no model or it cannot be
     * serialized, since __toString() is required to return a string.
     *
     * @return    string    XML serialization of element
     */
    public function __toString()
    {
        if (! $this->model instanceof DOMNode) {
            return '';
        }
        $simple = simplexml_import_dom($this->model);
        if (! $simple instanceof SimpleXMLElement) {
            return '';
        }
        $xml = $simple->asXML();
        return is_string($xml) ? $xml : '';
    }

    /**
     * Get directory holding RNG schemas. Method is based on that
     * found in Contact_AddressBook.
     *
     * @return string PEAR data directory.
     * @access public
     * @static
     */
    public static function getSchemaDir()
    {
        require_once 'PEAR/Config.php';
        $config = new PEAR_Config();
        return $config->get('data_dir') . '/XML_Feed_Parser/schemas';
    }
}
---|
440 | |
---|
441 | ?> |
---|