source: branches/dev/html/test/adachi/LLReader/Lib/XML/Feed/Parser/Type.php @ 14612

Revision 14612, 14.3 KB checked in by adati, 17 years ago (diff)
Line 
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

/**
 * Abstract class providing common methods for XML_Feed_Parser feeds.
 *
 * PHP version 5
 *
 * LICENSE: This source file is subject to version 3.0 of the PHP license
 * that is available through the world-wide-web at the following URI:
 * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
 * the PHP License and are unable to obtain it through the web, please
 * send a note to license@php.net so we can mail you a copy immediately.
 *
 * @category   XML
 * @package    XML_Feed_Parser
 * @author     James Stewart <james@jystewart.net>
 * @copyright  2005 James Stewart <james@jystewart.net>
 * @license    http://www.gnu.org/copyleft/lesser.html  GNU LGPL 2.1
 * @version    CVS: $Id: Type.php,v 1.22 2006/08/15 13:02:36 jystewart Exp $
 * @link       http://pear.php.net/package/XML_Feed_Parser/
 */

/**
 * This abstract class provides some general methods that are likely to be
 * implemented exactly the same way for all feed types.
 *
 * @package XML_Feed_Parser
 * @author  James Stewart <james@jystewart.net>
 * @version Release: 1.0.2
 */
abstract class XML_Feed_Parser_Type
{
    /**
     * Where we store our DOM object for this feed
     * @var DOMDocument
     */
    public $model;

    /**
     * For iteration we'll want a count of the number of entries
     * @var int
     */
    public $numberEntries;

    /**
     * Where we store our entry objects once instantiated
     * @var array
     */
    public $entries = array();

    /**
     * Proxy to allow use of element names as method names
     *
     * The call is resolved in three steps:
     *  1. If $this->compatMap has an entry for the name, the last item of
     *     that entry becomes the real name and any preceding items are
     *     appended to the arguments.
     *  2. The name is looked up in $this->map, first exactly and then
     *     case-insensitively.
     *  3. The first item of the map entry, prefixed with "get", names the
     *     method that is invoked as $this->getXxx($call, $arguments).
     *     getLink is special-cased and receives (offset, attribute, params).
     *
     * $this->map and $this->compatMap are not declared in this class; they
     * are expected to be supplied by the concrete feed type.
     *
     * @param   string  $call - the method attempted
     * @param   array   $arguments - arguments to that method
     * @return  mixed   the getter's result, or false if nothing is mapped
     */
    public function __call($call, $arguments = array())
    {
        if (! is_array($arguments)) {
            $arguments = array();
        }

        if (isset($this->compatMap[$call])) {
            /* Work on a copy so the shared map is never modified */
            $compat = $this->compatMap[$call];
            $target = array_pop($compat);
            if (! empty($compat)) {
                $arguments = array_merge($arguments, $compat);
            }
            $call = $target;
        }

        /* Without a map nothing can be resolved */
        if (! isset($this->map) || ! is_array($this->map)) {
            return false;
        }

        /* To be helpful, we allow a case-insensitive search for this method.
           strcasecmp() is used rather than a loose == on upper-cased strings
           so that numeric-looking names are still compared as text. */
        if (! isset($this->map[$call])) {
            foreach (array_keys($this->map) as $key) {
                if (strcasecmp((string) $key, (string) $call) == 0) {
                    $call = $key;
                    break;
                }
            }
        }

        if (empty($this->map[$call])) {
            return false;
        }

        $method = 'get' . $this->map[$call][0];
        if ($method == 'getLink') {
            $offset = empty($arguments[0]) ? 0 : $arguments[0];
            $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
            $params = isset($arguments[2]) ? $arguments[2] : array();
            return $this->getLink($offset, $attribute, $params);
        }
        if (method_exists($this, $method)) {
            return $this->$method($call, $arguments);
        }

        return false;
    }

    /**
     * Proxy to allow use of element names as attribute names
     *
     * Reading an undeclared property is forwarded to __call() with no
     * arguments, so $feed->title behaves like $feed->title().
     *
     * @param   string  $value - the variable required
     * @return  mixed
     */
    public function __get($value)
    {
        return $this->__call($value, array());
    }

    /**
     * Tell whether a link carries its own URI scheme ("http:", "mailto:" ...)
     *
     * @param   string  $link
     * @return  bool
     */
    private function hasScheme($link)
    {
        return preg_match('/^[A-Za-z][A-Za-z0-9+.\-]*:/', (string) $link) === 1;
    }

    /**
     * Utility function to help us resolve xml:base values
     *
     * Resolves $link against $base:
     *  - a link with its own scheme is returned unchanged;
     *  - a link starting with "//" takes only the scheme of the base;
     *  - a link starting with "/" is appended to the scheme and host of
     *    the base;
     *  - leading "../" steps each remove one directory from the base path,
     *    never climbing above the host;
     *  - anything else is appended to the directory part of the base path
     *    (everything up to and including its last "/").
     *
     * "./" segments and ".." segments in the middle of a link are not
     * normalised.
     *
     * @param   string  $base - the base to add the link to
     * @param   string  $link
     * @return  string  the combined link
     */
    public function combineBases($base, $link)
    {
        $base = (string) $base;
        $link = (string) $link;

        if ($this->hasScheme($link)) {
            return $link;
        }

        /* Split the base into "scheme://host" and the path that follows */
        $root = '';
        $scheme = '';
        $path = $base;
        if (preg_match('/^(([A-Za-z][A-Za-z0-9+.\-]*):\/\/[^\/?#]*)(.*)$/s',
                $base, $parts)) {
            $root = $parts[1];
            $scheme = $parts[2];
            $path = $parts[3];
        }

        if (substr($link, 0, 2) === '//') {
            /* Protocol-relative link: only the scheme is inherited */
            return ($scheme === '') ? $link : $scheme . ':' . $link;
        }

        if (substr($link, 0, 1) === '/') {
            /* Root-relative link: keep scheme and host, replace the path */
            return $root . $link;
        }

        /* A query string or fragment on the base may contain slashes but is
           not part of its directory */
        $path = preg_replace('/[?#].*$/s', '', $path);

        $lastSlash = strrpos($path, '/');
        $directory = ($lastSlash === false)
            ? ''
            : substr($path, 0, $lastSlash + 1);
        if ($root !== '' && $directory === '') {
            /* "http://example.com" has an implicit root path */
            $directory = '/';
        }

        if (preg_match('/^((?:\.\.\/)+)(.*)$/s', $link, $matches)) {
            /* Step up one directory per leading "../" */
            $absolute = (substr($directory, 0, 1) === '/');
            $trimmed = trim($directory, '/');
            $segments = ($trimmed === '') ? array() : explode('/', $trimmed);
            $steps = substr_count($matches[1], '../');
            while ($steps-- > 0 && ! empty($segments)) {
                array_pop($segments);
            }
            $directory = ($absolute ? '/' : '')
                . (empty($segments) ? '' : implode('/', $segments) . '/');
            $link = $matches[2];
        }

        return $root . $directory . $link;
    }

    /**
     * Determine whether we need to apply our xml:base rules
     *
     * Links that already carry a scheme are returned untouched; all others
     * are combined with the baseURI reported by the DOM for $element.
     *
     * @param   string  $link
     * @param   DOMNode $element  node whose baseURI applies to the link
     * @return  string
     */
    public function addBase($link, $element)
    {
        if ($this->hasScheme($link)) {
            return $link;
        }

        return $this->combineBases($element->baseURI, $link);
    }

    /**
     * Get an entry by its position in the feed, starting from zero
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by ID.
     *
     * Entries are instantiated on first access (class named by
     * $this->itemClass, elements named by $this->itemElement), cached in
     * $this->entries and, when they expose an id, registered in
     * $this->idMappings.
     *
     * @param   int $offset
     * @return  object  an instance of $this->itemClass
     * @throws  XML_Feed_Parser_Exception if there is no entry at $offset
     */
    public function getEntryByOffset($offset)
    {
        if (isset($this->entries[$offset])) {
            return $this->entries[$offset];
        }

        $entries = $this->model->getElementsByTagName($this->itemElement);
        /* A negative offset would pass a bare length check and then make
           item() return null */
        if ($offset < 0 || $entries->length <= $offset) {
            throw new XML_Feed_Parser_Exception('No entries found');
        }

        $node = $entries->item($offset);
        $itemClass = $this->itemClass;
        $entry = new $itemClass($node, $this, $node->baseURI);
        $this->entries[$offset] = $entry;
        if ($id = $entry->id) {
            $this->idMappings[$id] = $entry;
        }

        return $entry;
    }

    /**
     * Return a date in seconds since epoch.
     *
     * Takes the first element named $method in the model and passes its
     * text to PHP's strtotime, which returns a unix timestamp (seconds
     * since 1970-01-01 00:00:00) or false if the text cannot be parsed.
     *
     * @link    http://php.net/strtotime
     * @param    string    $method        The name of the date construct we want
     * @param    array     $arguments    Included for compatibility with our __call usage
     * @return    int|false datetime
     */
    protected function getDate($method, $arguments)
    {
        $time = $this->model->getElementsByTagName($method);
        if ($time->length == 0) {
            return false;
        }
        return strtotime($time->item(0)->nodeValue);
    }

    /**
     * Get a text construct.
     *
     * @param    string    $method    The name of the text construct we want
     * @param    array     $arguments    Included for compatibility with our __call usage
     * @return    string|false  text of the first matching element, or false
     */
    protected function getText($method, $arguments = array())
    {
        $tags = $this->model->getElementsByTagName($method);
        if ($tags->length == 0) {
            return false;
        }
        return $tags->item(0)->nodeValue;
    }

    /**
     * Apply various rules to retrieve category data.
     *
     * There is no single way of declaring a category in RSS1/1.1 as there is in RSS2
     * and  Atom. Instead the usual approach is to use the dublin core namespace to
     * declare  categories. For example delicious use both:
     * <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
     * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
     * to declare a categorisation of 'PEAR'.
     *
     * This implementation reads the 'subject' elements only.
     *
     * @param    string    $call    for compatibility with our overloading
     * @param   array $arguments - arg 0 is the offset, arg 1 is whether to return as array
     * @return  string|array|false  false when there is no category at the offset
     */
    protected function getCategory($call, $arguments)
    {
        $categories = $this->model->getElementsByTagName('subject');
        $offset = empty($arguments[0]) ? 0 : $arguments[0];
        $asArray = ! empty($arguments[1]);

        if ($offset < 0 || $categories->length <= $offset) {
            return false;
        }

        if ($asArray) {
            $list = array();
            foreach ($categories as $category) {
                $list[] = $category->nodeValue;
            }
            return $list;
        }

        return $categories->item($offset)->nodeValue;
    }

    /**
     * Count occurrences of an element
     *
     * Counts the elements named $type found by getElementsByTagName on
     * the model.
     *
     * @param    string    $type    the element we want to get a count of
     * @return    int
     */
    protected function count($type)
    {
        return $this->model->getElementsByTagName($type)->length;
    }

    /**
     * Part of our xml:base processing code
     *
     * We need a couple of methods to access XHTML content stored in feeds.
     * This is because we dereference all xml:base references before returning
     * the element. This method handles the attributes.
     *
     * 'src' and 'href' values are resolved with addBase(); attributes named
     * 'base' are dropped because they have been applied. Every value is
     * escaped before being quoted, since it comes from feed data and may
     * contain '"', '&' or '<'. The resolved value is only used for the
     * returned string; the DOM node is not modified.
     *
     * @param   DOMElement $node    The DOM node we are iterating over
     * @return  string  attributes with a leading space, or '' if none
     */
    public function processXHTMLAttributes($node)
    {
        if (! $node->hasAttributes()) {
            return '';
        }

        $pairs = array();
        foreach ($node->attributes as $attribute) {
            if ($attribute->name == 'base') {
                continue;
            }
            $value = $attribute->value;
            if ($attribute->name == 'src' || $attribute->name == 'href') {
                $value = $this->addBase($value, $node);
            }
            $pairs[] = $attribute->name . '="'
                . htmlspecialchars($value, ENT_COMPAT, 'UTF-8') . '"';
        }

        return empty($pairs) ? '' : ' ' . implode(' ', $pairs);
    }

    /**
     * Part of our xml:base processing code
     *
     * We need a couple of methods to access XHTML content stored in feeds.
     * This is because we dereference all xml:base references before returning
     * the element. This method recurs through the tree descending from the node
     * and builds our string.
     *
     * Elements are written as an opening tag (with processed attributes),
     * their children, and a closing tag. Text nodes are entity-encoded.
     * Other node types contribute nothing themselves.
     *
     * @param   DOMNode $node    The DOM node we are processing
     * @return   string
     */
    public function traverseNode($node)
    {
        $isElement = ($node instanceof DOMElement);
        $content = '';

        /* Add the opening of this node to the content */
        if ($isElement) {
            $content .= '<' . $node->tagName
                . $this->processXHTMLAttributes($node) . '>';
        }

        /* Process children */
        if ($node->hasChildNodes()) {
            foreach ($node->childNodes as $child) {
                $content .= $this->traverseNode($child);
            }
        }

        if ($node instanceof DOMText) {
            /* DOM strings are UTF-8; flags and charset are given explicitly
               because the defaults of htmlentities() differ between PHP
               versions */
            $content .= htmlentities($node->nodeValue, ENT_COMPAT, 'UTF-8');
        }

        /* Add the closing of this node to the content */
        if ($isElement) {
            $content .= '</' . $node->tagName . '>';
        }

        return $content;
    }

    /**
     * Get content from RSS feeds (atom has its own implementation)
     *
     * The official way to include full content in an RSS1 entry is to use
     * the content module's element 'encoded', and RSS2 feeds often duplicate that.
     * Often, however, the 'description' element is used instead. We will offer that
     * as a fallback. Atom uses its own approach and overrides this method.
     *
     * Text children are returned as their text; element children are
     * returned as serialised XML. Other child node types (comments,
     * processing instructions) cannot be imported into SimpleXML and are
     * skipped.
     *
     * @return  string|false  false when neither element is present
     */
    protected function getContent()
    {
        foreach (array('encoded', 'description') as $element) {
            $found = $this->model->getElementsByTagName($element);
            if ($found->length == 0) {
                continue;
            }

            $first = $found->item(0);
            if (! $first->hasChildNodes()) {
                return $first->nodeValue;
            }

            $value = '';
            foreach ($first->childNodes as $child) {
                if ($child instanceof DOMText) {
                    $value .= $child->nodeValue;
                } else if ($child instanceof DOMElement) {
                    $simple = simplexml_import_dom($child);
                    if ($simple) {
                        $value .= $simple->asXML();
                    }
                }
            }
            return $value;
        }

        return false;
    }

    /**
     * Checks if this element has a particular child element.
     *
     * @param   string  $name    element name to look for
     * @param   int     $offset  zero-based occurrence that must exist
     * @return  bool
     **/
    public function hasKey($name, $offset = 0)
    {
        if ($offset < 0) {
            return false;
        }
        $search = $this->model->getElementsByTagName($name);
        return $search->length > $offset;
    }

    /**
     * Return an XML serialization of the feed, should it be required. Most
     * users however, will already have a serialization that they used when
     * instantiating the object.
     *
     * __toString() must always return a string, so an empty string is
     * returned if the model cannot be imported or serialised.
     *
     * @return    string    XML serialization of element
     */
    public function __toString()
    {
        $simple = simplexml_import_dom($this->model);
        if (! $simple) {
            return '';
        }
        $xml = $simple->asXML();
        return is_string($xml) ? $xml : '';
    }

    /**
     * Get directory holding RNG schemas. Method is based on that
     * found in Contact_AddressBook.
     *
     * The directory is derived from the 'data_dir' setting of PEAR_Config.
     *
     * @return string  schema directory below the PEAR data directory
     * @access public
     * @static
     */
    public static function getSchemaDir()
    {
        require_once 'PEAR/Config.php';
        $config = new PEAR_Config;
        return $config->get('data_dir') . '/XML_Feed_Parser/schemas';
    }
}

?>
Note: See TracBrowser for help on using the repository browser.