1 | <?php |
---|
2 | /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
---|
3 | |
---|
4 | /** |
---|
5 | * Key gateway class for XML_Feed_Parser package |
---|
6 | * |
---|
7 | * PHP versions 5 |
---|
8 | * |
---|
9 | * LICENSE: This source file is subject to version 3.0 of the PHP license |
---|
10 | * that is available through the world-wide-web at the following URI: |
---|
11 | * http://www.php.net/license/3_0.txt. If you did not receive a copy of |
---|
12 | * the PHP License and are unable to obtain it through the web, please |
---|
13 | * send a note to license@php.net so we can mail you a copy immediately. |
---|
14 | * |
---|
15 | * @category XML |
---|
16 | * @package XML_Feed_Parser |
---|
17 | * @author James Stewart <james@jystewart.net> |
---|
18 | * @copyright 2005 James Stewart <james@jystewart.net> |
---|
19 | * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL |
---|
20 | * @version CVS: $Id: Parser.php,v 1.24 2006/08/15 13:04:00 jystewart Exp $ |
---|
21 | * @link http://pear.php.net/package/XML_Feed_Parser/ |
---|
22 | */ |
---|
23 | |
---|
24 | /** |
---|
25 | * XML_Feed_Parser_Type is an abstract class required by all of our |
---|
26 | * feed types. It makes sense to load it here to keep the other files |
---|
27 | * clean. |
---|
28 | */ |
---|
29 | require_once 'XML/Feed/Parser/Type.php'; |
---|
30 | |
---|
31 | /** |
---|
32 | * We will throw exceptions when errors occur. |
---|
33 | */ |
---|
34 | require_once 'XML/Feed/Parser/Exception.php'; |
---|
35 | |
---|
/**
 * This is the core of the XML_Feed_Parser package. It identifies feed types
 * and abstracts access to them. It is an iterator, allowing for easy access
 * to the entire feed.
 *
 * The constructor parses the XML, picks the concrete feed-type class from the
 * namespace / root element of the document and instantiates it. Everything
 * else on this class either implements Iterator over the feed's entries or
 * forwards to that feed-type object.
 *
 * @author  James Stewart <james@jystewart.net>
 * @version Release: 1.0.2
 * @package XML_Feed_Parser
 */
class XML_Feed_Parser implements Iterator
{
    /**
     * This is where we hold the feed object
     * @var Object
     */
    private $feed;

    /**
     * To allow for extensions, we make a public reference to the feed model
     * @var DOMDocument
     */
    public $model;

    /**
     * A map between entry ID and offset
     * @var array
     */
    protected $idMappings = array();

    /**
     * Offset of the entry the iterator currently points at.
     *
     * Stays null until rewind() or next() is called for the first time;
     * next() relies on that to tell "never started" from "at offset 0".
     * Declared explicitly so it is not created as a dynamic property and so
     * reads of it never fall through to __get().
     *
     * @var int|null
     */
    protected $current_item = null;

    /**
     * A storage space for Namespace URIs.
     * @var array
     */
    private $feedNamespaces = array(
        'rss2' => array(
            'http://backend.userland.com/rss',
            'http://backend.userland.com/rss2',
            'http://blogs.law.harvard.edu/tech/rss'));

    /**
     * Detects feed types and instantiate appropriate objects.
     *
     * Our constructor takes care of detecting feed types and instantiating
     * appropriate classes. For now we're going to treat Atom 0.3 as Atom 1.0
     * but raise a warning. I do not intend to introduce full support for
     * Atom 0.3 as it has been deprecated, but others are welcome to.
     *
     * @param string $feed             XML serialization of the feed
     * @param bool   $strict           Whether or not to validate the feed
     * @param bool   $suppressWarnings Trigger errors for deprecated feed types?
     * @param bool   $tidy             Whether or not to try and use the tidy library on input
     *
     * @throws XML_Feed_Parser_Exception if the input is not valid XML (even
     *         after the optional tidy pass) or the feed type is unknown
     */
    public function __construct($feed, $strict = false, $suppressWarnings = false, $tidy = false)
    {
        $this->model = new DOMDocument;

        if (! $this->loadSource($feed)) {
            $repaired = false;

            /* Second chance: let tidy repair the markup, if asked to and available */
            if (extension_loaded('tidy') && $tidy) {
                $cleaner = new tidy;
                $cleaner->parseString($feed,
                    array('input-xml' => true, 'output-xml' => true));
                $cleaner->cleanRepair();
                $repaired = $this->loadSource((string) $cleaner);
            }

            if (! $repaired) {
                throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
            }
        }

        /* detect feed type */
        list($class, $error) = $this->detectFeedType($this->model->documentElement);

        if (! $suppressWarnings && ! empty($error)) {
            trigger_error($error, E_USER_WARNING);
        }

        /* Instantiate feed object */
        $this->feed = new $class($this->model, $strict);
    }

    /**
     * Load an XML string into $this->model.
     *
     * Empty input is rejected up front: DOMDocument::loadXML() raises a
     * ValueError for it on PHP 8 (a warning on older versions), and callers
     * of the constructor expect an XML_Feed_Parser_Exception instead.
     *
     * @param string $xml XML source to parse
     * @return bool true when the document was parsed, false otherwise
     */
    private function loadSource($xml)
    {
        if ($xml === null || $xml === false || $xml === '') {
            return false;
        }
        return (bool) $this->model->loadXML($xml);
    }

    /**
     * Work out which feed-type class handles the given document element and
     * load the files that define it.
     *
     * The checks run in a fixed order (Atom 1.0, Atom 0.3, RSS 1.0, RSS 1.1,
     * RSS 0.9, RSS 0.91, RSS 0.92, then RSS 2.0 / any other <rss> root); the
     * first match wins. For the RDF-based formats the namespace of the node
     * at index 1 of the root's children is checked as well as the root's own.
     *
     * @param DOMElement $root the document element of the feed
     * @return array two elements: the class name to instantiate, and either
     *               a warning message or false when there is nothing to warn about
     * @throws XML_Feed_Parser_Exception if no known feed type matches
     */
    private function detectFeedType($root)
    {
        $rootNs = $root->namespaceURI;

        /* Namespace of the node at index 1 of the root's children, if any */
        $childNs = null;
        if ($root->hasChildNodes() && $root->childNodes->length > 1) {
            $childNs = $root->childNodes->item(1)->namespaceURI;
        }

        $isRss      = ($root->tagName == 'rss');
        $hasVersion = $root->hasAttribute('version');
        $version    = $root->getAttribute('version');

        if ($rootNs === 'http://www.w3.org/2005/Atom') {
            require_once 'XML/Feed/Parser/Atom.php';
            require_once 'XML/Feed/Parser/AtomElement.php';
            return array('XML_Feed_Parser_Atom', false);
        }

        if ($rootNs === 'http://purl.org/atom/ns#') {
            require_once 'XML/Feed/Parser/Atom.php';
            require_once 'XML/Feed/Parser/AtomElement.php';
            return array('XML_Feed_Parser_Atom',
                'Atom 0.3 deprecated, using 1.0 parser which won\'t provide ' .
                'all options');
        }

        if ($rootNs === 'http://purl.org/rss/1.0/' ||
            $childNs === 'http://purl.org/rss/1.0/') {
            require_once 'XML/Feed/Parser/RSS1.php';
            require_once 'XML/Feed/Parser/RSS1Element.php';
            return array('XML_Feed_Parser_RSS1', false);
        }

        if ($rootNs === 'http://purl.org/rss/1.1/' ||
            $childNs === 'http://purl.org/rss/1.1/') {
            require_once 'XML/Feed/Parser/RSS11.php';
            require_once 'XML/Feed/Parser/RSS11Element.php';
            return array('XML_Feed_Parser_RSS11', false);
        }

        if ($childNs === 'http://my.netscape.com/rdf/simple/0.9/' ||
            $rootNs === 'http://my.netscape.com/rdf/simple/0.9/') {
            require_once 'XML/Feed/Parser/RSS09.php';
            require_once 'XML/Feed/Parser/RSS09Element.php';
            return array('XML_Feed_Parser_RSS09', false);
        }

        /* The version attribute is compared numerically on purpose */
        if ($isRss && $hasVersion && $version == 0.91) {
            require_once 'XML/Feed/Parser/RSS2.php';
            require_once 'XML/Feed/Parser/RSS2Element.php';
            return array('XML_Feed_Parser_RSS2',
                'RSS 0.91 has been superceded by RSS2.0. Using RSS2.0 parser.');
        }

        if ($isRss && $hasVersion && $version == 0.92) {
            require_once 'XML/Feed/Parser/RSS2.php';
            require_once 'XML/Feed/Parser/RSS2Element.php';
            return array('XML_Feed_Parser_RSS2',
                'RSS 0.92 has been superceded by RSS2.0. Using RSS2.0 parser.');
        }

        if (in_array($rootNs, $this->feedNamespaces['rss2'], true) || $isRss) {
            $error = false;
            if (! $hasVersion || $version != 2) {
                $error = 'RSS version not specified. Parsing as RSS2.0';
            }
            require_once 'XML/Feed/Parser/RSS2.php';
            require_once 'XML/Feed/Parser/RSS2Element.php';
            return array('XML_Feed_Parser_RSS2', $error);
        }

        throw new XML_Feed_Parser_Exception('Feed type unknown');
    }

    /**
     * Proxy to allow feed element names to be used as method names
     *
     * For top-level feed elements we will provide access using methods or
     * attributes. This function simply passes on a request to the appropriate
     * feed type object.
     *
     * The feed object is always called with exactly five arguments: missing
     * ones are filled with false and anything beyond the fifth is dropped.
     *
     * @param string $call       - the method being called
     * @param array  $attributes - the arguments it was called with
     * @return mixed whatever the feed type object returns
     */
    public function __call($call, $attributes)
    {
        $attributes = array_pad($attributes, 5, false);
        list($a, $b, $c, $d, $e) = $attributes;
        return $this->feed->$call($a, $b, $c, $d, $e);
    }

    /**
     * Proxy to allow feed element names to be used as attribute names
     *
     * To allow variable-like access to feed-level data we use this
     * method. It reads the property of the same name from the feed type
     * object and returns it.
     *
     * @param string $val - the name of the variable required
     * @return mixed the value of that property on the feed type object
     */
    public function __get($val)
    {
        return $this->feed->$val;
    }

    /**
     * Provides iteration functionality.
     *
     * Of course we must be able to iterate... This function simply increases
     * our internal counter. On the very first call (before any rewind()) it
     * sets the counter to 0 instead. Once the counter has moved past the
     * last entry it is left alone and false is returned.
     *
     * @return false|null
     */
    // The attribute below is a plain comment before PHP 8; on PHP 8.1+ it
    // silences the Iterator return-type deprecation without adding type
    // declarations this PHP 5 code base cannot use.
    #[\ReturnTypeWillChange]
    public function next()
    {
        if (! isset($this->current_item)) {
            $this->current_item = 0;
        } else if ($this->current_item <= $this->feed->numberEntries - 1) {
            ++$this->current_item;
        } else {
            return false;
        }
    }

    /**
     * Return XML_Feed_Type object for current element
     *
     * @return XML_Feed_Parser_Type|false
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->getEntryByOffset($this->current_item);
    }

    /**
     * For iteration -- returns the key for the current stage in the array.
     *
     * @return int|null null if iteration has not been started yet
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->current_item;
    }

    /**
     * For iteration -- tells whether we have reached the
     * end.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->current_item < $this->feed->numberEntries;
    }

    /**
     * For iteration -- resets the internal counter to the beginning.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->current_item = 0;
    }

    /**
     * Provides access to entries by ID if one is specified in the source feed.
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by offset. IDs already seen through
     * getEntryByOffset() are answered from the local ID => offset map; any
     * other ID is handed to the feed type object's own getEntryById().
     *
     * @param string $id Valid ID for the given feed format
     * @return XML_Feed_Parser_Type|false
     */
    public function getEntryById($id)
    {
        if (isset($this->idMappings[$id])) {
            return $this->getEntryByOffset($this->idMappings[$id]);
        }

        /* Not seen yet: the lookup is delegated to the feed type object. */
        return $this->feed->getEntryById($id);
    }

    /**
     * Retrieve entry by numeric offset, starting from zero.
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by ID.
     *
     * Entries the feed object already holds are returned directly. Otherwise
     * the feed object is asked to load the entry; if that throws, or leaves
     * no entry at the offset, false is returned.
     *
     * @param int $offset The position of the entry within the feed, starting from 0
     * @return XML_Feed_Parser_Type|false
     */
    public function getEntryByOffset($offset)
    {
        /* Out of range, in either direction */
        if ($offset < 0 || ! ($offset < $this->feed->numberEntries)) {
            return false;
        }

        if (isset($this->feed->entries[$offset])) {
            return $this->feed->entries[$offset];
        }

        try {
            $this->feed->getEntryByOffset($offset);
        } catch (Exception $e) {
            return false;
        }

        /* The feed may decline to load the entry without throwing */
        if (! isset($this->feed->entries[$offset])) {
            return false;
        }

        $entry = $this->feed->entries[$offset];
        $id    = $entry->getID();

        /*
         * Only remember usable IDs: false or '' would be stored under the
         * keys 0 / '' and later match unrelated getEntryById() lookups.
         */
        if ((is_string($id) && $id !== '') || is_int($id)) {
            $this->idMappings[$id] = $offset;
        }

        return $entry;
    }

    /**
     * Retrieve version details from feed type class.
     *
     * @return mixed the value of the feed type object's version property
     * @author James Stewart
     */
    public function version()
    {
        return $this->feed->version;
    }

    /**
     * Returns a string representation of the feed.
     *
     * Delegates to the feed type object's __toString().
     *
     * @return String
     **/
    public function __toString()
    {
        return $this->feed->__toString();
    }
}
---|
351 | ?> |
---|