1 """
2 this module implements a python wrapper for the connotea web api
3 (http://www.connotea.org/wiki/WebAPI)
4 """
5
6
7
8
9
10 import httplib
11 import urllib
12 import urllib2
13 import md5
14 import re
15 import time
16 from xml.dom import minidom
17 from datetime import datetime
18
19 __version__ = '$LastChangedRevision: 6 $'
20 USER_AGENT = "pyconnotea/%s" % __version__
21
22
23
24
26 """
27 The base class providing access to the web api functionality via method calls.
28
29 usage:
30
31 >>> import api as connotea
32 >>> api = connotea.Api('user', 'password')
33 >>> bookmarks = api.query('bookmarks', user='user')
34 >>> posts = api.query(uri='http://www.dlib.org/dlib/may06/apps/05apps.html')
35 >>> posts[0].bookmark.citation.authors
36 [u'Ann Apps', u'Ross MacIntyre']
37 >>> newPost = api.add('http://www.zim.mpg.de/',('zim','heinz nixdorf'))
38 >>> newPost.description
39 >>> newPost.description = 'new description'
40 >>> newPost = api.edit(newPost, comment='new comment')
41 >>> newPost.description
42 u'new description'
43 >>> newPost.comments.pop().entry
44 u'new comment'
45 >>> api.remove(newPost)
46 """
47 BASE_URL = "http://www.connotea.org/data"
48 AUTH_REALM = 'Connotea'
49 AUTH_HOST = 'http://www.connotea.org'
50
def __init__(self, user, password, authenticate=False, debug=False):
    """
    Store the credentials, prepare HTTP basic authentication and set the
    HTTP debug level.

    @param user: username for a connotea account
    @param password: password for the connotea user
    @param authenticate: flag signalling whether to try authentication on init
    @param debug: flag signalling debug mode
    """
    self._user, self._password, self._debug = user, password, debug

    # register the credentials for the Connotea realm and host
    handler = urllib2.HTTPBasicAuthHandler()
    handler.add_password(self.AUTH_REALM, self.AUTH_HOST, user, password)
    self._authHandler = handler

    # note: debuglevel is set on the HTTPConnection class itself, so it is
    # shared by all connections and not limited to this instance
    httplib.HTTPConnection.debuglevel = int(bool(debug))

    if authenticate:
        # send a request for the '/noop' path right away
        self._request('/noop')
73
74
def _request(self, url, data=None, errorHandler=None):
    """
    generic connotea WebAPI request

    The raw response body is kept in C{self._lastResponse}.

    @param url: the path component of the request url to be attached to C{self.BASE_URL}
    @param data: a dictionary or sequence of pairs to create the body of the request.
        If this parameter is not C{None}, the request will be using the HTTP POST method.
    @param errorHandler: a C{urllib2.HTTPDefaultErrorHandler} subclass;
        C{ThrottlingHandler} is used if it is C{None}
    @return: the response body parsed by C{minidom.parseString}
    @raise urllib2.HTTPError: for error responses the handler does not deal
        with; the error is passed on to the caller unchanged
    """
    if errorHandler is None:
        errorHandler = ThrottlingHandler

    opener = urllib2.build_opener(self._authHandler, errorHandler())
    request = urllib2.Request(self.BASE_URL + url)

    if data is not None:
        request.add_data(urllib.urlencode(data))

    request.add_header('User-Agent', USER_AGENT)

    if self._debug:
        print("url: %s" % request.get_full_url())

    # every request is preceded by a pause of one second
    time.sleep(1)
    try:
        response = opener.open(request)
    except urllib2.HTTPError:
        # This clause used to name the handler class ('except errorHandler'),
        # which is not an exception type and therefore never matched. The
        # error still propagates; debug mode merely reports the failure.
        if self._debug:
            print("request failed: %s" % request.get_full_url())
        raise

    try:
        self._lastResponse = response.read()
    finally:
        # release the connection even if reading fails
        response.close()
    return minidom.parseString(self._lastResponse)
108
109
def add(self, uri, tags, title=None, description=None, mywork=False, private=False, comment=None):
    """
    Create a new post and return it.

    Quote from the web api specification::

      New posts can be created by submitting an HTTP POST request to

      http://www.connotea.org/data/add

      The body of the POST should be simply an HTML form-style set of key=value URL-escaped pairs.

      The fields available are listed below. The uri and tags fields are required, all other fields are optional.

        - uri => the URL for the bookmark you're creating
        - tags => a string representing the tags. Individual tags should be comma- or space-separated, and multi-word tags must be enclosed in quotes ("")
        - usertitle => the title to use for the post
        - description => description of the bookmark
        - mywork => 0 or 1. If 1, it means you are claiming authorship (or co-authorship) of the work being bookmarked
        - private => 0 or 1. 0 means the post is shared with all, 1 means it is kept private to the user.
        - comment => HTML for a comment. Note that newlines will be automatically converted to <br/>.

    @param uri: see above
    @param tags: either a correctly formatted string as described above or
        a C{list} or C{tuple} of arbitrary strings, specifying the single
        tags. note that double quotes will be stripped from these
        strings.
    @param title: submitted as C{usertitle} unless C{None}
    @param description: submitted unless C{None}
    @param mywork: truth value, submitted as 1 or 0
    @param private: truth value, submitted as 1 or 0
    @param comment: submitted unless C{None}
    @return: the new post as returned by L{query} for this user and the
        hash taken from the response location
    @raise Exception: if the response is not a success with code 201
    """
    if isinstance(tags, (list, tuple)):
        tags = formatTags(tags)

    data = {
        'uri': uri,
        'tags': tags,
        'mywork': int(bool(mywork)),
        'private': int(bool(private)),
    }
    optional = (('usertitle', title), ('description', description), ('comment', comment))
    for field, value in optional:
        if value is not None:
            data[field] = value

    response = Response(self._request('/add', data, AddErrorHandler))
    if not (response.success and response.code == 201):
        raise Exception("invalid return code")

    # the part of the location following '/uri/' is used as uri hash
    uriMd5 = response.location.split('/uri/')[1]
    return self.query(user=self._user, uriMd5=uriMd5)[0]
166
167
def edit(self, post, comment=None):
    """
    Updates a Post.

    Link, tags and privacy flag of C{post} are always submitted; title and
    description only if they are not empty.

    @param post: a L{Post} instance
    @param comment: a string, submitted as comment unless C{None}
    @return: a L{Post} instance representing the updated post, i.e. C{post} will
        not be updated in place!
    @raise Exception: if the response is not a success with code 201
    """
    data = {}
    data['uri'] = post.link
    data['tags'] = formatTags(post.tags)
    data['private'] = post.private

    for field, value in (('usertitle', post.title), ('description', post.description)):
        if value:
            data[field] = value
    if comment is not None:
        data['comment'] = comment

    response = Response(self._request('/edit', data, AddErrorHandler))
    if not (response.success and response.code == 201):
        raise Exception("invalid return code")

    # fetch the post again to get its updated state
    return self.query(user=self._user, uriMd5=post.hash)[0]
190
191
def remove(self, what):
    """
    Removes a post specified as L{Post} instance or uri.

    @param what: the specification of the post to remove
    @return: C{None} on success
    @raise Exception: if the response is not a success with code 200
    """
    # NOTE(review): the 'def' line is missing from the listing this source
    # was recovered from; name and parameter are taken from the call sites
    # and the docstring - confirm against the original file.
    if isinstance(what, Post):
        uri = what.link
    else:
        uri = what

    response = Response(self._request('/remove', {'uri': uri}))
    if response.success and response.code == 200:
        return

    raise Exception("invalid return code")
209
210
def query(self, what='posts', user=None, tag=None, date=None, uri=None, uriMd5=None,
          num=None, start=None, q=None):
    """
    Query for posts, bookmarks or tags.

    query URLs are constructed as follows.

    URL path components:

      - /bookmarks or /tags or '' (empty string, which means 'posts')
      - /user/ [username] (optional)
      - /tag/ [tagname] (optional)
      - /date/ [date of form YYYY-MM-DD ] (optional)
      - /uri/ [uri or hash] (optional)

    URL query parameters:

      - q= [free text search string]
      - num= [number of results per]
      - start= [result number to start at]

    @param what: one of C{'posts'}, C{'bookmarks'} or C{'tags'}
    @param uri: a uri; its md5 hex digest is put into the URL
    @param uriMd5: a hash which is put into the URL as is; only used if
        C{uri} is C{None}
    @param num: anything C{int()} accepts
    @param start: anything C{int()} accepts
    @return: a list of L{Post}, L{Bookmark} or L{Tag} instances, depending
        on C{what}; an empty list for a '404 No items found' response
    @raise Exception: if none of user, tag, date, uri, uriMd5 and num is
        given, if the input is invalid or if the server sent any other
        C{Response} element
    """
    filters = (user, tag, date, uri, uriMd5, num)
    if not [f for f in filters if f is not None]:
        raise Exception("no filter given")

    # the pattern is anchored at the end, so trailing garbage is rejected;
    # an empty date string is rejected as well instead of yielding '/date/'
    if date is not None and not re.match(r'[0-9]{4}-[0-9]{2}-[0-9]{2}\Z', date):
        raise Exception("invalid input")

    if what == 'posts':
        url = ''
    elif what in ('tags', 'bookmarks'):
        url = '/' + what
    else:
        raise Exception("invalid input")

    if user is not None:
        url += "/user/%s" % urllib.quote(user)
    if tag is not None:
        url += "/tag/%s" % urllib.quote(tag)
    if date is not None:
        url += "/date/%s" % urllib.quote(date)
    if uri is not None:
        url += "/uri/%s" % md5.md5(uri).hexdigest()
    elif uriMd5 is not None:
        url += "/uri/%s" % uriMd5

    params = {}
    if q is not None:
        params['q'] = q
    for key, value in (('num', num), ('start', start)):
        if value is None:
            continue
        try:
            params[key] = int(value)
        except (TypeError, ValueError, OverflowError):
            raise Exception("invalid input")

    if params:
        url += "?%s" % urllib.urlencode(params)

    dom = self._request(url, errorHandler=QueryErrorHandler)
    if dom.getElementsByTagName('Response'):
        # a Response element instead of results: only 'nothing found' is
        # acceptable here
        res = Response(dom)
        if res.code == 404 and res.message == 'No items found':
            return []
        raise Exception('invalid response')

    if what == 'bookmarks':
        return [Bookmark(n) for n in dom.getElementsByTagName('dcterms:URI')]
    elif what == 'posts':
        return [Post(n) for n in dom.getElementsByTagName('Post')]
    else:
        return [Tag(n) for n in dom.getElementsByTagName('Tag')]
276
277
278
279
280
281
283 """
284 get a complete L{Bookmark} object associated with a L{Post} instance.
285 """
286 return self.query('bookmarks', uriMd5=post.hash)[0]
287
288
297
298
# compiled once instead of on every call; raw string, so that '\s' is not
# treated as a (invalid) string escape
_WHITESPACE = re.compile(r'\s+')


def formatTags(tags):
    """
    formats a list of tag names as one string.

    Double quotes are removed from each tag. A tag containing whitespace
    has each run of whitespace replaced by a single space and is enclosed
    in double quotes. The resulting tags are joined by commas.

    @param tags: an iterable of strings
    @return: the formatted string; the empty string if C{tags} is empty
    """
    # NOTE(review): the 'def' line is missing from the listing this source
    # was recovered from; name and parameter are taken from the call sites
    # C{formatTags(tags)} - confirm against the original file.
    formatted = []
    for tag in tags:
        tag = tag.replace('"', '')
        if _WHITESPACE.search(tag):
            tag = '"%s"' % _WHITESPACE.sub(' ', tag)
        formatted.append(tag)
    return ','.join(formatted)
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
331 return repr(self.__dict__)
332
333
335 """
336 Class representing a Connotea bookmark.
337
note: there are two kinds of Bookmark instances. The ones parsed from a
response to a query for bookmarks - these do have all the data; and the
ones parsed from 'dcterms:URI' child elements in 'Post' elements - these
only have a partial metadata set.
342 """
def __init__(self, node):
    """
    Read the bookmark data from a minidom node.

    Title, hash and link are required. C{postCount}, C{id}, C{created},
    C{updated} and C{firstUser} are set to C{None} if the corresponding
    element is missing. C{citation} is C{None}, a L{Citation} instance or -
    if there is more than one C{citation} element - a list of these.

    @param node: a minidom node; the callers in this module pass
        C{dcterms:URI} elements
    """
    # NOTE(review): the 'def' line is missing from the listing this source
    # was recovered from; the signature is taken from the C{Bookmark(node)}
    # call sites - confirm against the original file.
    self.title = node.getElementsByTagName('dc:title')[0].firstChild.nodeValue
    for attr in ['hash', 'link']:
        setattr(self, attr, node.getElementsByTagName(attr)[0].firstChild.nodeValue)

    self.tags = [t.firstChild.nodeValue for t in node.getElementsByTagName('tag')]
    self.users = [p.firstChild.nodeValue for p in node.getElementsByTagName('postedBy')]

    try:
        self.postCount = int(node.getElementsByTagName('postCount')[0].firstChild.nodeValue)
    except IndexError:
        self.postCount = None

    try:
        self.id = node.getElementsByTagName('bookmarkID')[0].firstChild.nodeValue
    except IndexError:
        self.id = None

    for attr in ['created', 'updated', 'firstUser']:
        try:
            setattr(self, attr, node.getElementsByTagName(attr)[0].firstChild.nodeValue)
        except IndexError:
            setattr(self, attr, None)

    citation = node.getElementsByTagName('citation')
    if citation:
        if len(citation) > 1:
            self.citation = [Citation(c) for c in citation]
        else:
            self.citation = Citation(citation[0])
    else:
        self.citation = None
376
377
379 """
380 Class representing a Connotea citation.
381
382 Note: The C{date} attribute of a C{Citation} instance cannot be relied
383 upon being a C{Date} instance. It may just be a string.
384 """
def __init__(self, node):
    """
    C{node} is expected to be a minidom node for a C{citation} element.

    example::

      <citation>
        <rdf:Description>
          <prism:title>An asymmetric world</prism:title>
          <foaf:maker>
            <foaf:Person>
              <foaf:name>Oliver Penrose</foaf:name>
            </foaf:Person>
          </foaf:maker>
          <dc:date>2005-12-15T00:00:00Z</dc:date>
          <prism:publicationName>Nature</prism:publicationName>
          <prism:issn>0028-0836</prism:issn>
          <prism:volume>438</prism:volume>
          <prism:number>7070</prism:number>
          <prism:startingPage>919</prism:startingPage>
          <doiResolver rdf:resource="http://dx.doi.org/10.1038/438919a"/>
          <dc:identifier>doi:10.1038/438919a</dc:identifier>
        </rdf:Description>
      </citation>

    C{date} is a C{Date} instance if the C{dc:date} value can be parsed,
    the plain string otherwise and C{None} if there is no (or an empty)
    C{dc:date} element. Resolvers and C{prism:} attributes which are not
    present are set to C{None}.
    """
    # NOTE(review): the 'def' line is missing from the listing this source
    # was recovered from; the signature is taken from the C{Citation(...)}
    # call sites - confirm against the original file.
    self.authors = [a.firstChild.nodeValue for a in node.getElementsByTagName('foaf:name')]

    # a citation without date used to fail with an IndexError
    dates = node.getElementsByTagName('dc:date')
    if dates and dates[0].firstChild is not None:
        date = dates[0].firstChild.nodeValue
        try:
            self.date = Date(date)
        except Exception:
            # keep the raw value if it cannot be parsed
            self.date = date
    else:
        self.date = None

    self.identifiers = [e.firstChild.nodeValue for e in node.getElementsByTagName('dc:identifier')]

    for attr in ('doiResolver', 'pmidResolver'):
        try:
            setattr(self, attr, node.getElementsByTagName(attr)[0].getAttribute('rdf:resource'))
        except IndexError:
            setattr(self, attr, None)

    for attr in ('title', 'publicationName', 'issn', 'volume', 'number', 'startingPage'):
        try:
            setattr(self, attr, node.getElementsByTagName('prism:' + attr)[0].firstChild.nodeValue)
        except (IndexError, AttributeError):
            # element missing (IndexError) or empty (firstChild is None)
            setattr(self, attr, None)
436
437
class Post(BagOfAttributes):
    """
    Class representing a Connotea post.
    """
    def __init__(self, node):
        """
        C{node} a C{Post} element node from a minidom::

          <Post rdf:about="[ URL of this post on Connotea ]">
            <title> [ The title the user gave to the post.
                      This may be different to the bookmark title and the citation title. ] </title>
            <description> [ description ] </description>

            <!-- A list of tags that this user assigned to their post -->
            <dc:subject> [ tag 1 ] </dc:subject>
            <dc:subject> [ tag 2 ] </dc:subject>
            <dc:subject> [ ... ] </dc:subject>

            <userBookmarkID>[ Internal Connotea ID for this post, useful for debugging. ] </userBookmarkID>

            <dc:creator>[ Name of user who created this post. ] </dc:creator>

            <private> [ Privacy status: 1 = private, 0 = public ] </private>
            <created> [ Date this post was created. ] </created>
            <updated> [ Date this post was last updated. ] </updated>
            <comment>
              <!-- see below -->
            </comment>

            <uri>
              <!-- the contents of the uri element are almost identical to the bookmark format described above -->

              <dcterms:URI rdf:about=" [ The link out to the original webpage. ] ">
                <dc:title> [ The bookmark title, as distinct from the user's personal title for the post. ] </dc:title>
                <link> [ Again, the link out to the original web page. ] </link>
                <hash> [ MD5 hash of the link. ] </hash>

                <!-- If the bookmark is for an article or book that Connotea can get bibliographic information for,
                     the citation element will be present -->
                <citation>
                  <rdf:Description>
                    <prism:title> [ Title of the reference. This may be different to the dc:title above. ] </prism:title>

                    <!-- list of authors -->
                    <foaf:maker>
                      <foaf:Person>
                        <foaf:name> [Name of author 1. ] </foaf:name>
                      </foaf:Person>
                    </foaf:maker>

                    <foaf:maker>
                      <foaf:Person>
                        <foaf:name> [ Name of author 2. ] </foaf:name>
                      </foaf:Person>
                    </foaf:maker>

                    [ ...etc. ]

                    <dc:date> [ Publication date for the reference. ] </dc:date>
                    <prism:publicationName> [ Name of publication the article appeared in
                                              -- i.e the journal name. ] </prism:publicationName>
                    <prism:issn> [ ISSN of publication. ] </prism:issn>
                    <doiResolver rdf:resource=" [ If the article has a DOI, the dx.doi.org link will appear here. ] " />
                    <dc:identifier> [ The DOI in the form doi:xx.xxxx/xxxxxx ] </dc:identifier>

                    <pmidResolver rdf:resource=" [ If the article has a Pubmed ID,
                                                   a link to its entry in the Pubmed
                                                   database will appear here. ] " />
                    <dc:identifier> [ The Pubmed ID in the form PMID: xxxxxxxx ] </dc:identifier>
                  </rdf:Description>
                </citation>
              </dcterms:URI>
            </uri>
          </Post>

        C{title} and C{description} are set to C{None} if the element is
        missing or empty. C{updated} is set to the value of C{created} if
        there is no C{updated} element.
        """
        self.uri = node.getAttribute('rdf:about')

        # optional texts: the element may be missing (IndexError) or empty,
        # in which case firstChild is None (AttributeError)
        for attr in ('title', 'description'):
            try:
                value = node.getElementsByTagName(attr)[0].firstChild.nodeValue
            except (IndexError, AttributeError):
                value = None
            setattr(self, attr, value)

        self.tags = [t.firstChild.nodeValue for t in node.getElementsByTagName('dc:subject')]
        self.id = node.getElementsByTagName('userBookmarkID')[0].firstChild.nodeValue
        self.user = node.getElementsByTagName('dc:creator')[0].firstChild.nodeValue

        for attr in ['hash', 'link', 'private']:
            setattr(self, attr, node.getElementsByTagName(attr)[0].firstChild.nodeValue)

        # 'created' and 'updated' are read from the direct children only;
        # getElementsByTagName would search all descendants of the node
        child = node.firstChild
        while child:
            if hasattr(child, 'tagName') and child.tagName in ('created', 'updated'):
                setattr(self, str(child.tagName), child.firstChild.nodeValue)
            child = child.nextSibling

        self.created = Date(self.created)

        if hasattr(self, 'updated'):
            self.updated = Date(self.updated)
        else:
            self.updated = self.created

        self.bookmark = Bookmark(node.getElementsByTagName('dcterms:URI')[0])

        self.comments = [Comment(c) for c in node.getElementsByTagName('comment')]
559
560
579
580
class Tag(BagOfAttributes):
    """
    Class representing a tag with its post count and usage score.
    """
    # NOTE(review): the 'def __init__' line is missing from the listing this
    # source was recovered from; the signature is taken from the C{Tag(node)}
    # call site in the query method - confirm against the original file.
    def __init__(self, node):
        """
        C{node} is expected to be a minidom node for a C{Tag} element::

          <Tag>
            <rdf:value>zim</rdf:value>
            <postCount>1</postCount>
            <usageScore>1.9809094683904e-316</usageScore>
          </Tag>

        C{postCount} is converted to C{int}, C{usageScore} to C{float}.
        """
        self.name = node.getElementsByTagName('rdf:value')[0].firstChild.nodeValue
        self.postCount = int(node.getElementsByTagName('postCount')[0].firstChild.nodeValue)
        self.usageScore = float(node.getElementsByTagName('usageScore')[0].firstChild.nodeValue)
593
594
596 """
597 wraps a timestamp, providing a C{datetime} instance as attribute.
598 """
599 PATTERN = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z')
600 FORMAT = '%Y-%m-%dT%H:%M:%SZ'
601
def __init__(self, timestamp):
    """
    Parse a timestamp and store the result as C{self.datetime}.

    @param timestamp: a string representing a timestamp in the following format:
        2006-05-14T18:45:11Z
    @raise Exception: if C{timestamp} does not have this format
    """
    # NOTE(review): the 'def' line is missing from the listing this source
    # was recovered from; the signature is taken from the C{Date(...)} call
    # sites and the docstring - confirm against the original file.
    m = self.PATTERN.match(timestamp)
    # match() only anchors at the start, so the end is checked explicitly
    # to reject trailing garbage
    if not m or m.end() != len(timestamp):
        raise Exception('invalid input')

    self.datetime = datetime(*[int(group) for group in m.groups()])
612
614 return repr(self.datetime.strftime(self.FORMAT))
615
616
618 """
619 example::
620
621 <Response rdf:about="">
622 <code>[ HTTP response code will be echoed here ]</code>
623 <message>[ A nice plain text message to pass on to the user ]</message>
624 <isSuccess>1</isSuccess>
625 <!-- or -->
626 <isFailure>1</isFailure>
627 <!-- Note that only one of either isSuccess or isFailure will be present -->
628 <user>[ the name of the user making the request ]</user>
629 <apiVersion>0.1.0</apiVersion>
630 <bibliotechVersion>1.7.1</bibliotechVersion>
631 </Response>
632
633 or::
634
635 <code>404</code>
636 <message>No items found</message>
637 <isFailure>1</isFailure>
638 """
def __init__(self, dom):
    """
    Read the data of a C{Response} element.

    C{code} (converted to C{int}) and C{message} are required. C{user},
    C{apiVersion}, C{bibliotechVersion} and C{location} are set to C{None}
    if the element is missing or empty. C{success} is C{True} if there is
    an C{isSuccess} element.

    @param dom: a minidom document or node containing a C{Response} element
    @raise Exception: if there is no C{Response} element
    """
    # NOTE(review): the 'def' line is missing from the listing this source
    # was recovered from; the signature is taken from the C{Response(dom)}
    # call sites - confirm against the original file.
    if not dom.getElementsByTagName('Response'):
        raise Exception("invalid response")

    self.success = bool(dom.getElementsByTagName('isSuccess'))

    for attr in ('code', 'message'):
        setattr(self, attr, dom.getElementsByTagName(attr)[0].firstChild.nodeValue)
    self.code = int(self.code)

    # these used to be required (except location), i.e. a response without
    # them failed with an IndexError
    for attr in ('user', 'apiVersion', 'bibliotechVersion', 'location'):
        elements = dom.getElementsByTagName(attr)
        if elements and elements[0].firstChild is not None:
            setattr(self, attr, elements[0].firstChild.nodeValue)
        else:
            setattr(self, attr, None)
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
675 """
676 HTTP Error 503 - Service unavailable
677 """
679 """
680 The value of the Retry-After header can be either an HTTP-date or an
681 integer number of seconds (in decimal) after the time of the response::
682
683 Retry-After = "Retry-After" ":" ( HTTP-date | delta-seconds )
684
685 Two examples of its use are::
686
687 Retry-After: Fri, 31 Dec 1999 23:59:59 GMT
688 Retry-After: 120
689 """
690 try:
691 seconds = int(headers.get('Retry-After'))
692 except:
693 seconds = 1
694
695 time.sleep(seconds)
696 return self.parent.open(req)
697
698
700 """
701 Error handler for L{Api.add} and L{Api.edit} requests.
702
703 '201 created' is the response we're expecting for these.
704 """
706 return fp
707
708
710 """
711 Error handler for search requests.
712
713 '404' is the response we're expecting if no items were found.
714 """
716 return fp
717
718
if __name__ == "__main__":
    # Test run against the live service: adds, edits, queries and removes a
    # post using the account given on the command line.
    import sys

    if len(sys.argv) < 3:
        print("""\
Usage:
%s <user> <password>
where <user> and <password> are credentials of a valid connotea account.

Note: The tests will add/edit/remove a post to this account.""" % sys.argv[0])
        sys.exit(0)

    user, password = sys.argv[1:3]
    uri = 'http://www.zim.mpg.de/'
    description = u'new description'
    comment = u'new comment'
    tag1 = 'first tag'
    tag2 = 'second tag'

    api = Api(user, password, debug=True)

    # start from a clean state; an HTTP error here is ignored
    try:
        api.remove(uri)
    except urllib2.HTTPError:
        pass

    # add
    newPost = api.add(uri, [tag1, tag2])
    for expected in (tag1, tag2):
        assert expected in newPost.tags

    # edit
    newPost.description = description
    newPost = api.edit(newPost, comment=comment)
    assert newPost.description == description
    # most recently updated comment last
    newPost.comments.sort(key=lambda c: c.updated.datetime)
    assert newPost.comments.pop().entry == comment

    # query bookmarks, posts and tags
    bookmarks = api.query('bookmarks', user=user, uri=uri)
    assert tag1 in bookmarks[0].tags

    posts = api.query(user=user, uri=uri)
    assert tag2 in posts[0].tags

    today = datetime.now().strftime('%Y-%m-%d')
    tags = api.query('tags', date=today, user=user)
    assert tag1 in [t.name for t in tags]

    # clean up
    api.remove(newPost)

    sys.exit(0)
770