Ansichten: QuotePaste - CodePaste - NoPaste
Codesnippet eingetragen am 20.2.2013 um 23:48
Von: Michael
Sprache: Python
Beschreibung: Aktualisiertes Griffith IMDB-Plugin
CodeSnippet:
  1. # -*- coding: UTF-8 -*-
  2.  
  # NOTE(review): '$Id$' looks like a version-control keyword placeholder that was never expanded — confirm.
  3. __revision__ = '$Id$'
  4.  
  5. # Copyright (c) 2005-2011 Vasco Nunes, Piotr Ożarowski
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation; either version 2 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU Library General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program; if not, write to the Free Software
  19. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  20.  
  21. # You may use and distribute this software under the terms of the
  22. # GNU General Public License, version 2 or later
  23.  
  24. import gutils, movie
  25. import string, re
  26.  
  # Module-level metadata describing this plugin.
  # NOTE(review): presumably read by the host application's plugin loader — confirm against the caller.
  27. plugin_name = 'IMDb'
  28. plugin_description = 'Internet Movie Database'
  29. plugin_url = 'www.imdb.com'
  # `_` is not defined in this file; presumably a gettext-style translation function installed globally — confirm.
  30. plugin_language = _('English')
  31. plugin_author = 'Vasco Nunes, Piotr Ożarowski'
  32. plugin_author_email = 'griffith@griffith.cc'
  33. plugin_version = '1.13'
  34.  
  class Plugin(movie.Movie):
      """Fill movie attributes by cutting fragments out of IMDb HTML pages.

      Every ``get_<name>`` method assigns ``self.<name>`` (``get_image``
      assigns ``self.image_url``).  The fragments are extracted with
      ``gutils`` helpers and regular expressions.

      ``self.page`` and ``self.open_page`` are used but not defined in this
      file; presumably they are provided by ``movie.Movie`` -- confirm.
      ``_`` is presumably a globally installed translation function -- confirm.
      """

      def __init__(self, id):
          """Store the title id and derive the title page URL from it.

          id -- the part of the IMDb identifier that follows ``tt``.
          """
          self.encode = 'utf8'  # fixes broken Umlauts in the titles ...
          self.movie_id = id
          self.url = "http://imdb.com/title/tt%s" % self.movie_id

      def initialize(self):
          """Open the four sub-pages the ``get_*`` methods read from."""
          self.cast_page = self.open_page(url=self.url + '/fullcredits')
          self.plot_page = self.open_page(url=self.url + '/plotsummary')
          self.comp_page = self.open_page(url=self.url + '/companycredits')
          self.tagl_page = self.open_page(url=self.url + '/taglines')

      def get_image(self):
          """Set ``self.image_url`` from the ``src`` of the primary image."""
          anchor = gutils.trim(self.page, 'id="img_primary"', '</a>')
          self.image_url = gutils.trim(anchor, 'src="', '"')

      def get_o_title(self):
          """Set ``self.o_title``, trying three sources in order.

          The ``title-extra`` element is tried first, then the ``<h1>``
          heading, then the ``<title>`` tag with everything from `` (`` on
          removed.
          """
          self.o_title = gutils.regextrim(self.page, 'class="title-extra">', '<')
          if not self.o_title:
              self.o_title = self.__heading_title()
          if not self.o_title:
              self.o_title = self.__html_title()

      def get_title(self):  # same as get_o_title()
          """Set ``self.title`` from the ``<h1>`` heading or the ``<title>`` tag."""
          self.title = self.__heading_title()
          if not self.title:
              self.title = self.__html_title()

      def get_director(self):
          """Set ``self.director`` to the ', '-joined names under 'Directed by'."""
          section = gutils.trim(self.cast_page, '>Directed by<', '</table>')
          # The first split element precedes the first link and holds no name.
          links = re.split('<a href=', section)[1:]
          self.director = ', '.join([gutils.trim(link, '>', '<') for link in links])

      def get_plot(self):
          """Set ``self.plot`` from the title page plus the plot summary page.

          Each ``<p class="plotpar">`` paragraph of the summary page is
          appended, stripped of tags and separated by blank lines.
          """
          plot = gutils.regextrim(self.page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
          plot = self.__before_more(plot)
          paragraphs = self.plot_page.split('<p class="plotpar">')
          if len(paragraphs) > 1:
              plot = plot + '\n\n'
              # paragraphs[0] is the page content before the first paragraph.
              for paragraph in paragraphs[1:]:
                  if paragraph != '':
                      text = gutils.strip_tags(gutils.before(paragraph, '</a>'))
                      plot = plot + text + '\n\n'
          self.plot = plot

      def get_year(self):
          """Set ``self.year`` from the link to ``/year/...``."""
          link = gutils.trim(self.page, '<a href="/year/', '</a>')
          self.year = gutils.after(link, '" >')

      def get_runtime(self):
          """Set ``self.runtime`` to the text between 'Runtime:' and ' min'."""
          self.runtime = gutils.regextrim(self.page, 'Runtime:<[^>]+>', ' min')

      def get_genre(self):
          """Set ``self.genre``, cut off at the first 'more' style link."""
          genre = gutils.regextrim(self.page, 'Genre[s]*:<[^>]+>', '</div>')
          self.genre = self.__before_more(genre)

      def get_cast(self):
          """Set ``self.cast`` with one table row per line.

          The cast table of the full credits page is preferred; the title
          page is the fallback.  The '...' separators are replaced by the
          translated ' as '.
          """
          cast = gutils.trim(self.cast_page, '<table class="cast">', '</table>')
          if cast == '':
              cast = gutils.trim(self.page, '<table class="cast">', '</table>')
          cast = cast.replace(' ... ', _(' as '))
          cast = cast.replace('...', _(' as '))
          # Turn row boundaries into line breaks.
          cast = cast.replace('</tr><tr>', "\n")
          cast = re.sub('</tr>[ \t]*<tr[ \t]*class="even">', "\n", cast)
          cast = re.sub('</tr>[ \t]*<tr[ \t]*class="odd">', "\n", cast)
          self.cast = self.__before_more(cast)

      def get_classification(self):
          """Set ``self.classification`` to the word following 'Rated '."""
          mpaa = gutils.trim(self.page, '(<a href="/mpaa">MPAA</a>)', '</div>')
          self.classification = gutils.trim(mpaa, 'Rated ', ' ')

      def get_studio(self):
          """Set ``self.studio`` to the ', '-joined production companies."""
          section = gutils.regextrim(self.comp_page, 'Production Companies<[^>]+', '</ul>')
          self.studio = self.__linked_names(section)

      def get_o_site(self):
          """Set ``self.o_site`` to an empty string (no value is extracted)."""
          self.o_site = ''

      def get_site(self):
          """Set ``self.site`` to the www.imdb.com URL of the title."""
          self.site = "http://www.imdb.com/title/tt%s" % self.movie_id

      def get_trailer(self):
          """Set ``self.trailer`` to the trailers URL of the title."""
          self.trailer = "http://www.imdb.com/title/tt%s/trailers" % self.movie_id

      def get_country(self):
          """Set ``self.country`` to the 'Country:' fragment without newlines."""
          # The '<' consumed by the start marker is put back in front.
          country = '<' + gutils.trim(self.page, 'Country:<', '</div>')
          self.country = re.sub('[\n]+', '', country)

      def get_rating(self):
          """Set ``self.rating`` to the rounded rating, or 0 if none is found.

          The attribute is always assigned, including when the pattern does
          not match or the matched text cannot be converted to a number.
          """
          self.rating = 0
          found = re.search('>([0-9]([.][0-9])*)(<[^>]+>)+[/](<[^>]+>)[0-9][0-9]<', self.page)
          if found and found.group(1):
              try:
                  self.rating = round(float(found.group(1)), 0)
              except (TypeError, ValueError):
                  self.rating = 0

      def get_notes(self):
          """Set ``self.notes`` from language, audio, color and taglines.

          One "<label>: <value>" line is added per non-empty value, in the
          order Language, Audio, Color, Tagline.
          """
          language = self.__page_field('Language:<[^>]+>')
          color = self.__page_field('Color:<[^>]+>')
          sound = self.__page_field('Sound Mix:<[^>]+>')
          tagline = self.__taglines()

          notes = []
          if language:
              notes.append("%s: %s\n" % (_('Language'), language))
          if sound:
              notes.append("%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound))
          if color:
              notes.append("%s: %s\n" % (_('Color'), color))
          if tagline:
              notes.append("%s: %s\n" % ('Tagline', tagline))
          self.notes = ''.join(notes)

      def get_screenplay(self):
          """Set ``self.screenplay`` to the ', '-joined writing credits.

          Links whose text is exactly 'WGA' are skipped.  Joining the names
          also avoids the dangling ', ' the manual concatenation could leave
          behind when the only entry was empty.
          """
          section = gutils.trim(self.cast_page, '>Writing credits<', '</table>')
          writers = []
          # The first split element precedes the first link and holds no name.
          for link in re.split('<a href=', section)[1:]:
              writer = gutils.trim(link, '>', '<')
              if writer == 'WGA':
                  continue
              writer = writer.replace(' (written by)', '')
              writer = writer.replace(' and<', '<')
              writers.append(writer)
          self.screenplay = ', '.join(writers)

      def get_cameraman(self):
          """Set ``self.cameraman`` to the ', '-joined cinematographers."""
          section = gutils.regextrim(self.cast_page, 'Cinematography by<[^>]+', '</table>')
          self.cameraman = self.__linked_names(section)

      def __heading_title(self):
          """Return the ``<h1>`` text up to the space or entity before ``<span``."""
          return gutils.regextrim(self.page, '<h1>', '([ ]|[&][#][0-9]+[;])<span')

      def __html_title(self):
          """Return the ``<title>`` text with everything from `` (`` on removed."""
          return re.sub(' [(].*', '', gutils.trim(self.page, '<title>', '</title>'))

      def __linked_names(self, html):
          """Join with ', ' what ``gutils.trim`` finds between '>' and '<' per link.

          *html* is split on ``href="``; pieces for which the trim result is
          empty are skipped.
          """
          names = []
          for piece in html.split('href="'):
              name = gutils.trim(piece, '>', '<')
              if name:
                  names.append(name)
          return ', '.join(names)

      def __page_field(self, start_pattern):
          """Return the tag-free, whitespace-normalised field from ``self.page``.

          start_pattern -- regular expression marking where the field begins;
                           the field ends at the next ``</div>``.
          """
          value = gutils.regextrim(self.page, start_pattern, '</div>')
          value = gutils.strip_tags(value)
          value = re.sub('[\n]+', '', value)
          value = re.sub('[ ]+', ' ', value)
          return value.strip()

      def __taglines(self):
          """Return the taglines of the taglines page, one per line, or ''."""
          block = gutils.regextrim(self.tagl_page, 'Taglines for', 'Related Links')
          start = block.rfind('</div>')
          if start < 0:
              return ''
          lines = []
          # Entries after the last '</div>' are separated by '<hr' tags.
          for entry in block[start:].split('<hr'):
              entry = gutils.clean(gutils.after(entry, '>'))
              if entry:
                  lines.append(entry + '\n')
          return ''.join(lines)

      def __before_more(self, data):
          """Cut *data* at each 'more' style marker and close the open tag.

          A marker found at index 0 is left alone, as in the original check.
          """
          for marker in ['>See more<', '>more<', '>Full summary<', '>Full synopsis<']:
              position = data.find(marker)
              if position > 0:
                  data = data[:position] + '>'
          return data
  215.  
  class SearchPlugin(movie.SearchMovie):
      """Search IMDb by title and collect the ids and titles of the hits.

      ``self.page``, ``self.ids``, ``self.titles`` and ``self.open_search``
      are used but not defined in this file; presumably they are provided by
      ``movie.SearchMovie`` -- confirm.
      """

      # Captures the title id and the rest of the table cell after a title link.
      PATTERN = re.compile(r"""<a href=['"]/title/tt([0-9]+)/[^>]+[>](.*?)</td>""")
      # Captures the title id from a ``value="/title/tt..."`` attribute.
      PATTERN_DIRECT = re.compile(r"""value="/title/tt([0-9]+)""")

      def __init__(self):
          """Set the search URLs and the page encoding."""
          # http://www.imdb.com/List?words=
          # finds every title sorted alphabetically, first results are with a quote at
          # the beginning (episodes from tv series), no popular results at first
          # http://www.imdb.com/find?more=tt;q=
          # finds a whole bunch of results. if you look for "Rocky" you will get 903 results.
          # http://www.imdb.com/find?s=tt;q=
          # seems to give the best results. 88 results for "Rocky", popular titles first.
          self.original_url_search = 'http://www.imdb.com/find?s=tt&q='
          self.translated_url_search = 'http://www.imdb.com/find?s=tt&q='
          self.encode = 'utf8'

      def search(self, parent_window):
          """Return the result page, or None when ``open_search`` reports failure."""
          if not self.open_search(parent_window):
              return None
          return self.page

      def get_searches(self):
          """Append the id and title of every result row to ``ids`` and ``titles``.

          With fewer than two ids collected, the page is also searched with
          ``PATTERN_DIRECT`` and the first id found is appended; no title is
          added for it.
          """
          # The first split element is the page content before the first row.
          for row in self.page.split('<tr'):
              pass_first = row  # placeholder overwritten below; keeps loop shape simple
              break
          rows = self.page.split('<tr')[1:]
          for row in rows:
              links = self.PATTERN.findall(row)
              # Only rows with at least two title links count; the second one
              # is used (NOTE(review): presumably the first is the poster
              # thumbnail link -- confirm against the page markup).
              if len(links) > 1:
                  title_id, raw_title = links[1]
                  title = re.sub('^[0-9]+[.]', '', gutils.clean(raw_title))
                  self.ids.append(title_id)
                  self.titles.append(title)
          if len(self.ids) < 2:
              # try to find a direct result
              direct = self.PATTERN_DIRECT.findall(self.page)
              if len(direct) > 0:
                  self.ids.append(direct[0])
  251.  
  252.  
  253. #
  254. # Plugin Test
  255. #
  256. class SearchPluginTest(SearchPlugin):
  257. #
  258. # Configuration for automated tests:
  259. # dict { search string -> [ expected result count for original url, expected result count for translated url ] }
  260. #
  261. test_configuration = {
  262. 'Rocky Balboa' : [ 10, 10 ],
  263. 'Ein glückliches Jahr' : [ 3, 3 ]
  264. }
  265.  
  266. class PluginTest:
  267. #
  268. # Configuration for automated tests:
  269. # dict { movie_id -> dict { attribute -> value } }
  270. #
  271. # value: * True/False if the attribute should only be tested for having any value
  272. # * or the expected value
  273. #
  274. test_configuration = {
  275. '0138097' : {
  276. 'title' : 'Shakespeare in Love',
  277. 'o_title' : 'Shakespeare in Love',
  278. 'director' : 'John Madden',
  279. 'plot' : True,
  280. 'cast' : 'Geoffrey Rush' + _(' as ') + 'Philip Henslowe\n\
  281. Tom Wilkinson' + _(' as ') + 'Hugh Fennyman\n\
  282. Steven O\'Donnell' + _(' as ') + 'Lambert\n\
  283. Tim McMullan' + _(' as ') + 'Frees (as Tim McMullen)\n\
  284. Joseph Fiennes' + _(' as ') + 'Will Shakespeare\n\
  285. Steven Beard' + _(' as ') + 'Makepeace - the Preacher\n\
  286. Antony Sher' + _(' as ') + 'Dr. Moth\n\
  287. Patrick Barlow' + _(' as ') + 'Will Kempe\n\
  288. Martin Clunes' + _(' as ') + 'Richard Burbage\n\
  289. Sandra Reinton' + _(' as ') + 'Rosaline\n\
  290. Simon Callow' + _(' as ') + 'Tilney - Master of the Revels\n\
  291. Judi Dench' + _(' as ') + 'Queen Elizabeth\n\
  292. Bridget McConnell' + _(' as ') + 'Lady in Waiting (as Bridget McConnel)\n\
  293. Georgie Glen' + _(' as ') + 'Lady in Waiting\n\
  294. Nicholas Boulton' + _(' as ') + 'Henry Condell\n\
  295. Gwyneth Paltrow' + _(' as ') + 'Viola De Lesseps\n\
  296. Imelda Staunton' + _(' as ') + 'Nurse\n\
  297. Colin Firth' + _(' as ') + 'Lord Wessex\n\
  298. Desmond McNamara' + _(' as ') + 'Crier\n\
  299. Barnaby Kay' + _(' as ') + 'Nol\n\
  300. Jim Carter' + _(' as ') + 'Ralph Bashford\n\
  301. Paul Bigley' + _(' as ') + 'Peter - the Stage Manager\n\
  302. Jason Round' + _(' as ') + 'Actor in Tavern\n\
  303. Rupert Farley' + _(' as ') + 'Barman\n\
  304. Adam Barker' + _(' as ') + 'First Auditionee\n\
  305. Joe Roberts' + _(' as ') + 'John Webster\n\
  306. Harry Gostelow' + _(' as ') + 'Second Auditionee\n\
  307. Alan Cody' + _(' as ') + 'Third Auditionee\n\
  308. Mark Williams' + _(' as ') + 'Wabash\n\
  309. David Curtiz' + _(' as ') + 'John Hemmings\n\
  310. Gregor Truter' + _(' as ') + 'James Hemmings\n\
  311. Simon Day' + _(' as ') + 'First Boatman\n\
  312. Jill Baker' + _(' as ') + 'Lady De Lesseps\n\
  313. Amber Glossop' + _(' as ') + 'Scullery Maid\n\
  314. Robin Davies' + _(' as ') + 'Master Plum\n\
  315. Hywel Simons' + _(' as ') + 'Servant\n\
  316. Nicholas Le Prevost' + _(' as ') + 'Sir Robert De Lesseps\n\
  317. Ben Affleck' + _(' as ') + 'Ned Alleyn\n\
  318. Timothy Kightley' + _(' as ') + 'Edward Pope\n\
  319. Mark Saban' + _(' as ') + 'Augustine Philips\n\
  320. Bob Barrett' + _(' as ') + 'George Bryan\n\
  321. Roger Morlidge' + _(' as ') + 'James Armitage\n\
  322. Daniel Brocklebank' + _(' as ') + 'Sam Gosse\n\
  323. Roger Frost' + _(' as ') + 'Second Boatman\n\
  324. Rebecca Charles' + _(' as ') + 'Chambermaid\n\
  325. Richard Gold' + _(' as ') + 'Lord in Waiting\n\
  326. Rachel Clarke' + _(' as ') + 'First Whore\n\
  327. Lucy Speed' + _(' as ') + 'Second Whore\n\
  328. Patricia Potter' + _(' as ') + 'Third Whore\n\
  329. John Ramm' + _(' as ') + 'Makepeace\'s Neighbor\n\
  330. Martin Neely' + _(' as ') + 'Paris / Lady Montague (as Martin Neeley)\n\
  331. The Choir of St. George\'s School in Windsor' + _(' as ') + 'Choir (as The Choir of St. George\'s School Windsor) rest of cast listed alphabetically:\n\
  332. Jason Canning' + _(' as ') + 'Nobleman (uncredited)\n\
  333. Kelley Costigan' + _(' as ') + 'Theatregoer (uncredited)\n\
  334. Rupert Everett' + _(' as ') + 'Christopher Marlowe (uncredited)\n\
  335. John Inman' + _(' as ') + 'Character Player (uncredited)',
  336. 'country' : 'USA',
  337. 'genre' : 'Comedy | Drama | Romance',
  338. 'classification' : False,
  339. 'studio' : 'Universal Pictures, Miramax Films, Bedford Falls Productions',
  340. 'o_site' : False,
  341. 'site' : 'http://www.imdb.com/title/tt0138097',
  342. 'trailer' : 'http://www.imdb.com/title/tt0138097/trailers',
  343. 'year' : 1998,
  344. 'notes' : _('Language') + ': English\n'\
  345. + _('Audio') + ': Dolby Digital\n'\
  346. + _('Color') + ': Color\n\
  347. Tagline: ...A Comedy About the Greatest Love Story Almost Never Told...\n\
  348. Love is the only inspiration',
  349. 'runtime' : 123,
  350. 'image' : True,
  351. 'rating' : 7,
  352. 'screenplay' : 'Marc Norman, Tom Stoppard',
  353. 'cameraman' : 'Richard Greatrex',
  354. 'barcode' : False
  355. },
  356. }