jeudi 10 mai 2012

How to download images from Bing?

Here is a small Python script I used to download images from the Bing search engine. An extra test checks whether each image has the focal length in its EXIF information. This information is mandatory for Bundler, so the download system skips the images that do not have it.

To download images from bing, you must first create an app_id for your application here:  http://www.bing.com/developers/createapp.aspx

Libraries already exist to get search results from Bing: pybing and bingapi. They both work pretty well. I decided to use pybing, so let's install it:

sudo easy_install pybing

The next step is to install the Python Imaging Library (PIL) in order to read the EXIF information:

sudo easy_install PIL

Script



  1. from pybing.query.query import BingQuery
  2. from pybing.resultset import BingResultSet
  3. from pybing.query import BingQuery
  4. from pybing.query import Pagable
  5. import Image
  6. import ExifTags
  7. import os
  8. import sys
  9. __author__ = 'jacques fontignie'
  10. from pybing import *
  11. APP_ID = <ENTER_YOUR_APP_ID>
  12. exifAttrs = dict(Model=True,Make=True,ExifImageWidth=True,ExifImageHeight=True,FocalLength=True)
  13.                  
  14. class ImageQuery(BingQuery, Pagable):
  15.     SOURCE_TYPE = constants.IMAGE_SOURCE_TYPE
  16.    
  17.     def get_request_parameters(self):
  18.         params = super(ImageQuery, self).get_request_parameters()
  19.         params.update({
  20.             'Image.Filters': 'Style:Photo'        
  21.         })
  22.         return params
  23. def download(dir, filename, url):
  24.     """Copy the contents of a file from a given URL
  25.    to a local file.
  26.    """
  27.     import urllib2
  28.    
  29.     try:
  30.         webFile = urllib2.urlopen(url)
  31.     except:
  32.         print "Unexpected error:", sys.exc_info()[0]
  33.         return False
  34.     extension = url.split('.')[-1].lower()
  35.     if not extension == "jpg":
  36.         print "not a jpg extension: " + url
  37.         return False
  38.     localPath = dir + "/" + filename + "." + extension
  39.     localFile = open(localPath, 'w')
  40.     localFile.write(webFile.read())
  41.     webFile.close()
  42.     localFile.close()  
  43.    
  44.     print "File successfully downloaded"
  45.     try:
  46.         photoHandle = Image.open(localPath)
  47.        
  48.         found = False
  49.         exif = {}
  50.         info = photoHandle._getexif()
  51.         if info:
  52.             for attr, value in info.items():
  53.                 decodedAttr = ExifTags.TAGS.get(attr, attr)
  54.                 if decodedAttr in exifAttrs: exif[decodedAttr] = value
  55.                 if 'FocalLength' in exif:
  56.                     found = True    
  57.            
  58.         if not found:
  59.             print "Dropping the file as it does not contain any useful exif information"
  60.             os.remove(localPath)
  61.             return False
  62.    
  63.     except:
  64.         print "Unexpected error:", sys.exc_info()[0]
  65.         os.remove(localPath)
  66.         return False
  67.         # get EXIF information as a dictionary
  68.    
  69.    
  70.     return True
  71. def main():
  72.    
  73.     dir = "images"
  74.     if not os.path.exists(dir):
  75.         os.makedirs(dir)
  76.    
  77.    
  78.     query = ImageQuery(APP_ID,query="rushmore")      
  79.          
  80.     results = query.execute()
  81.     default_name = "img_"
  82.     index = 0
  83.     count = 0
  84.    
  85.     for result in results:
  86.         count = count +1
  87.        
  88.         url = result.mediaurl
  89.         print (str(index) + ") " + str(count) + "- " + url)
  90.         filename = default_name + '%05d' % index
  91.                  
  92.         if download(dir,filename,url):
  93.             index = index + 1
  94.        
  95.        
  96.         if index == 500:
  97.             break
  98.        
  99.     print "Images succesfully fetched ({})".format(index)
  100.        
  101.    
  102. if __name__ == '__main__':
  103.   main()

1 commentaire:

  1. man could you help me?
    i got this error... File "/home/panda/mybing2.py", line 103, in
    main()
    File "/home/panda/mybing2.py", line 85, in main
    for result in results:
    File "/usr/local/lib/python2.7/dist-packages/pybing-0.12-py2.7.egg/pybing/resultset.py", line 116, in __iter__
    for result in query.get_search_results():
    File "/usr/local/lib/python2.7/dist-packages/pybing-0.12-py2.7.egg/pybing/query/query.py", line 76, in get_search_results
    response = self.get_search_response()
    File "/usr/local/lib/python2.7/dist-packages/pybing-0.12-py2.7.egg/pybing/query/query.py", line 72, in get_search_response
    return json.loads(contents)['SearchResponse'][self.SOURCE_TYPE]

    RépondreSupprimer