[Python] 在Django中使用scrapy →→→→→进入此内容的聊天室

来自 , 2020-12-21, 写在 Python, 查看 113 次.
URL http://www.code666.cn/view/222afbe0
  1. # # django-admin.py startproject djangoapp
  2. # # Create your Django app and model: python manage.py startapp websites
  3. # # Edit Scrapy's settings.py, adding a function that points Scrapy at the Django environment
  4. # # Create a pipeline that accesses Django using the model.save() method
  5.  
  6. ***settings.py***
  7.  
  8. import os
  9. ITEM_PIPELINES = ['myapp.pipelines.DjangoPipeline']
  10.  
  11. # http://stackoverflow.com/questions/4271975/access-django-models-inside-of-scrapy
  12. def setup_django_env(path):
  13.     import imp, os
  14.     from django.core.management import setup_environ
  15.  
  16.     f, filename, desc = imp.find_module('settings', [path])
  17.     project = imp.load_module('settings', f, filename, desc)      
  18.  
  19.     setup_environ(project)
  20.  
  21.  
  22. current_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
  23. setup_django_env(os.path.join(current_dir, '../djangoapp/'))
  24.  
  25. ***pipelines.py***
import datetime

from django.db.utils import IntegrityError
from scrapy.exceptions import DropItem

from djangoapp.websites.models import Website
  28.  
  29. class DjangoPipeline(object):
  30.  
  31.     def process_item(self, item, spider):
  32.         website = Website(link=item['link'][0],
  33.                 created=datetime.datetime.now(),
  34.                 )
  35.         try:
  36.           website.save()
  37.         except IntegrityError:
  38.           raise DropItem("Contains duplicate domain: %s" % item['link'][0])
  39.         return item
  40.  
  41. ***djangoapp model***
  42.  
  43. from django.db import models
  44.  
  45. class Website(models.Model):
  46.     link = models.CharField(max_length=200, unique=True)
  47.     created = models.DateTimeField('date created')
  48.  
  49.     def __unicode__(self):
  50.             return u"%s" % self.link
  51.  
  52.  
  53.  
  54. #//python/8391

回复 "在Django中使用scrapy"

这儿你可以回复上面这条便签

captcha