Job Search Engine



1.2 Database Storage

Copyright notice: when reposting, please credit the source http://www.codingsoho.com/


Once the crawler has finished, the scraped records are stored in the database.

SQLAlchemy

Using SQLAlchemy, parse_job_details maps the scraped fields to a JobEntry, treats a job as a duplicate when title, company and description all match, and links each entry to the TriggerScrapRecord that triggered the scrape:

    def parse_job_details(self, text):
        try:
            db_table_columns = ['title', 'salary', 'region', 'degree', 'experience',
                                'company', 'industry', 'href', 'description']
            # Map the scraped fields onto the column names, in order
            p_data = {}
            for i, value in enumerate(db_table_columns):
                p_data[value] = self.job_data[i]
            je = JobEntry(**p_data)
            # A job counts as a duplicate when title, company and description all match
            objs = self.helper.session.query(JobEntry).filter_by(
                title=p_data['title'], company=p_data['company'],
                description=p_data['description'])
            if self.username and self.keyword:
                trigger_obj = self.helper.session.query(TriggerScrapRecord).filter_by(
                    keyword=self.keyword, username=self.username).first()
            else:
                trigger_obj = None
            print('trigger_obj', trigger_obj, self.keyword, self.username)
            if objs.count():
                # Existing entry: refresh its timestamp and attach the trigger record (m2m)
                print("\n\n>>>>>>>>> Already exist {} <<<<<<<<<<<".format(p_data))
                obj = objs.first()
                obj.updated = datetime.datetime.now()
                if trigger_obj and trigger_obj not in obj.trigger_scrap_records:
                    obj.trigger_scrap_records.append(trigger_obj)
                self.helper.session.add(obj)
            else:
                # New entry: link it to the trigger record and insert it
                if trigger_obj:
                    je.trigger_scrap_records = [trigger_obj, ]
                self.helper.session.add(je)
            self.helper.session.commit()
        except Exception as e:
            print(e)
            self.helper.session.rollback()
        self.helper.session.close()
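
The snippet relies on JobEntry and TriggerScrapRecord models with a many-to-many relationship between them. The actual model definitions are not shown in this post, so the following is only a minimal sketch; the column types and lengths, and the association table name job_trigger_assoc, are assumptions:

    # Minimal sketch of the models assumed above; types/lengths are guesses.
    import datetime
    from sqlalchemy import Column, Integer, String, Text, DateTime, Table, ForeignKey
    from sqlalchemy.orm import relationship
    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base()

    # Association table backing the JobEntry <-> TriggerScrapRecord many-to-many relationship
    job_trigger_assoc = Table(
        'job_trigger_assoc', Base.metadata,
        Column('job_entry_id', Integer, ForeignKey('job_entry.id')),
        Column('trigger_id', Integer, ForeignKey('trigger_scrap_record.id')),
    )

    class JobEntry(Base):
        __tablename__ = 'job_entry'
        id = Column(Integer, primary_key=True)
        title = Column(String(255))
        salary = Column(String(64))
        region = Column(String(64))
        degree = Column(String(64))
        experience = Column(String(64))
        company = Column(String(255))
        industry = Column(String(255))
        href = Column(String(512))
        description = Column(Text)
        created = Column(DateTime, default=datetime.datetime.now)
        updated = Column(DateTime, default=datetime.datetime.now)
        # The backref provides TriggerScrapRecord.job_entries, used later in notify()
        trigger_scrap_records = relationship(
            'TriggerScrapRecord', secondary=job_trigger_assoc, backref='job_entries')

    class TriggerScrapRecord(Base):
        __tablename__ = 'trigger_scrap_record'
        id = Column(Integer, primary_key=True)
        username = Column(String(64))
        keyword = Column(String(64))
        href = Column(String(512))
        email = Column(String(128))

With a plain relationship() like this, record.job_entries is an InstrumentedList, which is exactly what causes the AttributeError noted in notify() further down.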

Using MySQLdb (this version does not handle the many-to-many relationship):

            if not self.jse_sql_helper.entry_exist('', p_data['title'], p_data['company'], p_data['description']):
                self.jse_sql_helper.insert(p_data)
            else:
                self.jse_sql_helper.update_time(p_data)
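
For comparison, here is a rough idea of what the MySQLdb-backed helper could look like. The real jse_sql_helper is not shown in the post, so the class name JseSqlHelper, the table name job_entry, and the SQL statements below are assumptions; only the entry_exist / insert / update_time call signatures come from the snippet above:

    # Illustrative sketch only: the real jse_sql_helper is not shown in the post.
    import MySQLdb

    class JseSqlHelper:
        def __init__(self, **conn_kwargs):
            self.conn = MySQLdb.connect(**conn_kwargs)

        def entry_exist(self, table, title, company, description):
            # The first argument is unused here; its purpose is unclear from the snippet above.
            # Duplicate check on the same three columns as the SQLAlchemy version.
            cur = self.conn.cursor()
            cur.execute(
                "SELECT COUNT(*) FROM job_entry"
                " WHERE title=%s AND company=%s AND description=%s",
                (title, company, description))
            (count,) = cur.fetchone()
            cur.close()
            return count > 0

        def insert(self, p_data):
            cols = ', '.join(p_data.keys())
            placeholders = ', '.join(['%s'] * len(p_data))
            cur = self.conn.cursor()
            cur.execute(
                "INSERT INTO job_entry ({}) VALUES ({})".format(cols, placeholders),
                tuple(p_data.values()))
            self.conn.commit()
            cur.close()

        def update_time(self, p_data):
            # Refresh the row's updated timestamp when the job already exists
            cur = self.conn.cursor()
            cur.execute(
                "UPDATE job_entry SET updated=NOW()"
                " WHERE title=%s AND company=%s AND description=%s",
                (p_data['title'], p_data['company'], p_data['description']))
            self.conn.commit()
            cur.close()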

In the main program, storage and notification after the crawler runs:

def notify():
    scrap_records = sql_helper.session.query(TriggerScrapRecord).all()
    for record in scrap_records:
        # Re-run the scrape for this user/keyword, then mail any entries created in the last day
        param = {'username': record.username, 'keyword': record.keyword, 'href': record.href}
        func_sync(param)
        if record.email:
            oneDayAgo = datetime.datetime.now() - datetime.timedelta(days=1)
            print(type(record.job_entries), type(record))
            # Alternative: filter by calendar day instead of a 24-hour window
            # from sqlalchemy import Date, cast
            # from datetime import date
            # job_entries_objs = record.job_entries.filter(cast(JobEntry.created, Date) == date.today()).all()
            job_entries_objs = record.job_entries.filter(JobEntry.created > oneDayAgo).all()
            # record.job_entries must be a query object (dynamic relationship); with a plain
            # InstrumentedList this raises:
            # AttributeError: 'InstrumentedList' object has no attribute 'filter'
            # https://stackoverflow.com/questions/11578070/sqlalchemy-instrumentedlist-object-has-no-attribute-filter
            # https://blog.csdn.net/weixin_40161254/article/details/82689372
            # https://stackoverflow.com/questions/7075828/make-sqlalchemy-use-date-in-filter-using-postgresql
            if len(job_entries_objs):
                send_mail(job_entries_objs, [record.email, ])
    sys.exit()
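
As the comments note, record.job_entries.filter(...) raises AttributeError when the relationship yields a plain InstrumentedList. One way to make the date filter work, assuming the model sketch given earlier, is to declare the backref as dynamic so it returns a query object:

    # Sketch, reusing Base and job_trigger_assoc from the model sketch above.
    from sqlalchemy.orm import relationship, backref

    class JobEntry(Base):
        __tablename__ = 'job_entry'
        id = Column(Integer, primary_key=True)
        created = Column(DateTime, default=datetime.datetime.now)
        # ... other columns as in the earlier sketch ...
        # lazy='dynamic' on the backref turns record.job_entries into a query
        # object that supports .filter(), instead of a plain InstrumentedList.
        trigger_scrap_records = relationship(
            'TriggerScrapRecord',
            secondary=job_trigger_assoc,
            backref=backref('job_entries', lazy='dynamic'))

JobEntry.trigger_scrap_records itself keeps the default loading, so the append() and membership test in parse_job_details continue to work on a normal list.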