Job Search Engine
877 浏览 5 years, 8 months
1.2 数据库存储
版权声明: 转载请注明出处 http://www.codingsoho.com/数据库存储
爬虫完成之后的数据记录存储
sqlalchemy
使用 SQLAlchemy(ORM 方式,同时维护 JobEntry 与 TriggerScrapRecord 之间的多对多关系):
def parse_job_details(self, text):
    """Store the scraped job held in ``self.job_data``, de-duplicating by content.

    The values of ``self.job_data`` are mapped positionally onto the
    ``JobEntry`` columns listed in ``db_table_columns``. If a row with the
    same title, company and description already exists, only its ``updated``
    timestamp is refreshed; otherwise a new ``JobEntry`` is added. When both
    ``self.username`` and ``self.keyword`` are set, the matching
    ``TriggerScrapRecord`` (if one exists) is linked to the entry.

    Args:
        text: Not used by this method.

    Returns:
        None. Any exception is printed and the transaction is rolled back
        instead of being propagated to the caller.
    """
    session = self.helper.session
    try:
        db_table_columns = ['title', 'salary', 'region', 'degree', 'experience',
                            'company', 'industry', 'href', 'description']
        # Index explicitly so a short ``job_data`` still raises (and is
        # reported below) rather than silently producing a partial record.
        p_data = {column: self.job_data[i]
                  for i, column in enumerate(db_table_columns)}

        duplicates = session.query(JobEntry).filter_by(
            title=p_data['title'],
            company=p_data['company'],
            description=p_data['description'],
        )

        if self.username and self.keyword:
            trigger_obj = session.query(TriggerScrapRecord).filter_by(
                keyword=self.keyword, username=self.username).first()
        else:
            trigger_obj = None
        print('trigger_obj', trigger_obj, self.keyword, self.username)

        # One round trip answers both "does it exist?" and "which row?".
        existing = duplicates.first()
        if existing is not None:
            print("\n\n>>>>>>>>> Already exist {} <<<<<<<<<<<".format(p_data))
            existing.updated = datetime.datetime.now()
            if trigger_obj and trigger_obj not in existing.trigger_scrap_records:
                # In-place append is tracked by the ORM; no reassignment needed.
                existing.trigger_scrap_records.append(trigger_obj)
            session.add(existing)
        else:
            # Build the new row only when it is actually going to be stored.
            je = JobEntry(**p_data)
            if trigger_obj:
                je.trigger_scrap_records = [trigger_obj]
            session.add(je)
        session.commit()
    except Exception as e:
        print(e)
        session.rollback()
    finally:
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether close() ran only after a failure; closing on every path
        # releases the connection either way.
        session.close()
使用 MySQLdb(直接执行 SQL,没有处理多对多(m2m)关系):
# Insert the posting if it is new; otherwise only refresh its timestamp.
already_stored = self.jse_sql_helper.entry_exist(
    '', p_data['title'], p_data['company'], p_data['description'])
if already_stored:
    self.jse_sql_helper.update_time(p_data)
else:
    self.jse_sql_helper.insert(p_data)
主程序里爬虫之后的存储与邮件通知:
def notify():
    """Process every saved search and e-mail its subscriber the recent jobs.

    For each ``TriggerScrapRecord``, ``func_sync`` is called with the record's
    username, keyword and href (presumably re-running the scrape for that
    search -- confirm against ``func_sync``). If the record has an e-mail
    address, the ``JobEntry`` rows linked to it and created within the last
    24 hours are passed to ``send_mail``. The process exits afterwards.
    """
    session = sql_helper.session
    for record in session.query(TriggerScrapRecord).all():
        param = {'username': record.username, 'keyword': record.keyword, 'href': record.href}
        func_sync(param)

        if not record.email:
            continue

        # Rolling 24-hour window ending now. A calendar-day variant would
        # compare cast(JobEntry.created, Date) with date.today() instead.
        one_day_ago = datetime.datetime.now() - datetime.timedelta(days=1)

        # Query through the session rather than ``record.job_entries.filter``:
        # a default relationship loads as an InstrumentedList, which has no
        # ``filter`` method (only ``lazy="dynamic"`` relationships do). See
        # https://stackoverflow.com/questions/11578070/sqlalchemy-instrumentedlist-object-has-no-attribute-filter
        recent_entries = (
            session.query(JobEntry)
            .filter(JobEntry.trigger_scrap_records.contains(record))
            .filter(JobEntry.created > one_day_ago)
            .all()
        )
        if recent_entries:
            send_mail(recent_entries, [record.email])

    # NOTE(review): the original indentation was lost; exiting once all
    # records are handled is the assumed placement -- confirm.
    sys.exit()