diff --git a/wmyblog_coron.py b/wmyblog_coron.py index 842ea33093..8605716bcd 100644 --- a/wmyblog_coron.py +++ b/wmyblog_coron.py @@ -365,7 +365,7 @@ def updateBlogData(nTask=20, proxy='',articleUpdate=True,gDriveUpdate=True,comme # id_list = [d['href'].split('/')[-1] for d in doc.findAll(class_='main-title')][:5] doc = BeautifulSoup(requests.get(f'https://blog.udn.com/MengyuanWang/article',headers=headers).content, features="lxml") id_list = [d('a')[0]['href'].split('/')[-1] for d in doc.findAll(class_='article_topic')][:5] - id_list += ['162032391','108908773','171633910','164999328','131394982','108908755','108908678','131174635','125380740','108908753','108908771'] + id_list += ['162032391','108908773','171633910','164999328','131394982','108908755','108908678','131174635','125380740','108908753','108908771','178109282'] doc = BeautifulSoup(requests.get(f'https://{domain}/blog/inc_2011/psn_article_ajax.jsp?uid=MengyuanWang&f_FUN_CODE=new_rep',headers=headers).content, features="lxml") id_list = id_list + [d('a')[0]['href'].split('/')[-1] for d in doc.findAll('dt')] id_list += list(pd.read_pickle('./data/comment_full.pkl').id.iloc[:20])