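# Regular version (sequential crawl; a gevent-based driver is kept commented out at the end of the file)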

import gevent
import gevent.monkey

gevent.monkey.patch_all()
import requests
from lxml import etree
import re
import json

# Search keywords (brand names) to crawl on Tmall. Only the first one is
# active; the remaining ones were disabled by the original author.
word_lists = ["相宜本草"]
# Disabled keywords: "悦诗风吟", "碧欧泉","自然堂", "百雀羚", "美宝莲", "伊蒂之屋", "施华蔻", "滋源", "丝蕴", "阿道夫", "吕 ryo", "黛丝恩", "芙丽芳丝", "碧柔", "妮维雅 洁面","力士", "菲诗小铺", "安宝笛", "妮维雅", "曼秀雷敦", "高夫"

# Seconds to wait on any single HTTP request. Without a timeout a stalled
# connection would block the crawl indefinitely.
REQUEST_TIMEOUT = 10

# Only the first few products of each search result page are crawled.
MAX_PRODUCTS = 3

# 1-based review pages requested for every product (pages 24 through 33).
REVIEW_PAGES = range(24, 34)

# Selects the title link of every product card on the search result page.
TITLE_LINK_XPATH = '//div[@class="product-iWrap"]//*[@class="productTitle" or contains(@class,"productTitle")]/a[1]'

# Endpoint that receives one JSON document per review.
STORE_URL = "http://servlk/tmall"

# Headers sent with every review-page request.
# NOTE(security): the cookie below is a captured login session hard-coded in
# source. It should be loaded from configuration or the environment instead
# of being committed - left unchanged here so the requests stay identical.
# NOTE(review): "accept-encoding" advertises "br"; presumably decoding such
# responses needs a brotli package installed next to requests - confirm.
RATE_HEADERS = {
    "authority": "rate.tmall.com",
    "method": "GET",
    "scheme": "https",
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "cookie": "cna=Y/gvEkEdxn8CAcookie15=VFC%2FuZ9ayeYq2g%3D%3D; uc3=vt3=F8dByRMGABk%2F9ypnSPI%3D&id2=VAFaC64q7QnX&nk2=okKCgOPi&lg2=V32FPkk%2Fw0dUvg%3D%3D; tracknick=%5Cu6D9F%5Cu6F2A%5Cu5784; lid=%E6%B6%9F%E6%BC%AA%E5%9E%84; _l_g_=Ug%3D%3D; unb=705807873; lgc=%5Cu6D9F%5Cu6F2A%5Cu5784; cookie1=VAYrG4g5P7m343E16lVP62IC5DHsXMLSw0SctPbwLR0%3D; login=true; cookie17=VAFaC64q7QnX; cookie2=157e6a74cd30d191b9059b3db4528c71; _nk_=%5Cu6D9F%5Cu6F2A%5Cu5784; t=9112f19023b58d6a7e797c127de1caa6; sg=%E5%9E%8436; csg=fc322464; _tb_token_=e319fb776eb5e; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; whl=-1%260%260%260; x5sec=7b22726174656d616e616765723b32223a226131663338313366373636376139386230353064393932333633393736383537434c4f436c2b4546454c6134694d75486874477235414561437a63774e5467774e7a67334d7a7378227d; l=aBZu-uDGysaSOrzKzMaY_MNDH707gvfPNK5y1MayQTEhNP3AkeCXjJtoWMsLn_DX7ofhOouHI-a..; isg=BHZ2kDnRNuT9DcIyeWDIPEVfx6y4P7uuVKNbheBffdn2Ixe9SCXd4cbVP7_qjbLp",
    "referer": "https://detail.tmall.com/item.htm?spm=a1z10.3-b-s.w4011-15884659123.103.42684d47xxbPpL&id=567935633478&rn=b030d97bc9c089d8c7ed425dc8ca7805&abbucket=17&skuId=3625875477451",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
}


def _first_group(pattern, text):
    """Return the first capture group of the first match of *pattern* in
    *text*, or ``None`` when the pattern does not match at all."""
    match = re.search(pattern, text, re.S)
    return match.group(1) if match else None


def _post_review(document):
    """POST one review document as JSON to the storage endpoint.

    Storing is best-effort: a failed request is reported and skipped so the
    crawl can continue with the next review.
    """
    try:
        response = requests.post(url=STORE_URL,
                                 headers={'Content-Type': 'application/json'},
                                 json=document,
                                 timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Only network/HTTP errors are tolerated here; anything else
        # (including Ctrl-C) is allowed to propagate.
        print("failed to store review {}: {}".format(document.get('id'), exc))
        return
    print(response.status_code)
    print("++++++++++++++++")


def _crawl_reviews(product_id, seller_id, item_url, product_title):
    """Fetch the configured review pages of one product and store each review.

    :param product_id: value of the ``id`` query parameter of the product link
    :param seller_id: value of the ``user_id`` query parameter of the link
    :param item_url: canonical detail-page URL stored with every review
    :param product_title: product title stored as the review's ``context``
    """
    for page in REVIEW_PAGES:
        page_url = "https://rate.tmall.com/list_detail_rate.htm?itemId={}&sellerId={}&currentPage={}".format(product_id, seller_id, page)
        print(page_url)

        try:
            body = requests.get(page_url, headers=RATE_HEADERS,
                                timeout=REQUEST_TIMEOUT).text
        except requests.RequestException as exc:
            print("review request failed ({}): {}".format(page_url, exc))
            continue

        # The script expects the body to look like ``jsonpNNN({...})``; any
        # other response is skipped instead of crashing the whole crawl.
        payload = _first_group(r'jsonp.*?\((.*)\)', body)
        if payload is None:
            print("no JSONP payload in response, skipping page {}".format(page))
            continue
        try:
            data = json.loads(payload)
        except ValueError as exc:
            print("invalid JSON on page {}: {}".format(page, exc))
            continue
        print(payload)

        try:
            reviews = data['rateDetail']['rateList'] or []
        except (KeyError, TypeError):
            print("unexpected response structure on page {}".format(page))
            continue

        for position, review in enumerate(reviews, start=1):
            name = review['displayUserNick']
            rate_date = review['rateDate']
            content = review['rateContent']
            print(position, name, rate_date, content)

            # A fresh dict per review, so no state leaks between documents.
            _post_review({
                'url': item_url,
                'id': review['id'],
                'context': product_title,
                'content': content,
                'tC': rate_date,
                'user': {"displayName": name},
            })


def main(word):
    """Search Tmall for *word* and crawl reviews of the first few products.

    :param word: search keyword, inserted into the search URL's ``q`` parameter
    """
    url = "https://list.tmall.com/search_product.htm?q={}".format(word)
    print(url)
    try:
        html = requests.get(url, timeout=REQUEST_TIMEOUT).text
    except requests.RequestException as exc:
        print("search request failed for {!r}: {}".format(word, exc))
        return

    tree = etree.HTML(html)
    print(len(tree.xpath('//div[@class="product-iWrap"]')))
    print("++++++++++++++++++")

    # Query the title links once and read href and text from the same
    # element, so the two can never get out of step. Slicing (instead of
    # indexing 0..2) copes with result pages that have fewer products.
    links = [a for a in tree.xpath(TITLE_LINK_XPATH) if a.get("href")]
    for link in links[:MAX_PRODUCTS]:
        product_url = link.get("href")
        print(product_url)

        # ``[^&]*`` also matches a parameter that is last in the URL, which
        # a pattern requiring a trailing ``&`` would miss.
        product_id = _first_group(r'[&?]id=([^&]*)', product_url)
        sell_id = _first_group(r'user_id=([^&]*)', product_url)
        if not product_id or not sell_id:
            print("cannot extract id/user_id from link, skipping product")
            continue
        print(product_id)

        url1 = "https://detail.tmall.com/item.htm?id={}".format(product_id)
        product_title = link.xpath("string(.)").strip()
        print(product_title)
        print(product_title, product_id, sell_id, product_url, "\n")

        _crawl_reviews(product_id, sell_id, url1, product_title)


for word in word_lists:
    main(word)

# if __name__ == "__main__":
#     # content_all =["资生堂","红腰子","新透白","PK107","shiseido"]
#     word_lists = ["欧珀莱", "UNO", "悠莱", "珊珂", "泊美", "ZA", "水之密语", "丝蓓绮", "玛馨妮", "可悠然", "惠润", "海蓝之谜", "莱珀妮", "希思黎", "娇兰",
#                   "阿玛尼", "科颜氏", "兰芝", "倩碧", "碧欧泉", "悦木之源", "芭比波朗", "汤姆福特", "YSL", "MAC", "欧莱雅", "雪肌精", "馥蕾诗", "玉兰油",
#                   "梦妆", "丸美", "Allie 防晒", "Sofina 防晒", "悦诗风吟", "碧欧泉", "自然堂", "百雀羚", "美宝莲", "伊蒂之屋", "施华蔻", "滋源", "丝蕴",
#                   "阿道夫", "吕 ryo", "黛丝恩", "芙丽芳丝", "碧柔", "妮维雅 洁面", "力士", "菲诗小铺", "安宝笛", "妮维雅", "曼秀雷敦", "高夫"]
#
#     # content_all =["PK107","shiseido"]
#     # content_all =["Lays Stax","乐事无限","乐事 桶","Calbee 麦片","卡乐比 麦片","上好佳 薯片","薯条"]
#     length=len(word_lists)
#     xclist = []  #构建协程链接池
#     for j in range(length):
#         xclist.append(gevent.spawn(main,word_lists[j]))
#     print(xclist)
#
#
#
#     gevent.joinall(xclist)