# main_taobao.py

  1. import requests
  2. from bs4 import BeautifulSoup
  3. import json
  4. import re
  5. import pandas as pd
  6. # 构建 URL
  7. url = "https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/"
  8. params = {
  9. 'jsv': ['2.7.2'],
  10. 'appKey': ['12574478'],
  11. 't': ['1719997731260'],
  12. 'sign': ['0eb71404536a3ce3a6fc9466ad323975'],
  13. 'api': ['mtop.relationrecommend.wirelessrecommend.recommend'],
  14. 'v': ['2.0'],
  15. 'type': ['jsonp'],
  16. 'dataType': ['jsonp'],
  17. 'callback': [],
  18. 'data': [
  19. '{"appId":"34385","params":"{\\"device\\":\\"HMA-AL00\\",\\"isBeta\\":\\"false\\",\\"grayHair\\":\\"false\\",\\"from\\":\\"nt_history\\",\\"brand\\":\\"HUAWEI\\",\\"info\\":\\"wifi\\",\\"index\\":\\"4\\",\\"rainbow\\":\\"\\",\\"schemaType\\":\\"auction\\",\\"elderHome\\":\\"false\\",\\"isEnterSrpSearch\\":\\"true\\",\\"newSearch\\":\\"false\\",\\"network\\":\\"wifi\\",\\"subtype\\":\\"\\",\\"hasPreposeFilter\\":\\"false\\",\\"prepositionVersion\\":\\"v2\\",\\"client_os\\":\\"Android\\",\\"gpsEnabled\\":\\"false\\",\\"searchDoorFrom\\":\\"srp\\",\\"debug_rerankNewOpenCard\\":\\"false\\",\\"homePageVersion\\":\\"v7\\",\\"searchElderHomeOpen\\":\\"false\\",\\"search_action\\":\\"initiative\\",\\"sugg\\":\\"_4_1\\",\\"sversion\\":\\"13.6\\",\\"style\\":\\"list\\",\\"ttid\\":\\"600000@taobao_pc_10.7.0\\",\\"needTabs\\":\\"true\\",\\"areaCode\\":\\"CN\\",\\"vm\\":\\"nw\\",\\"countryNum\\":\\"156\\",\\"m\\":\\"pc\\",\\"page\\":1,\\"n\\":48,\\"q\\":\\"%E5%8D%A1%E5%A5%87%E5%B0%94\\",\\"tab\\":\\"all\\",\\"pageSize\\":48,\\"totalPage\\":100,\\"totalResults\\":4800,\\"sourceS\\":\\"0\\",\\"sort\\":\\"_coefp\\",\\"bcoffset\\":\\"\\",\\"ntoffset\\":\\"\\",\\"filterTag\\":\\"\\",\\"service\\":\\"\\",\\"prop\\":\\"\\",\\"loc\\":\\"\\",\\"start_price\\":null,\\"end_price\\":null,\\"startPrice\\":null,\\"endPrice\\":null,\\"itemIds\\":null,\\"p4pIds\\":null,\\"categoryp\\":\\"\\"}"}'
  20. ]
  21. }
  22. headers = {
  23. 'accept':
  24. '*/*',
  25. 'accept-language':
  26. 'zh-CN,zh;q=0.9',
  27. 'cookie':
  28. 't=ae9a5bced4e814b1e5326389580124be; cna=W1YJH+ZpOm0BASQKQsZWCsn0; thw=xx; _tb_token_=3fe477e908565; xlly_s=1; _samesite_flag_=true; 3PcFlag=1719822445580; cookie2=1bfa9e0fe989fb148b05a3b269ab4ff4; unb=786076921; lgc=qq986150960; cancelledSubSites=empty; cookie17=VAmsksGfmXs%2F; dnk=%5Cu6D45%5Cu8272%5Cu6D41%5Cu4E910; tracknick=qq986150960; _l_g_=Ug%3D%3D; sg=012; _nk_=qq986150960; cookie1=W8gxM%2FLe7ljyh2uh4d1LWpQmu%2FQweXV2gpwXFVTbCjk%3D; sgcookie=E100QH5Ncp0OvAEB7Shh1H8%2BfVvDQt0hZE6JqqXZxVsNPj4J%2FCx4SRixy2HJurtdCG96z6O%2Fs9fVsxPROpv%2FlL6EOCGZ8FyKg3tEV7G%2FDJA7%2FClszclUfoBg9v5dk8SIRlj%2F; havana_lgc2_0=eyJoaWQiOjc4NjA3NjkyMSwic2ciOiJjYzVmYmNhM2Q3MDcxMmFkNTc5MDViMjVlZmNmYjQ0OSIsInNpdGUiOjAsInRva2VuIjoiMWJuRjZCMjN2QXJaNkdRNzNfOHN4bGcifQ; _hvn_lgc_=0; havana_lgc_exp=1750926465175; cookie3_bak=1bfa9e0fe989fb148b05a3b269ab4ff4; cookie3_bak_exp=1720081665175; wk_cookie2=1bbd0ac41a095c530186c01b95bfd4bf; wk_unb=VAmsksGfmXs%2F; uc1=cookie16=VT5L2FSpNgq6fDudInPRgavC%2BQ%3D%3D&cookie14=UoYfqCMMMZyu1w%3D%3D&pas=0&existShop=false&cookie21=W5iHLLyFeYZ1WM9hVnmS&cookie15=V32FPkk%2Fw0dUvg%3D%3D; sn=; uc3=vt3=F8dD3i41btFAp24R98w%3D&nk2=EuRbCy%2FHVkweNeY%3D&lg2=VT5L2FSpMGV7TQ%3D%3D&id2=VAmsksGfmXs%2F; csg=07fac379; env_bak=FM%2Bgz3ym3%2BlZwpWu99ggUtAI6C4ZIwuIV6CvfmyeQV1j; skt=40b88493ce1360a7; existShop=MTcxOTgyMjQ2NQ%3D%3D; uc4=nk4=0%40EJv4suhZ2rF5nsl3QUPBySWd7Ng6%2Fw%3D%3D&id4=0%40VhCXy35hR5p56p9WWGSGGJgUBdo%3D; _cc_=U%2BGCWk%2F7og%3D%3D; sdkSilent=1719908866287; mtop_partitioned_detect=1; _m_h5_tk=a618463e9757da8c4b559ecdfc9595eb_1720006718685; _m_h5_tk_enc=61062c4120fb67aeef77ddbd727e48e8; 
tfstk=f23-OV2UZICJGktnPbtmt2HzMgdmp4hyEYl1-J2lAxHxO-bHazxUJvexL8quP8bLJXHEKvheaWwIdvenqnYiabzURdDpSFcyz683ZDQ7AiOQZ7TJnnXmabzFgsXBjmGPvFqtP4ZIdrZbTW4QN8ZBMoNgOy_7Aasfh-PQRJMQFiGb95wCFyw7hfJPuJ-QIwnLdDe_9MY_y00YwotmN-a3rBVL2vh8HdatS7BaCbwARw2yaX3SOv9d-PiZH2EsLeQ4rfnbl7nvOiw7X0MugvLOP8gxN4woPKbuF4ljj2DXOwwKccZTFmJ28uiZEcZxoLQ4CmGxton2Tg2nbWMUb2vNz8GI_VogWeCLMchbkg8qSVUmrO2TtgOvMMSUVSmGzGmmzDCjHSeMiYsFYoIaMRAv4MSUVSPYIIXOYMrAb; isg=BHd3DnIR3csAY1lA0Q41onPjBm3BPEueTv2kmskg0cUjeJ-60Q4N7lceW9gmlCMW',
  29. 'referer':
  30. 'https://s.taobao.com/search?_input_charset=utf-8&commend=all&ie=utf8&initiative_id=tbindexz_20170306&localImgKey=&page=1&q=%E5%8D%A1%E5%A5%87%E5%B0%94&search_type=item&source=suggest&sourceId=tb.index&spm=a21bo.jianhua%2Fa.201856.d13&ssid=s5-e&suggest=0_2&suggest_query=&tab=all&wq=',
  31. 'sec-ch-ua':
  32. '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
  33. 'sec-ch-ua-mobile':
  34. '?0',
  35. 'sec-ch-ua-platform':
  36. '"macOS"',
  37. 'sec-fetch-dest':
  38. 'script',
  39. 'sec-fetch-mode':
  40. 'no-cors',
  41. 'sec-fetch-site':
  42. 'same-site',
  43. 'user-agent':
  44. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
  45. }
  46. # 自定义异常类
  47. class ConversionError(Exception):
  48. pass
  49. def write_stores_data_to_excel(data):
  50. if data:
  51. try:
  52. stores = data['data']['itemsArray']
  53. except (KeyError, TypeError) as e:
  54. print(f"write_stores_data_to_excel: Error parse value : {e}")
  55. stores = None
  56. else:
  57. print("No data to write to Excel.")
  58. stores = None
  59. return stores
  60. if __name__ == "__main__":
  61. try:
  62. search_key = '卡奇尔'
  63. output_file = f"output_{search_key}.xlsx"
  64. combined_list = []
  65. for i in range(1, 10):
  66. # params_data = json.loads(params['data'][0])
  67. # params_data_params = json.loads(params_data['params'])
  68. # params_data_params['page'] = i
  69. # params_data_params['q'] = search_key
  70. # params_data['params'] = json.dumps(params_data_params)
  71. # params['data'][0] = json.dumps(params_data)
  72. response = requests.request("GET",
  73. url=url,
  74. headers=headers,
  75. params=params)
  76. if response.status_code == 200:
  77. response_str = response.content.decode('utf-8')
  78. # 使用正则表达式去除回调函数部分
  79. data = re.search(r"\((.*)\)", response_str).group(1)
  80. # 打印结果
  81. data = json.loads(data)
  82. goods = write_stores_data_to_excel(data)
  83. if goods:
  84. combined_list.extend(goods)
  85. if len(combined_list) > 50:
  86. break
  87. else:
  88. # 请求失败,处理错误
  89. print(
  90. f"API Request failed with status code: {response.status_code}"
  91. )
  92. break
  93. if len(combined_list) != 0:
  94. df = pd.DataFrame(combined_list)
  95. # 将 DataFrame 写入 Excel 文件
  96. df.to_excel(output_file, index=False, engine='xlsxwriter')
  97. else:
  98. print("No data found in the JSON file.")
  99. except ConversionError as e:
  100. print(e)
  101. except Exception as e:
  102. print(f"An error occurred: {e}")