"""Query the Taobao H5 search-recommend endpoint and export the items to Excel.

The script sends a pre-built (captured) request, strips the JSONP wrapper from
the response, collects the entries found under ``data.itemsArray`` and writes
them to ``output_<search_key>.xlsx``.
"""
import json
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Build the request URL
url = "https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/"

# Query-string parameters, kept exactly as captured.  `data` is a JSON document
# whose `params` field is itself a JSON-encoded string.
params = {
    'jsv': ['2.7.2'],
    'appKey': ['12574478'],
    't': ['1719997731260'],
    'sign': ['0eb71404536a3ce3a6fc9466ad323975'],
    'api': ['mtop.relationrecommend.wirelessrecommend.recommend'],
    'v': ['2.0'],
    'type': ['jsonp'],
    'dataType': ['jsonp'],
    'callback': [],
    'data': [
        '{"appId":"34385","params":"{\\"device\\":\\"HMA-AL00\\",\\"isBeta\\":\\"false\\",\\"grayHair\\":\\"false\\",\\"from\\":\\"nt_history\\",\\"brand\\":\\"HUAWEI\\",\\"info\\":\\"wifi\\",\\"index\\":\\"4\\",\\"rainbow\\":\\"\\",\\"schemaType\\":\\"auction\\",\\"elderHome\\":\\"false\\",\\"isEnterSrpSearch\\":\\"true\\",\\"newSearch\\":\\"false\\",\\"network\\":\\"wifi\\",\\"subtype\\":\\"\\",\\"hasPreposeFilter\\":\\"false\\",\\"prepositionVersion\\":\\"v2\\",\\"client_os\\":\\"Android\\",\\"gpsEnabled\\":\\"false\\",\\"searchDoorFrom\\":\\"srp\\",\\"debug_rerankNewOpenCard\\":\\"false\\",\\"homePageVersion\\":\\"v7\\",\\"searchElderHomeOpen\\":\\"false\\",\\"search_action\\":\\"initiative\\",\\"sugg\\":\\"_4_1\\",\\"sversion\\":\\"13.6\\",\\"style\\":\\"list\\",\\"ttid\\":\\"600000@taobao_pc_10.7.0\\",\\"needTabs\\":\\"true\\",\\"areaCode\\":\\"CN\\",\\"vm\\":\\"nw\\",\\"countryNum\\":\\"156\\",\\"m\\":\\"pc\\",\\"page\\":1,\\"n\\":48,\\"q\\":\\"%E5%8D%A1%E5%A5%87%E5%B0%94\\",\\"tab\\":\\"all\\",\\"pageSize\\":48,\\"totalPage\\":100,\\"totalResults\\":4800,\\"sourceS\\":\\"0\\",\\"sort\\":\\"_coefp\\",\\"bcoffset\\":\\"\\",\\"ntoffset\\":\\"\\",\\"filterTag\\":\\"\\",\\"service\\":\\"\\",\\"prop\\":\\"\\",\\"loc\\":\\"\\",\\"start_price\\":null,\\"end_price\\":null,\\"startPrice\\":null,\\"endPrice\\":null,\\"itemIds\\":null,\\"p4pIds\\":null,\\"categoryp\\":\\"\\"}"}'
    ]
}

# SECURITY NOTE(review): the `cookie` header below is a captured browser
# session committed to source.  Consider loading it from the environment or a
# local, untracked config file instead.  The value is split across adjacent
# string literals purely for line length; the concatenation is unchanged.
headers = {
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cookie': (
        't=ae9a5bced4e814b1e5326389580124be; cna=W1YJH+ZpOm0BASQKQsZWCsn0; '
        'thw=xx; _tb_token_=3fe477e908565; xlly_s=1; _samesite_flag_=true; 3PcFlag=1719822445580; cookie2=1bfa9e0fe989fb148b05a3b269ab4ff4; unb=786076921; lgc=qq986150960; cancelledSubSites=empty; cookie17=VAmsksGfmXs%2F; dnk=%5Cu6D45%5Cu8272%5Cu6D41%5Cu4E910; tracknick=qq986150960; _l_g_=Ug%3D%3D; sg=012; _nk_=qq986150960; cookie1=W8gxM%2FLe7ljyh2uh4d1LWpQmu%2FQweXV2gpwXFVTbCjk%3D; '
        'sgcookie=E100QH5Ncp0OvAEB7Shh1H8%2BfVvDQt0hZE6JqqXZxVsNPj4J%2FCx4SRixy2HJurtdCG96z6O%2Fs9fVsxPROpv%2FlL6EOCGZ8FyKg3tEV7G%2FDJA7%2FClszclUfoBg9v5dk8SIRlj%2F; havana_lgc2_0=eyJoaWQiOjc4NjA3NjkyMSwic2ciOiJjYzVmYmNhM2Q3MDcxMmFkNTc5MDViMjVlZmNmYjQ0OSIsInNpdGUiOjAsInRva2VuIjoiMWJuRjZCMjN2QXJaNkdRNzNfOHN4bGcifQ; _hvn_lgc_=0; havana_lgc_exp=1750926465175; cookie3_bak=1bfa9e0fe989fb148b05a3b269ab4ff4; cookie3_bak_exp=1720081665175; wk_cookie2=1bbd0ac41a095c530186c01b95bfd4bf; wk_unb=VAmsksGfmXs%2F; '
        'uc1=cookie16=VT5L2FSpNgq6fDudInPRgavC%2BQ%3D%3D&cookie14=UoYfqCMMMZyu1w%3D%3D&pas=0&existShop=false&cookie21=W5iHLLyFeYZ1WM9hVnmS&cookie15=V32FPkk%2Fw0dUvg%3D%3D; sn=; uc3=vt3=F8dD3i41btFAp24R98w%3D&nk2=EuRbCy%2FHVkweNeY%3D&lg2=VT5L2FSpMGV7TQ%3D%3D&id2=VAmsksGfmXs%2F; csg=07fac379; env_bak=FM%2Bgz3ym3%2BlZwpWu99ggUtAI6C4ZIwuIV6CvfmyeQV1j; skt=40b88493ce1360a7; existShop=MTcxOTgyMjQ2NQ%3D%3D; uc4=nk4=0%40EJv4suhZ2rF5nsl3QUPBySWd7Ng6%2Fw%3D%3D&id4=0%40VhCXy35hR5p56p9WWGSGGJgUBdo%3D; _cc_=U%2BGCWk%2F7og%3D%3D; sdkSilent=1719908866287; mtop_partitioned_detect=1; '
        '_m_h5_tk=a618463e9757da8c4b559ecdfc9595eb_1720006718685; _m_h5_tk_enc=61062c4120fb67aeef77ddbd727e48e8; '
        'tfstk=f23-OV2UZICJGktnPbtmt2HzMgdmp4hyEYl1-J2lAxHxO-bHazxUJvexL8quP8bLJXHEKvheaWwIdvenqnYiabzURdDpSFcyz683ZDQ7AiOQZ7TJnnXmabzFgsXBjmGPvFqtP4ZIdrZbTW4QN8ZBMoNgOy_7Aasfh-PQRJMQFiGb95wCFyw7hfJPuJ-QIwnLdDe_9MY_y00YwotmN-a3rBVL2vh8HdatS7BaCbwARw2yaX3SOv9d-PiZH2EsLeQ4rfnbl7nvOiw7X0MugvLOP8gxN4woPKbuF4ljj2DXOwwKccZTFmJ28uiZEcZxoLQ4CmGxton2Tg2nbWMUb2vNz8GI_VogWeCLMchbkg8qSVUmrO2TtgOvMMSUVSmGzGmmzDCjHSeMiYsFYoIaMRAv4MSUVSPYIIXOYMrAb; '
        'isg=BHd3DnIR3csAY1lA0Q41onPjBm3BPEueTv2kmskg0cUjeJ-60Q4N7lceW9gmlCMW'
    ),
    'referer': 'https://s.taobao.com/search?_input_charset=utf-8&commend=all&ie=utf8&initiative_id=tbindexz_20170306&localImgKey=&page=1&q=%E5%8D%A1%E5%A5%87%E5%B0%94&search_type=item&source=suggest&sourceId=tb.index&spm=a21bo.jianhua%2Fa.201856.d13&ssid=s5-e&suggest=0_2&suggest_query=&tab=all&wq=',
    'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'script',
    'sec-fetch-mode': 'no-cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the script forever.
_REQUEST_TIMEOUT_SECONDS = 30

# Captures everything between the first "(" and the last ")" of a JSONP reply.
# DOTALL lets the payload span several lines.
_JSONP_BODY = re.compile(r"\((.*)\)", re.DOTALL)


# Custom exception class
class ConversionError(Exception):
    """Raised when a response body cannot be converted into JSON data."""


def write_stores_data_to_excel(data):
    """Return the item list stored under ``data['data']['itemsArray']``.

    Despite its name this function writes nothing; it only extracts the list
    that the caller later writes to Excel.

    Args:
        data: Decoded JSON response (expected to be a nested dict).

    Returns:
        The value of ``data['data']['itemsArray']``, or ``None`` when ``data``
        is falsy or does not have that structure.  A diagnostic message is
        printed in both failure cases.
    """
    if not data:
        print("No data to write to Excel.")
        return None
    try:
        return data['data']['itemsArray']
    except (KeyError, TypeError) as e:
        print(f"write_stores_data_to_excel: Error parse value : {e}")
        return None


def _parse_jsonp(text):
    """Strip the JSONP callback wrapper from *text* and decode the JSON inside.

    Raises:
        ConversionError: if no parenthesised payload is present or the payload
            is not valid JSON.
    """
    match = _JSONP_BODY.search(text)
    if match is None:
        raise ConversionError(
            "Response is not JSONP: no parenthesised payload found"
        )
    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError as err:
        raise ConversionError(f"JSONP payload is not valid JSON: {err}") from err


def main():
    """Fetch search results and write the collected items to an Excel file."""
    search_key = '卡奇尔'
    output_file = f"output_{search_key}.xlsx"
    combined_list = []
    try:
        for _ in range(1, 10):
            # TODO(review): every iteration sends the same request, so the same
            # page is fetched repeatedly.  The paging code below was disabled
            # in the original; presumably `sign` must be recomputed whenever
            # `data` changes -- confirm before re-enabling.
            # params_data = json.loads(params['data'][0])
            # params_data_params = json.loads(params_data['params'])
            # params_data_params['page'] = i
            # params_data_params['q'] = search_key
            # params_data['params'] = json.dumps(params_data_params)
            # params['data'][0] = json.dumps(params_data)
            response = requests.request(
                "GET",
                url=url,
                headers=headers,
                params=params,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            if response.status_code != 200:
                # Request failed: report the status and stop fetching.
                print(
                    f"API Request failed with status code: {response.status_code}"
                )
                break

            response_str = response.content.decode('utf-8')
            # Remove the JSONP callback wrapper, then decode the JSON payload.
            data = _parse_jsonp(response_str)
            goods = write_stores_data_to_excel(data)
            if goods:
                combined_list.extend(goods)
            # Stop once more than 50 items have been collected.
            if len(combined_list) > 50:
                break

        if combined_list:
            df = pd.DataFrame(combined_list)
            # Write the DataFrame to the Excel file
            df.to_excel(output_file, index=False, engine='xlsxwriter')
        else:
            print("No data found in the JSON file.")
    except ConversionError as e:
        print(e)
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()