332 lines
20 KiB
Plaintext
332 lines
20 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import requests as r\n",
|
|
"from bs4 import BeautifulSoup, Tag\n",
|
|
"import json\n",
|
|
"import os\n",
|
|
"from threading import Thread\n",
|
|
"from urllib.parse import urlparse"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def download_worker(media_url, save_path, timeout=30):\n",
"    \"\"\"Download one file and write its bytes to save_path.\n",
"\n",
"    Args:\n",
"        media_url: URL fetched with requests.get.\n",
"        save_path: Destination path, opened in binary write mode.\n",
"        timeout: Seconds passed to requests.get as its timeout, so a stalled\n",
"            server cannot block the calling thread forever.\n",
"\n",
"    Request failures and HTTP error responses (4xx/5xx) are printed and the\n",
"    function returns without writing, so an error page is never saved under\n",
"    an image file name.\n",
"    \"\"\"\n",
"    try:\n",
"        res = r.get(media_url, timeout=timeout)\n",
"        res.raise_for_status()\n",
"    except r.RequestException as err:\n",
"        # Report and give up on this file only; other downloads are unaffected.\n",
"        print('[crawler] 图像下载失败', media_url, err)\n",
"        return\n",
"    with open(save_path, 'wb') as fp:\n",
"        fp.write(res.content)\n",
"    print('[crawler] 图像已经保存至', save_path)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def crawler_issue(issue_id: str):\n",
"    \"\"\"Save the comment text and linked PNG images of one GitHub issue.\n",
"\n",
"    The text of every paragraph inside the 'js-comment-body' table cells is\n",
"    joined with newlines and written to\n",
"    ../docs/digital-issue/issue-ID/issue.md. Every link in those paragraphs\n",
"    whose href contains '.png' is downloaded into the same folder by\n",
"    download_worker, one thread per image.\n",
"\n",
"    Args:\n",
"        issue_id: Issue number. It is passed through str(), so an int works too.\n",
"    \"\"\"\n",
"    folder = 'issue-' + str(issue_id)\n",
"    out_dir = '../docs/digital-issue/' + folder\n",
"    res = r.get('https://github.com/Digital-EDA/Digital-IDE/issues/' + str(issue_id), timeout=30)\n",
"    if res.status_code != 200:\n",
"        # Do not parse an error page and write an empty issue.md for it.\n",
"        print('issue url {} return {}'.format(res.url, res.status_code))\n",
"        return\n",
"    os.makedirs(out_dir, exist_ok=True)\n",
"    soup = BeautifulSoup(res.text, 'html.parser')\n",
"\n",
"    texts = []\n",
"    workers = []\n",
"    for td in soup.find_all('td', attrs={ 'class': 'js-comment-body' }):\n",
"        for p in td.find_all('p'):\n",
"            texts.append(p.text)\n",
"            for a in p.find_all('a'):\n",
"                # .get() returns None for an anchor without href; attrs['href']\n",
"                # would raise KeyError and abort the whole issue.\n",
"                href = a.get('href')\n",
"                if not href or '.png' not in href:\n",
"                    continue\n",
"                name = urlparse(href).path.split('/')[-1]\n",
"                if not name:\n",
"                    # A path ending in '/' gives no file name to save under.\n",
"                    continue\n",
"                save_path = out_dir + '/' + name\n",
"                t = Thread(target=download_worker, args=(href, save_path))\n",
"                t.start()\n",
"                workers.append(t)\n",
"\n",
"    with open(out_dir + '/issue.md', 'w', encoding='utf-8') as fp:\n",
"        fp.write('\\n'.join(texts))\n",
"\n",
"    # Wait for the downloads so the function returns only when every image of\n",
"    # this issue has been handled and no output leaks into later cells.\n",
"    for t in workers:\n",
"        t.join()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-67/331660157-920d7143-f262-42d5-af57-a817bf3aee01.png\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Crawl issue 67 on its own.\n",
"crawler_issue(issue_id=67)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def crawler_issue_page(page_url: str):\n",
"    \"\"\"Crawl every issue listed on one page of the issue list.\n",
"\n",
"    Fetches page_url, reads the issue numbers from the child elements of the\n",
"    'js-navigation-container' div, and calls crawler_issue for each of them.\n",
"\n",
"    Args:\n",
"        page_url: URL of one issue-list page.\n",
"    \"\"\"\n",
"    res = r.get(page_url, timeout=30)\n",
"    if res.status_code != 200:\n",
"        # Print the real status code; not every failure is a 404.\n",
"        print('page url {} return {}'.format(page_url, res.status_code))\n",
"        return\n",
"    soup = BeautifulSoup(res.text, 'html.parser')\n",
"    issue_container = soup.find('div', { 'class': 'js-navigation-container js-active-navigation-container' })\n",
"    if issue_container is None:\n",
"        # find() returns None when the div is absent; .children would then raise.\n",
"        print('page url {} has no issue container'.format(page_url))\n",
"        return\n",
"\n",
"    issue_ids = []\n",
"    for div in issue_container.children:\n",
"        if not isinstance(div, Tag):\n",
"            continue\n",
"        # The issue number is taken from the part of the element id after the\n",
"        # last '_'; elements without such a numeric suffix are skipped.\n",
"        suffix = div.attrs.get('id', '').split('_')[-1]\n",
"        if suffix.isdigit():\n",
"            issue_ids.append(int(suffix))\n",
"\n",
"    for issue_id in issue_ids:\n",
"        print('爬取 issue-{} 中 ...'.format(issue_id))\n",
"        crawler_issue(issue_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"爬取 issue-71 中 ...\n",
|
|
"爬取 issue-70 中 ...\n",
|
|
"爬取 issue-69 中 ...\n",
|
|
"爬取 issue-68 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220146-9deeccbf-cc0e-4810-bdd9-80e11d083c15.png\n",
|
|
"爬取 issue-67 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220242-9abe8b7b-5985-4c1a-ac0f-30aba75ef8d2.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331219978-d22a5a5d-da00-430c-b966-68517ab264c0.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220080-b0c5f0af-e38c-4819-9efa-7491650ddb92.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220203-3bb8aefd-e04a-4eb7-ae87-ca48f1daa120.png\n",
|
|
"爬取 issue-66 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-67/331660157-920d7143-f262-42d5-af57-a817bf3aee01.png\n",
|
|
"爬取 issue-65 中 ...\n",
|
|
"爬取 issue-64 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-66/330112198-fb783018-b217-4cab-afef-32d339c4047a.png\n",
|
|
"爬取 issue-63 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328184842-2e13483e-4ece-4eb6-8c8a-3d9c92a97651.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328185054-e08c66b2-7e87-4238-88cb-e0672b2de530.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328185191-305e8b86-a9de-434b-a1cf-80c441c51df2.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-64/327033289-dc79968e-8279-43aa-b6a1-a6f1acd4155f.png\n",
|
|
"爬取 issue-62 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-64/327033187-1b9134e0-387e-491d-a478-3ea6438728a4.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-63/326496750-53a0c779-8a4b-418d-b21c-4ea2151edd92.png\n",
|
|
"爬取 issue-61 中 ...\n",
|
|
"爬取 issue-60 中 ...\n",
|
|
"爬取 issue-55 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-61/325593801-bd5c4229-f47a-4d6e-99a1-0cc912378f0e.png\n",
|
|
"爬取 issue-54 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-55/316435757-14c497a5-6ecc-4f97-850b-7e13988ec7aa.png\n",
|
|
"爬取 issue-53 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-54/306546248-83a57abe-63ff-43ae-8140-5e2b284193f0.png\n",
|
|
"爬取 issue-52 中 ...\n",
|
|
"爬取 issue-51 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-53/305870587-69d44e21-edfc-4fc0-9ad7-daaec393caac.png\n",
|
|
"爬取 issue-50 中 ...\n",
|
|
"爬取 issue-49 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-51/302262263-3e5581e0-4e36-463b-9379-43d1f9e366b8.png\n",
|
|
"爬取 issue-48 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495384-94077fee-624f-48cc-98fd-d6e6fe16251b.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495541-23556932-5526-4428-b1c2-25c840352422.png\n",
|
|
"爬取 issue-47 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495316-f8c98f42-a320-4c4e-84f7-2aaab4fb39f1.png\n",
|
|
"爬取 issue-46 中 ...\n",
|
|
"爬取 issue-45 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-46/294358431-92b4f252-91a9-4326-ae14-9d21037d3478.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-46/294358432-2bbec170-1400-49c3-a30e-a0acc4bf3f66.png\n",
|
|
"爬取 issue-44 中 ...\n",
|
|
"爬取 issue-43 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335602119-bd06d356-3356-45a8-8556-b9b60fdb337b.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665721-e4730448-1588-424c-9a98-c661dfb5237d.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665609-f6bf03cd-857b-4156-8795-6e41416d96e4.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335599172-7ea754a3-1dea-428f-baf4-e04c400e2744.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665655-3341f355-0ca4-4757-9814-5702515922e7.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665566-dc8dd944-f464-4606-8900-4562cdf404c7.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335601886-3731689f-7ddb-4d4b-9e73-9e1a631e403f.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335602049-bade0794-1653-42f7-8816-d3f3484009e3.png\n",
|
|
"爬取 issue-42 中 ...\n",
|
|
"爬取 issue-41 中 ...\n",
|
|
"爬取 issue-40 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-42/292425880-bc7f4792-6c68-45e0-862f-23b7b1232dce.png\n",
|
|
"爬取 issue-39 中 ...\n",
|
|
"爬取 issue-38 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-39/291777353-268ce49b-78c4-46a8-b543-542db0ef6dcc.png\n",
|
|
"爬取 issue-37 中 ...\n",
|
|
"爬取 issue-36 中 ...\n",
|
|
"爬取 issue-35 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-37/291228777-6d65928c-5ffd-4aab-af19-03291f31473a.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267690-67d01501-ab54-4274-8425-e876b7035391.png\n",
|
|
"爬取 issue-34 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267797-b00bdf44-c648-47d3-9bd9-eb1e68b12193.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268221-1c380a09-76b5-45a6-aff3-d8a873868402.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268654-d8a5686c-c109-461b-b68a-ff00bcd9f462.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268318-27f79c67-8e8d-419a-a0b7-e744b416b704.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268113-20044b19-d508-47ea-9f07-f675bc72a2cb.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267929-d697e859-8ad0-4cc3-aa15-e50d0a26dc53.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268011-69f0a3ef-d509-47c9-b949-36d280edc4f8.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268429-e0517629-7b5a-4751-a431-330f04d8c1ee.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268509-e9c687a4-2b28-46f1-8670-827359df792e.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291225163-78241e4c-400a-4d75-a008-3c34ca26ae4a.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291225347-63167ad5-6896-4afe-a6d7-197532a23f8f.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-35/291224673-a3f70f16-271c-4905-b1f0-1c011b56d3bf.png\n",
|
|
"爬取 issue-33 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291140726-cba47a2d-dd99-49e5-b302-733e266d0c44.png\n",
|
|
"爬取 issue-32 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291310957-8b8f17a0-ec66-4009-9657-2433d51319c8.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291140726-cba47a2d-dd99-49e5-b302-733e266d0c44.png\n",
|
|
"爬取 issue-31 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-32/290024353-47c9297c-6160-402a-b3bf-e08bd9c923ea.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-32/290024305-41e868d5-e737-4f1d-93af-558db2abba91.png\n",
|
|
"爬取 issue-30 中 ...\n",
|
|
"爬取 issue-29 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-31/290022938-62774f96-82ac-46f4-9599-818a6a430cd9.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-30/290022151-dc253b33-fbfe-4f9c-8023-e00e180015d6.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-30/290022070-0e449a39-5360-474e-bc18-1c1729071f66.png\n",
|
|
"爬取 issue-28 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290018445-3d43470c-5b8b-4466-b8c2-800694771143.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290020867-53b8d764-f8f8-41ab-8262-0ac17702fa42.png\n",
|
|
"爬取 issue-27 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290020867-53b8d764-f8f8-41ab-8262-0ac17702fa42.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290018445-3d43470c-5b8b-4466-b8c2-800694771143.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-28/288112763-1dfaa7f2-f52b-42b5-ba40-c47c16205265.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-28/288112141-a1c08800-b6dd-4215-9ecf-288759cc0174.png\n",
|
|
"爬取 issue-26 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-27/287923928-5e817c41-c54d-409c-be36-576efb0a299a.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-27/287924096-490bef45-fc31-4ffc-a3a5-d077c350ff88.png\n",
|
|
"爬取 issue-25 中 ...\n",
|
|
"爬取 issue-24 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-25/282802849-27fc9f7a-f7f5-4b8d-84e5-1060166b0ad7.png\n",
|
|
"爬取 issue-23 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-25/282800215-946fad7c-54ae-46c9-be9a-d5a69b4fbf7b.png\n",
|
|
"爬取 issue-22 中 ...\n",
|
|
"爬取 issue-21 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-22/275505102-406e3256-7a3e-4deb-9456-2a49b41ca85d.png\n",
|
|
"爬取 issue-20 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-21/279390992-4a04af63-c176-49a8-a60e-5c3e95c07f8b.png\n",
|
|
"爬取 issue-19 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-21/275300475-444cf824-5489-461d-9678-440901554f68.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173022-80623e60-fba6-4f4c-85eb-5fb542ba8170.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173043-927f355a-37b5-45f5-bd88-78317549bf54.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173050-b186e855-4ad3-44c0-8708-59b11c5506a7.png\n",
|
|
"爬取 issue-17 中 ...\n",
|
|
"爬取 issue-16 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-17/264944498-f609f333-53a1-40f3-8bd5-a320b21398df.png\n",
|
|
"爬取 issue-15 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-16/264304291-1fbb69c3-02fa-4d50-9dec-cc6da46c1dd2.png\n",
|
|
"爬取 issue-14 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-15/264302335-b7b9d42e-aa16-474d-8c49-5573e397c374.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-15/264302196-e355b398-1ba7-4b7e-aa0b-b1d67646182a.png\n",
|
|
"爬取 issue-13 中 ...\n",
|
|
"爬取 issue-12 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-14/263748729-d0d2d005-019b-404f-a720-8f75b19a52ba.png\n",
|
|
"爬取 issue-11 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/263475138-92d989d2-2b5e-432c-bfde-8bd8f3524b6e.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/263475180-595e8d91-2645-47bf-a4db-24aad89d12ae.png\n",
|
|
"爬取 issue-10 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/289706401-d79d32f8-5738-4088-bc92-74e19da24885.png\n",
|
|
"爬取 issue-9 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-11/263142498-1af4cb41-c431-4de0-9d26-65729d3dfe65.png\n",
|
|
"爬取 issue-8 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-10/263140095-8d3beafa-ad35-405d-bcf7-3964853174b2.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163518-42173b79-b7b3-41c3-8860-1007f140fe86.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163571-1caa7264-3702-4467-9986-49e0557b0edc.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163490-45ab4e0e-7175-4a65-9a70-4e51b4c1366a.png\n",
|
|
"爬取 issue-7 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-8/260685640-0c9db5e7-ae1e-4558-b3f7-72ebb4f67043.png\n",
|
|
"爬取 issue-6 中 ...\n",
|
|
"爬取 issue-5 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-7/260630437-8d34c4a0-cc48-44b5-bbb8-94742c2e0776.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-7/260630481-0cbc73cf-f516-4b3d-92f5-17598f089297.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976429-8ad21695-2397-4a79-8fab-43fa01da5e24.png\n",
|
|
"爬取 issue-4 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/260248721-a56ad686-d1ac-4fa4-9fe7-fb9007f7a1e3.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976463-9b61e743-536e-4d53-af74-f8015b104a36.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976410-22019e06-df93-48b1-93a6-05901197b277.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/260716833-f3d89a67-7b4f-4daa-9a0a-8313dcf9caaa.png\n",
|
|
"爬取 issue-3 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/255643011-49b2efa2-09f4-463e-908b-4510d2110429.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/260307745-b545d146-a49b-4ebf-af88-ce3982a2e0ff.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/255646125-b5035137-6df5-4189-95c2-199970dfbe8d.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386114-73e4b52e-3eee-4652-971e-4bf123d6c9aa.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386123-e5e990fa-af1f-439b-bb74-aa20af50366c.png\n",
|
|
"爬取 issue-2 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386053-b536d9d7-fe3b-4c83-a581-0884e3cf04f6.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386339-56413f1c-bb32-49c0-aa85-dceeceb8594a.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255608587-cd487ee5-95be-47a5-90d4-5f02e0a94cc2.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255603702-8379ccc3-4d7a-407f-8777-aba9666e7c58.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/260107703-9e01db47-6c15-4d41-b823-a1896be68af7.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/260107481-5c0127b4-2273-4a14-b996-2d109a947a5e.png\n",
|
|
"爬取 issue-1 中 ...\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255342825-dae63d0c-05b8-4965-b2e0-19df84778a5e.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/253884964-cbaf42fa-fa7d-48ed-8353-184dd0895a12.png\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/255247553-45d154cc-37d1-459d-80d3-adad6324de4c.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/256974788-7c2e9bf0-d239-4022-9aac-f8f160afad6f.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/256974788-7c2e9bf0-d239-4022-9aac-f8f160afad6f.png\n",
|
|
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-1/253879145-d8f82699-aca6-44aa-bb1c-57066cf39f66.png\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Issue-list pages 1 to 3; the URLs differ only in the page number.\n",
"page_urls = [\n",
"    'https://github.com/Digital-EDA/Digital-IDE/issues?page={}&q='.format(page_no)\n",
"    for page_no in range(1, 4)\n",
"]\n",
"\n",
"for url in page_urls:\n",
"    crawler_issue_page(url)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "base",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|