{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a3c1e5f7",
   "metadata": {},
   "source": [
    "# Crawl arztsuche-bw.de result pages\n",
    "\n",
    "Downloads the paginated search-result listing from arztsuche-bw.de and stores each page as `page_<n>.html` in the output directory.\n",
    "\n",
    "Pages that cannot be downloaded are reported at the end of the crawl instead of being skipped silently."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83707cda",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72fb7570",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search-result listing; the query string selects arztgruppe=psychotherapeut and\n",
    "# landkreis=Karlsruhe+-+Stadt, sorted by name in ascending order.\n",
    "url = \"https://www.arztsuche-bw.de/index.php?suchen=1&sorting=name&direction=ASC&arztgruppe=psychotherapeut&landkreis=Karlsruhe+-+Stadt\"\n",
    "# Query-string suffix; the item offset of the requested page is appended to it.\n",
    "offset_str = \"&offset=\"\n",
    "\n",
    "# Directory the downloaded HTML pages are written to.\n",
    "outpath = \"./data\"\n",
    "\n",
    "# We can see on the website there are 14 pages -- update if the listing grows.\n",
    "N_PAGES = 14\n",
    "# The listing shows 20 items per page, so page k starts at offset k * 20.\n",
    "ITEMS_PER_PAGE = 20\n",
    "# Seconds to wait for the server; without a timeout a stalled request blocks forever.\n",
    "REQUEST_TIMEOUT = 30"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8eff4531",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the output directory; exist_ok avoids a separate existence check.\n",
    "os.makedirs(outpath, exist_ok=True)\n",
    "\n",
    "failed_pages = []  # (page, reason) for every page that could not be saved\n",
    "for page in range(N_PAGES):\n",
    "    print(f\"Crawling page {page}\", end=\"\\r\")\n",
    "\n",
    "    page_url = url + offset_str + str(page * ITEMS_PER_PAGE)\n",
    "    try:\n",
    "        res = requests.get(page_url, timeout=REQUEST_TIMEOUT)\n",
    "    except requests.RequestException as err:\n",
    "        # Record the failure and keep crawling the remaining pages.\n",
    "        failed_pages.append((page, repr(err)))\n",
    "        continue\n",
    "\n",
    "    if res.status_code != 200:\n",
    "        failed_pages.append((page, f\"HTTP {res.status_code}\"))\n",
    "        continue\n",
    "\n",
    "    # Explicit UTF-8 so the write does not depend on the platform's default encoding.\n",
    "    with open(os.path.join(outpath, f\"page_{page}.html\"), \"w\", encoding=\"utf-8\") as outfile:\n",
    "        outfile.write(res.text)\n",
    "\n",
    "# Report pages that were skipped instead of dropping them silently.\n",
    "if failed_pages:\n",
    "    print(f\"Failed to download {len(failed_pages)} page(s): {failed_pages}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d7f9ca1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python-minimal kernel",
   "language": "python",
   "name": "python-minimal"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}