데이터분석_실무/크롤링실무8 데이터 크롤링 및 엔지니어링 ver8 from selenium import webdriverfrom selenium.webdriver.common.keys import Keysfrom selenium.webdriver.common.by import Byfrom bs4 import BeautifulSoupimport requestsfrom datetime import datetimeimport timeimport calendarimport pandas as pdimport numpy as npimport redriver = webdriver.Chrome()mongddang_list= []start_time = time.time()for i in range(1,151): driver.get("https://youth.seoul.go.. 데이터분석_실무/크롤링실무 2024. 6. 9. 데이터크롤링_ver7 from selenium import webdriverfrom selenium.webdriver.chrome.service import Service as ChromeServicefrom webdriver_manager.chrome import ChromeDriverManagerfrom selenium.webdriver.common.by import Byfrom selenium.common.exceptions import NoSuchElementExceptionfrom selenium.webdriver.support import expected_conditions as ECfrom selenium.webdriver.support.ui import WebDriverWaitfrom datetime impor.. 데이터분석_실무/크롤링실무 2024. 6. 7. 데이터엔지니어링_Ver6 main_url = 'https://mediahub.seoul.go.kr/news/hometown/hometownNewsList.do'driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))driver.get(main_url)cnt = 0j = 2ID_list = []# 페이지 넘기면서 게시글 ID 크롤링while True: try: time.sleep(2) for i in range(1, 9): html = driver.page_source soup = bs(html, 'html.parser') IDs = soup.select.. 데이터분석_실무/크롤링실무 2024. 6. 2. 데이터 엔지니어링_ver5 from selenium import webdriverfrom selenium.webdriver.common.by import Byfrom selenium.webdriver.support.ui import WebDriverWaitfrom bs4 import BeautifulSoupimport requestsimport sysimport pandas as pdimport numpy as npimport timeimport refrom datetime import datetime# href 가져오는 코드driver = webdriver.Chrome()hrefs = []for i in range(1,10): driver.get(f"https://jejuyouthdream.com/pol.. 데이터분석_실무/크롤링실무 2024. 5. 26. 
크롤링 실무 ver4 from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from bs4 import BeautifulSoup import requests import sys import calendar # 인천테크노 파크 크롤링 코드 # Selenium WebDriver 객체 생성 driver = webdriver.Chrome() # 리스트형태로 들어오는 모든 데이터를 append 하기 위한 list 초기화 wello_list = [] # 크롤링 할 총 페이지를 설정 여기선 1 page 에서 12 page 까지 크롤링 for page in .. 데이터분석_실무/크롤링실무 2024. 4. 14. 데이터 크롤링 실무_ver3 from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from bs4 import BeautifulSoup import requests import sys # 인천테크노 파크 크롤링 코드 # Selenium WebDriver 객체 생성 driver = webdriver.Chrome() # 리스트형태로 들어오는 모든 데이터를 append 하기 위한 list 초기화 wello_list = [] # 크롤링 할 총 페이지를 설정 여기선 1 page 에서 12 page 까지 크롤링 for page in range(1,13): # 각 페.. 데이터분석_실무/크롤링실무 2024. 4. 11. 사이트수집(실무) from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from bs4 import BeautifulSoup from selenium import webdriver from bs4 import BeautifulSoup # Selenium WebDriver 객체 생성 driver = webdriver.Chrome() wello_list = [] for i in range(1,13): driver.get(f"https.. 데이터분석_실무/크롤링실무 2024. 4. 9. 정책사이트_데이터수집(실무) from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.by import By from bs4 import BeautifulSoup from datetime import datetime import requests import time driver = webdriver.Chrome() info_lists = list() for i in range(1,20): driver.get("https://youth.seoul.go.kr/infoData/sprtInfo/list.do?sprtInfoId=&key=2309130006&pageIndex=" + str(i) + "&o.. 데이터분석_실무/크롤링실무 2024. 4. 1. 이전 1 다음