본문 바로가기
Quant Stock

코스피&코스닥 종목 리스트 가져오기, 네이버 크롤링 파이썬 코드

by quantWhale 2023. 4. 18.
반응형

일전에는 코스피만 가져와서 필터링하는 파이썬 코드를 올렸었는데

오늘은 코스닥까지 포함하여, 코스피와 코스닥 종목을 각각 별도의 엑셀 파일로 저장하도록 코드를 변경했습니다.

import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook


# Market key -> (listing URL, exclusive upper bound of the page loop).
_MARKET_SOURCES = {
    'kospi': ("https://finance.naver.com/sise/sise_market_sum.nhn?sosok=0", 32),
    'kosdaq': ("https://finance.naver.com/sise/sise_market_sum.nhn?sosok=1", 33),
}

# Browser-like User-Agent sent with every page request.
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

# Seconds to wait for the server before giving up on a single page request.
_REQUEST_TIMEOUT = 10

# Substrings that exclude a listing when found in the lower-cased name.
_KEYWORDS_ANY_CASE = (
    "채권", "테마", "인버스", "msci", "액티브", "(전환)", "hanaro", "kodex",
    "미국", "나스닥", "다우", "국고채", "kosef", "tiger", "arirang", "kbstar",
    "etn", "etf",
)

# Substrings that exclude a listing only when they match the name exactly as
# written (case-sensitive).
_KEYWORDS_EXACT_CASE = (
    "우B", "ACE", "SOL", "S&P500", "FOCUS", "레버리지", "리츠",
)


def _is_excluded(company_name, stock_code):
    """Return True when the listing matches any of the exclusion rules."""
    if company_name.endswith("우") or stock_code.startswith("57"):
        return True
    if any(keyword in company_name for keyword in _KEYWORDS_EXACT_CASE):
        return True
    # Lower-case once instead of once per keyword.
    lowered = company_name.lower()
    return any(keyword in lowered for keyword in _KEYWORDS_ANY_CASE)


def get_stock_codes(market):
    """Crawl the Naver Finance market-cap listing and return filtered stocks.

    Each listing page is downloaded, the rows of the ``type_2`` table are
    read, and every row that survives the exclusion rules contributes its
    code and name to the result. The exclusion keywords appear to target
    ETF/ETN products and similar non-company listings (not verified here).

    Args:
        market: ``'kospi'`` or ``'kosdaq'`` (matched case-sensitively).

    Returns:
        A ``pandas.DataFrame`` with the columns ``"종목코드"`` (code taken
        from the row's link) and ``"종목명"`` (text of the row's second cell).

    Raises:
        ValueError: If ``market`` is not one of the supported keys.
        requests.HTTPError: If a page responds with an error status.
        requests.Timeout: If a page does not respond within the timeout.
    """
    try:
        base_url, page_bound = _MARKET_SOURCES[market]
    except (KeyError, TypeError):
        raise ValueError("Invalid market type") from None

    stock_codes = []
    company_names = []

    # One session for the whole crawl so the connection to the host is reused.
    with requests.Session() as session:
        session.headers.update(_REQUEST_HEADERS)

        # NOTE(review): the upper bound is exclusive, so pages
        # 1 .. page_bound - 1 are requested; confirm this matches the number
        # of pages the site actually serves.
        for page in range(1, page_bound):
            # Without a timeout a stalled connection would block forever.
            res = session.get(f"{base_url}&page={page}", timeout=_REQUEST_TIMEOUT)
            res.raise_for_status()
            soup = BeautifulSoup(res.text, "lxml")

            table = soup.find("table", attrs={"class": "type_2"})
            for row in table.find("tbody").find_all("tr"):
                cells = row.find_all("td")
                if len(cells) <= 1:  # skip rows that carry no data
                    continue

                # Skip rows without a usable link instead of crashing on None.
                link = cells[1].find("a")
                href = link.get("href") if link is not None else None
                if not href:
                    continue

                # The code is whatever follows the last "=" in the link target.
                stock_code = href.split("=")[-1]
                company_name = cells[1].get_text()

                if _is_excluded(company_name, stock_code):
                    continue

                stock_codes.append(stock_code)
                company_names.append(company_name)

    return pd.DataFrame({"종목코드": stock_codes, "종목명": company_names})

def save_stock_codes_to_excel(stock_codes, market, file_name):
    """Write the stock code table to a new Excel workbook.

    The active sheet is titled with the upper-cased market name, receives a
    header row, and then one row per entry of ``stock_codes``.

    Args:
        stock_codes: DataFrame providing the "종목코드" and "종목명" columns.
        market: Market name used (upper-cased) in the sheet title.
        file_name: Path the workbook is saved to.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = f'{market.upper()} 종목 코드'

    header = ['종목코드', '종목명']
    sheet.append(header)

    # Walk both columns in lockstep; each pair becomes one worksheet row.
    code_column = stock_codes['종목코드']
    name_column = stock_codes['종목명']
    for code, name in zip(code_column, name_column):
        sheet.append([code, name])

    workbook.save(file_name)


if __name__ == '__main__':
    # (market key, output workbook, completion message) for each export run.
    export_jobs = (
        ('kospi', 'kospi_stock_codes_Filter.xlsx', '코스피 종목 코드 엑셀 파일 생성 완료!'),
        ('kosdaq', 'kosdaq_stock_codes_Filter.xlsx', '코스닥 종목 코드 엑셀 파일 생성 완료!'),
    )

    # Crawl, save, and report one market at a time, in the listed order.
    for market_key, workbook_path, done_message in export_jobs:
        listing = get_stock_codes(market_key)
        save_stock_codes_to_excel(listing, market_key, workbook_path)
        print(done_message)
반응형
-

댓글