본문 바로가기
Quant Stock

코스피 종목 리스트 가져오기(종목필터) - 네이버 크롤링 파이썬 코드

by quantWhale 2023. 4. 17.
반응형

코드가 날코딩이기는 하지만, ETN·ETF·우선주·리츠처럼 불필요한 종목들은 제외하도록 했습니다.


import pandas as pd
import requests
from io import BytesIO
from openpyxl import Workbook

import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook


# Substrings checked against the lower-cased company name.
_EXCLUDED_KEYWORDS_LOWERCASED = (
    "채권", "테마", "인버스", "msci", "액티브", "(전환)", "hanaro", "kodex",
    "미국", "나스닥", "다우", "국고채", "kosef", "tiger", "arirang", "kbstar",
    "etn", "etf",
)

# Substrings checked against the company name exactly as scraped
# (case-sensitive on purpose: e.g. "ACE" must not match "ace" inside a word).
_EXCLUDED_KEYWORDS_EXACT = (
    "우B", "ACE", "SOL", "S&P500", "FOCUS", "레버리지", "리츠",
)


def _is_excluded_stock(stock_code, company_name):
    """Return True when a row matches any of the exclusion rules."""
    # Names ending in "우" and codes starting with "57" are dropped outright
    # (presumably preferred shares and ETN codes -- confirm against the market
    # data before relying on that reading).
    if company_name.endswith("우") or stock_code.startswith("57"):
        return True

    lowered_name = company_name.lower()
    if any(keyword in lowered_name for keyword in _EXCLUDED_KEYWORDS_LOWERCASED):
        return True
    return any(keyword in company_name for keyword in _EXCLUDED_KEYWORDS_EXACT)


def get_kospi_stock_codes(last_page: int = 31, timeout: float = 10.0) -> pd.DataFrame:
    """Scrape the Naver Finance KOSPI market-cap listing into a DataFrame.

    Pages 1..last_page of the listing are fetched one by one. Every table row
    that carries a stock link contributes its code (the text after the last
    "=" in the link's href) and its displayed name, unless the row matches the
    exclusion rules in ``_is_excluded_stock``.

    Args:
        last_page: Number of the final listing page to fetch (inclusive).
            A value below 1 fetches nothing and yields an empty frame.
        timeout: Seconds to wait for each HTTP request before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        DataFrame with the columns "종목코드" (stock code) and "회사명"
        (company name), in the order the rows were scraped.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.Timeout: If a page does not respond within ``timeout``.
        RuntimeError: If a page does not contain the expected listing table.
    """
    url = "https://finance.naver.com/sise/sise_market_sum.nhn?sosok=0"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    stock_codes = []
    company_names = []

    for page in range(1, last_page + 1):
        res = requests.get(f"{url}&page={page}", headers=headers, timeout=timeout)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "lxml")

        table = soup.find("table", attrs={"class": "type_2"})
        if table is None:
            # Fail loudly with context instead of an AttributeError on None.
            raise RuntimeError(
                f"listing table (class 'type_2') not found on page {page}")

        # Fall back to the table itself when the markup carries no <tbody>.
        table_body = table.find("tbody")
        if table_body is None:
            table_body = table

        for row in table_body.find_all("tr"):
            columns = row.find_all("td")
            if len(columns) <= 1:  # spacer/divider rows carry no data
                continue

            link = columns[1].find("a")
            href = link.get("href") if link is not None else None
            if not href:  # a data-shaped row without a stock link
                continue

            stock_code = href.split("=")[-1]
            # Strip so surrounding whitespace cannot defeat the endswith("우")
            # rule or leak into the saved name.
            company_name = columns[1].get_text().strip()

            if _is_excluded_stock(stock_code, company_name):
                continue

            stock_codes.append(stock_code)
            company_names.append(company_name)

    return pd.DataFrame({"종목코드": stock_codes, "회사명": company_names})


def save_kospi_stock_codes_to_excel(stock_codes, file_name='kospi_stock_codes_Filter.xlsx'):
    """Save the KOSPI stock codes to an Excel workbook.

    Args:
        stock_codes: DataFrame with the columns '종목코드' and '회사명'.
        file_name: Path the workbook is saved to.
    """
    header = ['종목코드', '회사명']

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = '코스피 종목 코드'
    sheet.append(header)

    # One worksheet row per DataFrame row, cells in header order.
    for _, record in stock_codes.iterrows():
        sheet.append([record[column] for column in header])

    workbook.save(file_name)


if __name__ == '__main__':
    # Scrape the filtered KOSPI listing, export it, then report completion.
    save_kospi_stock_codes_to_excel(get_kospi_stock_codes())
    print('코스피 종목 코드 엑셀 파일 생성 완료!')
반응형
-

댓글