Build an AI Chatbot with One API Key: Zero to Production in 30 Minutes
In this tutorial we build a production-ready AI chatbot backend with streaming responses, conversation history, model selection, and proper error handling. We use Python, FastAPI, and the OpenAI SDK, and because an API aggregator sits behind a single endpoint, you can use any model.
Prerequisites
pip install fastapi uvicorn openai
Step 1: A basic chat endpoint
import os

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel
app = FastAPI()

# Read the API key from the environment when available; the hard-coded
# fallback keeps the tutorial copy-paste runnable but must never ship —
# committed keys leak. Behavior is unchanged when the variable is unset.
client = OpenAI(
    api_key=os.environ.get("LEMON_API_KEY", "sk-lemon-xxx"),
    base_url="https://api.lemondata.cc/v1",
)
class ChatRequest(BaseModel):
    """Request body shared by the chat endpoints."""

    # The user's message for this turn.
    message: str
    # Model identifier forwarded verbatim to the aggregator.
    model: str = "gpt-4.1-mini"
    # Omit (None) to start a new conversation; pass an id to continue one.
    conversation_id: str | None = None
@app.post("/chat")
def chat(req: ChatRequest):
    """Single-turn, non-streaming chat completion.

    Declared as a plain ``def`` (not ``async def``) on purpose: the OpenAI
    client call below is synchronous, and inside an ``async def`` endpoint
    it would block the event loop for the whole request. FastAPI runs sync
    endpoints in its threadpool, so the HTTP interface is unchanged.
    """
    response = client.chat.completions.create(
        model=req.model,
        messages=[{"role": "user", "content": req.message}],
    )
    return {"reply": response.choices[0].message.content}
This works, but there is no streaming, no history, and no error handling. Let's improve it.
Step 2: Add streaming
Streaming sends each token the moment it is generated instead of waiting for the full response, so the user watches the answer take shape in real time.
@app.post("/chat/stream")
async def chat_stream(req: ChatRequest):
    """Stream a completion to the client as Server-Sent Events (SSE)."""

    def sse_event(text: str) -> str:
        # SSE framing fix: a raw "\n" inside a single `data:` line would
        # terminate the event early on the client. Multi-line payloads must
        # be sent as consecutive `data:` lines within one event.
        return "".join(f"data: {line}\n" for line in text.split("\n")) + "\n"

    def generate():
        stream = client.chat.completions.create(
            model=req.model,
            messages=[{"role": "user", "content": req.message}],
            stream=True,
        )
        for chunk in stream:
            delta = chunk.choices[0].delta
            if delta.content:
                yield sse_event(delta.content)
        # Sentinel so the client knows the stream finished normally.
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
    )
Step 3: Conversation history
We keep conversation history in memory (swap in Redis or a database for production).
from collections import defaultdict
import uuid
# In-memory conversation store keyed by conversation id.
# NOTE: resets on restart and is per-process; replace with Redis or a
# database for production.
conversations: dict[str, list] = defaultdict(list)

# System prompt prepended to every conversation.
SYSTEM_PROMPT = "You are a helpful assistant. Be concise and direct."
@app.post("/chat/stream")
async def chat_stream(req: ChatRequest):
    """Stream a completion as SSE while recording conversation history."""
    conv_id = req.conversation_id or str(uuid.uuid4())

    # Build this turn's prompt: system message, prior turns, new user turn.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(conversations[conv_id])
    messages.append({"role": "user", "content": req.message})

    # Record the user turn before streaming starts.
    conversations[conv_id].append(
        {"role": "user", "content": req.message}
    )

    def sse_event(text: str) -> str:
        # SSE framing fix: newlines inside a chunk must become separate
        # `data:` lines, otherwise the event is cut short on the client.
        return "".join(f"data: {line}\n" for line in text.split("\n")) + "\n"

    def generate():
        full_response = []
        stream = client.chat.completions.create(
            model=req.model,
            messages=messages,
            stream=True,
        )
        for chunk in stream:
            delta = chunk.choices[0].delta
            if delta.content:
                full_response.append(delta.content)
                yield sse_event(delta.content)
        # Persist the assistant turn once the stream completes.
        conversations[conv_id].append(
            {"role": "assistant", "content": "".join(full_response)}
        )
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        # Lets the client learn the id of a newly created conversation.
        headers={"X-Conversation-ID": conv_id},
    )
Step 4: Error handling
AI API calls can fail for several reasons: rate limits, insufficient balance, model unavailability, and more. Handle each case:
from openai import (
APIError,
RateLimitError,
APIConnectionError
)
@app.post("/chat/stream")
async def chat_stream(req: ChatRequest):
    """Stream a completion as SSE with history and explicit error handling.

    Errors are reported in-band as ``[ERROR]`` SSE events because the HTTP
    status line has already been sent once streaming begins.
    """
    conv_id = req.conversation_id or str(uuid.uuid4())
    messages = build_messages(conv_id, req.message)

    def sse_event(text: str) -> str:
        # SSE framing fix: payload newlines become consecutive `data:` lines.
        return "".join(f"data: {line}\n" for line in text.split("\n")) + "\n"

    def generate():
        full_response = []
        try:
            stream = client.chat.completions.create(
                model=req.model,
                messages=messages,
                stream=True,
            )
            for chunk in stream:
                delta = chunk.choices[0].delta
                if delta.content:
                    full_response.append(delta.content)
                    yield sse_event(delta.content)
            # Only persist the assistant turn if streaming finished cleanly.
            conversations[conv_id].append(
                {"role": "assistant", "content": "".join(full_response)}
            )
        # Order matters: RateLimitError / APIConnectionError are more
        # specific than the APIError catch-all.
        except RateLimitError:
            yield "data: [ERROR] Rate limited. Please wait a moment and retry.\n\n"
        except APIConnectionError:
            yield "data: [ERROR] Connection failed. Retrying...\n\n"
        except APIError as e:
            yield f"data: [ERROR] {e.message}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
def build_messages(conv_id: str, user_msg: str) -> list:
    """Assemble the prompt for one turn and record the user message.

    Returns ``[system prompt] + recent history + new user message``. As a
    side effect, appends the user message to the stored conversation so the
    next turn sees it.

    (Fix: the original inline comment was a garbled string broken across two
    lines, leaving a bare non-comment line inside the body — a syntax error.)
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Keep only the last 10 turns (20 messages) to bound context length.
    history = conversations[conv_id][-20:]
    messages.extend(history)
    messages.append({"role": "user", "content": user_msg})
    conversations[conv_id].append({"role": "user", "content": user_msg})
    return messages
Step 5: Model selection
Let users switch models mid-conversation. A few models for different needs:
# Curated alias -> concrete model id, all routed through the aggregator.
# "smart" and "creative" deliberately map to the same model.
AVAILABLE_MODELS = {
    "fast": "gpt-4.1-mini",
    "smart": "claude-sonnet-4-6",
    "reasoning": "o3",
    "budget": "deepseek-chat",
    "creative": "claude-sonnet-4-6",
}
@app.get("/models")
async def list_models():
    """Expose the curated model aliases so a frontend can render a picker."""
    return dict(models=AVAILABLE_MODELS)
The frontend can display these options. Since every model speaks the same OpenAI-compatible format through the aggregator, switching is just a matter of changing the model parameter.
Step 6: Context window management
Long conversations can exceed a model's context limit. Implement a sliding window:
def trim_history(messages: list, max_tokens: int = 8000) -> list:
    """Keep the system prompt plus the most recent messages within budget.

    Uses the rough heuristic of ~4 characters per token. The system prompt
    (``messages[0]``) is always retained; history is taken newest-first until
    the character budget is exhausted, then returned in chronological order.

    Fixes vs. original: guards empty input (was an IndexError), repairs a
    garbled comment that had broken onto its own non-comment line (syntax
    error), and hoists the loop-invariant character budget.
    """
    if not messages:
        return []
    system = messages[0]  # system prompt is always kept
    history = messages[1:]
    budget = max_tokens * 4  # ~4 chars per token heuristic
    total_chars = len(system["content"])
    trimmed: list = []
    for msg in reversed(history):
        msg_chars = len(msg["content"])
        if total_chars + msg_chars > budget:
            break
        trimmed.insert(0, msg)
        total_chars += msg_chars
    return [system] + trimmed
The finished application
# Run: uvicorn main:app --reload --port 8000
# Test: curl -N -X POST http://localhost:8000/chat/stream \
# -H "Content-Type: application/json" \
# -d '{"message": "Hello!", "model": "gpt-4.1-mini"}'
The complete code is under 100 lines. From here you can add:
- Authentication (API keys or JWT)
- Persistent storage (PostgreSQL or Redis for conversations)
- Per-user rate limiting
- Usage tracking and billing
- WebSocket support for bidirectional streaming
- A frontend (React, Vue, or plain JS with EventSource)
Cost estimate
For a chatbot handling 1,000 conversations per day (5 turns on average):
| Model | Daily cost | Monthly cost |
|---|---|---|
| GPT-4.1-mini | ~$2.40 | ~$72 |
| GPT-4.1 | ~$12.00 | ~$360 |
| Claude Sonnet 4.6 | ~$18.00 | ~$540 |
| DeepSeek V3 | ~$1.68 | ~$50 |
Use GPT-4.1-mini for most conversations and upgrade to Claude Sonnet 4.6 only when the user asks for it, and most applications can stay under $100 per month.
Get an API key: lemondata.cc offers 300+ models behind a single endpoint. Start with $1 of free credit.
