moved files

This commit is contained in:
2026-03-03 14:07:01 +08:00
parent 65a1705280
commit 6b0e50c532
5 changed files with 0 additions and 0 deletions

View File

@@ -1,77 +0,0 @@
import pandas as pd
import os.path as osp
import os
from lang_agent.rag.emb import QwenEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
def main(save_path="assets/xiaozhan_emb",
         cat_f="assets/xiaozhan_data/catering_end_category.csv",
         desc_f="assets/xiaozhan_data/catering_end_dish.csv"):
    """Build (or reuse) a FAISS index over the tea-shop menu, then run a demo query.

    Reads the category and dish CSV exports, joins each available dish with its
    category description, embeds the resulting text lines, persists a FAISS
    index at ``save_path`` (only if one does not exist yet), reloads it from
    disk, and prints the retrieval results for a sample query.

    Args:
        save_path: Directory for the persisted FAISS index.
        cat_f: CSV with category rows (``id``, ``name``, ``description``).
        desc_f: CSV with dish rows (``name``, ``description``, ``price``,
            ``category_id``, ``is_available``).
    """
    df_cat = pd.read_csv(cat_f)
    df_desc = pd.read_csv(desc_f)
    # Keep only dishes currently on the menu ('t' = true in the CSV export).
    df_desc = df_desc[df_desc["is_available"] == 't'].reset_index(drop=True)

    # Map category id -> "name-description" so each dish line can carry its
    # category context. (`cat_id`, not `id`, to avoid shadowing the builtin.)
    id_desc_dic = {}
    for _, (cat_id, name, desc) in df_cat[["id", "name", "description"]].iterrows():
        id_desc_dic[cat_id] = f"{name}-{desc}"
    df_desc["cat_desc"] = df_desc["category_id"].map(id_desc_dic)

    # One embeddable text line per dish.
    texts = [
        f"茶名称:{name}|茶描述:{desc}|价格{px}|饮品类:{cat_desc}"
        for _, (name, desc, px, cat_desc)
        in df_desc[["name", "description", "price", "cat_desc"]].iterrows()
    ]

    api_key = os.environ.get("ALI_API_KEY")
    embeddings = QwenEmbeddings(api_key=api_key)

    if not osp.exists(save_path):
        # "SOMESHIT" is the unset-placeholder value for ALI_API_KEY; in either
        # no-key case fall back to letting LangChain drive the embedding calls.
        if api_key in (None, "SOMESHIT"):
            docs = [Document(e) for e in texts]
            vectorstore = FAISS.from_documents(docs, embeddings)
        else:
            # With a real key, batch-embed ourselves and hand FAISS the
            # precomputed (text, vector) pairs.
            out_emb = embeddings.batch_embed_documents(texts)
            vectorstore = FAISS.from_embeddings(zip(texts, out_emb), embeddings)
        vectorstore.save_local(save_path)
        print(f"✅ Saved FAISS index to: {save_path}")

    # Reload from disk, as a fresh session would.
    loaded_vectorstore = FAISS.load_local(
        folder_path=save_path,
        embeddings=embeddings,
        allow_dangerous_deserialization=True,  # Required for LangChain >= 0.1.1
    )
    print("✅ Loaded FAISS index successfully!")

    # Demo retrieval over the loaded store.
    retriever = loaded_vectorstore.as_retriever(search_kwargs={"k": 5})
    loaded_vectorstore.similarity_search("灯与尘", k=2)
    res = retriever.invoke("野心心")
    for doc in res:
        print(doc)
        print("==============================================")
# Script entry point: build/load the index and run the demo retrieval.
if __name__ == "__main__":
    main()