dataclasses
@cocoindex.flow_def(name="PatientIntakeExtraction")
def patient_intake_extraction_flow(
    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
):
    """Define the flow that extracts patient data from intake forms.

    Registers a Google Drive source under ``data_scope["documents"]`` and
    creates the collector that gathers the extracted patient records.

    Args:
        flow_builder: Builder used to attach the Google Drive source.
        data_scope: Root data scope that receives the ``documents`` field
            and from which the collector is created.

    Raises:
        KeyError: If ``GOOGLE_SERVICE_ACCOUNT_CREDENTIAL`` or
            ``GOOGLE_DRIVE_ROOT_FOLDER_IDS`` is not set in the environment.
    """
    credential_path = os.environ["GOOGLE_SERVICE_ACCOUNT_CREDENTIAL"]
    # The folder IDs arrive as one comma-separated string. Strip whitespace
    # and drop empty entries so "a, b," does not yield " b" or "" as an ID.
    root_folder_ids = [
        folder_id.strip()
        for folder_id in os.environ["GOOGLE_DRIVE_ROOT_FOLDER_IDS"].split(",")
        if folder_id.strip()
    ]

    data_scope["documents"] = flow_builder.add_source(
        cocoindex.sources.GoogleDrive(
            service_account_credential_path=credential_path,
            root_folder_ids=root_folder_ids,
            binary=True,
        )
    )

    patients_index = data_scope.add_collector()
class ToMarkdown(cocoindex.op.FunctionSpec):
    """Convert a document to markdown."""
@cocoindex.op.executor_class(gpu=True, cache=True, behavior_version=1)
class ToMarkdownExecutor:
    """Executor for ``ToMarkdown``: turns document bytes into markdown text."""

    spec: ToMarkdown
    _converter: MarkItDown

    def prepare(self) -> None:
        """Build the MarkItDown converter, backed by an OpenAI client."""
        client = OpenAI()
        self._converter = MarkItDown(llm_client=client, llm_model="gpt-4o")

    def __call__(self, content: bytes, filename: str) -> str:
        """Convert one document to markdown.

        Args:
            content: Raw bytes of the document.
            filename: Original file name; only its extension is used, so the
                converter sees a path with the right suffix.

        Returns:
            The ``text_content`` of the conversion result.
        """
        suffix = os.path.splitext(filename)[1]
        # Write to a regular file inside a temporary directory and close it
        # before converting. A still-open NamedTemporaryFile cannot be
        # reopened by name on Windows, which is what the converter does.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, "document" + suffix)
            with open(temp_path, "wb") as temp_file:
                temp_file.write(content)
            return self._converter.convert(temp_path).text_content
# For every document row, derive a "markdown" field from its raw content.
with data_scope["documents"].row() as document:
    raw_content = document["content"]
    document["markdown"] = raw_content.transform(
        ToMarkdown(),
        filename=document["filename"],
    )
# For every document row, extract a structured Patient record from the
# markdown text and collect it together with the source file name.
with data_scope["documents"].row() as doc:
    doc["patient_info"] = doc["markdown"].transform(
        cocoindex.functions.ExtractByLlm(
            llm_spec=cocoindex.LlmSpec(
                api_type=cocoindex.LlmApiType.OPENAI,
                model="gpt-4o",
            ),
            output_type=Patient,
            instruction="Please extract patient information from the intake form.",
        )
    )
    patients_index.collect(
        filename=doc["filename"],
        patient_info=doc["patient_info"],
    )
# Export the collected rows to the Postgres table "patients_info",
# keyed by the source file name.
postgres_target = cocoindex.storages.Postgres(table_name="patients_info")
patients_index.export(
    "patients",
    postgres_target,
    primary_key_fields=["filename"],
)
python3 main.py cocoindex evaluate
Công cụ | Hệ điều hành | Mục đích |
---|---|---|
DirEqual | macOS | So sánh thư mục, tập tin |
Meld | Linux/Windows | So sánh thư mục, tập tin |
python3 main.py cocoindex server -c https://cocoindex.io
python main.py cocoindex setup
python main.py cocoindex update
psql postgres://cocoindex:cocoindex@localhost/cocoindex
select * from patients_info;