echo ======================================== echo MinerU PDF Parser echo ======================================== echo. echo PDF files in current directory: echo ---------------------------------------- set file_count=0 for%%iin (*.pdf) do ( set /a file_count+=1 echo!file_count!. %%i ) echo ----------------------------------------
:: Check if there are any PDF files if%file_count%equ0 ( echo [ERROR] No .pdf file found in current directory! pause exit /b 1 )
:: Get user input for file name set /p INPUT_FILE="Enter file name (number or full name, Enter for first one): "
if "%INPUT_FILE%"=="" ( set file_num=1 for%%iin (*.pdf) do ( if!file_num!equ1set INPUT_FILE=%%i set /a file_num+=1 ) echo Auto-selected: !INPUT_FILE! ) else ( echo%INPUT_FILE%|findstr /r "^[0-9]*$" >nul ifnoterrorlevel1 ( set file_num=1 for%%iin (*.pdf) do ( if!file_num!equ%INPUT_FILE%set INPUT_FILE=%%i set /a file_num+=1 ) echo Selected: !INPUT_FILE! ) else ( ifnotexist "%INPUT_FILE%" ( echo [ERROR] File "%INPUT_FILE%" not found! pause exit /b 1 ) ) )
:: Get base filename without extension set BASENAME=%~n1 if "%BASENAME%"=="" ( for%%iin ("%INPUT_FILE%") doset BASENAME=%%~ni )
echo. echo ======================================== echo Page Range (Optional) echo ======================================== echo Leave empty to process all pages. echo Page numbers start from 1 (first page = 1) echo ======================================== echo.
set /p START_PAGE_INPUT="Enter start page (Enter for first page): " set /p END_PAGE_INPUT="Enter end page (Enter for last page): "
:: Convert user input (1-indexed) to mineru format (0-indexed) set START_PAGE= set END_PAGE= set START_ARG= set END_ARG= set FOLDER_START=* set FOLDER_END=*
ifnot "%START_PAGE_INPUT%"=="" ( set /a START_PAGE=%START_PAGE_INPUT% - 1 set START_ARG=-s !START_PAGE! set FOLDER_START=%START_PAGE_INPUT% echoStart page: %START_PAGE_INPUT% ^(mineru: !START_PAGE!^) )
ifnot "%END_PAGE_INPUT%"=="" ( set /a END_PAGE=%END_PAGE_INPUT% - 1 set END_ARG=-e !END_PAGE! set FOLDER_END=%END_PAGE_INPUT% echo End page: %END_PAGE_INPUT% ^(mineru: !END_PAGE!^) )
:: Build output folder name set OUTPUT_FOLDER=%BASENAME%_%FOLDER_START%-%FOLDER_END% :: Clean up double *- or -* if "%START_PAGE_INPUT%"=="" set OUTPUT_FOLDER=%BASENAME%_*-%FOLDER_END% if "%END_PAGE_INPUT%"=="" set OUTPUT_FOLDER=%BASENAME%_%FOLDER_START%-* if "%START_PAGE_INPUT%"=="" if "%END_PAGE_INPUT%"=="" set OUTPUT_FOLDER=%BASENAME%_full
echo. echo Output folder: %OUTPUT_FOLDER%
echo. echo ======================================== echo Select Backend Model echo ======================================== echo [1] pipeline - Fast, CPU/low VRAM, good for text echo [2] hybrid-auto-engine - High accuracy, 8GB+ VRAM, good for tables echo [3] vlm-auto-engine - Pure VLM, 8GB+ VRAM, CN/EN optimized echo ======================================== echo [Enter] default: pipeline echo ======================================== echo.
set /p MODEL_CHOICE="Enter option [1/2/3] (Enter for default pipeline): "
:: Set model type if "%MODEL_CHOICE%"=="2" ( set MODEL_TYPE=hybrid-auto-engine echo Selected: hybrid-auto-engine ) elseif "%MODEL_CHOICE%"=="3" ( set MODEL_TYPE=vlm-auto-engine echo Selected: vlm-auto-engine ) else ( set MODEL_TYPE=pipeline echo Selected: pipeline )
:: Execute MinerU command with modelscope source :: Use PowerShell with direct parameter passing to handle Chinese characters and parentheses powershell -Command "& { $env:MINERU_MODEL_SOURCE='modelscope'; mineru -p '%INPUT_FILE%' -o '%OUTPUT_FOLDER%' -b '%MODEL_TYPE%' %START_ARG%%END_ARG% }"
:: Check execution result iferrorlevel1 ( echo. echo [ERROR] Parse failed! echo. echo Possible issues: echo - For auto-engine models: Need 8GB+ VRAM NVIDIA GPU echo - Check if mineru is installed: pip install magic-pdf[full] ) else ( echo. echo [SUCCESS] Parse completed! echo Output saved in: %CD%\%OUTPUT_FOLDER% )