Main Page | Modules | Alphabetical List | Compound List | File List | Compound Members | File Members | Related Pages | Search

vvxtrSample.cc

Go to the documentation of this file.
00001 
00012 #include <vvocr/vvxtrFactory.h>
00013 
00014 #include <string>
00015 #include <ios>
00016 #include <iostream>
00017 #include <fstream>
00018 
00019 #include <signal.h>
00020 
00021 //#include "stdio.h"            // These headers are used, but are included through vvxtrFactory.h
00022 //#include <vvocr/vvxtrAPI.h>
00023 //#include <vvutil/vvLog.h>
00024 
00025 using std::ios;
00026 using std::string;
00027 using std::cout ;
00028 using std::cerr ;
00029 using std::endl ;
00030 
00031 
00033 // Global variables
00034 
00035 string g_remoteEngine ;
00036 vvEngAPI * g_xtrEngine = NULL ;
00037 
00038 
00040 // Prototypes
00041 
00042 // Create an engine
00043 vvEngAPI * CreateEngine();
00044 
00045 // Main example routine
00046 vvxtrStatus runOCR(vvEngAPI * xtrEngine);
00047 
00048 // Process one document, from input to output
00049 vvxtrStatus processOneDocument(vvEngAPI * xtrEngine);
00050 
00051 // Open an input image file
00052 vvxtrStatus openImageDocument(vvEngAPI * xtrEngine, char * inputFilename);
00053 
00054 // Run preprocessing and recognition on the currently loaded page of image data
00055 vvxtrStatus processOnePage(vvEngAPI * xtrEngine);
00056 
00057 // Output a document
00058 vvxtrStatus outputDoc(vvEngAPI * xtrEngine);
00059 
00060 // Output a subimage
00061 vvxtrStatus outputImg(vvEngAPI * xtrEngine);
00062 
00063 // Exit and error handling
00064 void CloseDown();
00065 void SigTermHandler(int param);
00066 void SigErrorHandler(int param);
00067 
00068 
00069 
00071 // Program entry point
00072 
00073 int main (int argc, char ** argv)
00074 {
00075     // Set up exit and error handling.
00076     atexit(CloseDown);
00077 
00078     signal(SIGTERM,SigTermHandler);
00079     signal(SIGINT,SigTermHandler);
00080     signal(SIGQUIT,SigTermHandler);
00081     signal(SIGSEGV,SigErrorHandler);
00082     signal(SIGBUS,SigErrorHandler);
00083 
00084     // Status returned from OCR engine calls
00085     int status;
00086 
00087     // Set the log level:
00088     //
00089     // If you are having serious trouble and don't mind being overrun with log messages,
00090     // you can set the log level to:
00091     //
00092     //  vvLogSetLevel(1000);
00093     //
00094     // Otherwise, it works well to set the log level to print error messages
00095     // only, unless the environment variable VV_DEBUG indicates otherwise:
00096     int level = VVLOG_ERROR ;
00097     char * debug = getenv("VV_DEBUG");
00098     if (debug)
00099     {
00100         level = atoi(debug);
00101     }
00102     vvLogSetLevel(level);
00103 
00104     if (argc==2)
00105     {
00106         // The first argument describes where to find the ocrxtrdaemon.
00107         g_remoteEngine = argv[1];
00108 
00109         // Instantiate an engine object.
00110         g_xtrEngine = CreateEngine();
00111         if (g_xtrEngine==NULL)
00112         {
00113             cerr << "Error creating engine" << endl ;
00114             status = -1;
00115         }
00116         else
00117         {
00118             const char * version ;
00119 
00120             // Print library and ocrxtrdaemon versions.
00121             
00122             g_xtrEngine->vvGetVersion(vvRemote, &version);
00123             cout << "Remote ocrxtrdaemon version number : " << version << endl ;
00124             
00125             g_xtrEngine->vvGetVersion(vvLocal, &version);
00126             cout << "Local client library version : " << version << endl ;
00127 
00128             // Run the sample program.
00129             
00130             status = runOCR( g_xtrEngine );
00131         }
00132     }
00133     else  // Handle an invalid number of parameters.
00134     {
00135         cout << "Usage: " << argv[0] << " host[:port]" << endl ;
00136         status = -1 ;
00137     }
00138 
00139     // Print returned error codes.
00140     if (status != VV_OK)
00141     {
00142         if (g_xtrEngine)
00143         {
00144             cerr << "Error: Status code " << status << ", which indicates " << g_xtrEngine->vvGetStatusString(status) << endl ;
00145             cerr << "Last server message was: '" << g_xtrEngine->vvGetStatusString() << "'" << endl ;
00146         }
00147         else
00148         {
00149             cerr << "Error: Status code: " << status << endl ;
00150             cerr << "Probably failed to connect with " << argv[1] << endl ;
00151         }
00152     }
00153 
00154     // Clean up and finish.
00155     if (g_xtrEngine != NULL)
00156     {
00157         delete g_xtrEngine;
00158         g_xtrEngine = NULL;
00159     }
00160     return status;
00161 }
00162 
00176 vvEngAPI * CreateEngine()
00177 {
00178     return vvxtrCreateRemoteEngine(g_remoteEngine.c_str());
00179 }
00180 
00196 vvxtrStatus runOCR(vvEngAPI * xtrEngine)
00197 {
00198     char response [40];
00199     char inputFilename [200];
00200     char inputSource [40];
00201 
00202     // If the daemon runs on the same filesystem as the client program,
00203     // communication will be much faster if we set this hint.
00204     //
00205     // Do not set this hint if you are running the ocrxtrdaemon on
00206     // a separate filesystem from the client program.
00207     vvERROR(xtrEngine->vvSetHint(vvHintLocalFilesystem));
00208     
00209 
00210     // Initialize the engine first.
00211     cout << ".... initializing the engine instance" << endl ;
00212     vvERROR(xtrEngine->vvInitInstance());
00213 
00214     // Start an OCR session to process a batch of input documents.
00215     // Some training takes place automatically within one OCR
00216     // session, so images processed in one session should come
00217     // from the same source.  See vvEngAPI::vvStartOCRSes.
00218     cout << ".... starting the OCR session" << endl ;
00219     vvERROR(xtrEngine->vvStartOCRSes());
00220 
00221     // Initialize the engine values.
00222     cout << ".... initializing the engine values" << endl ;
00223     vvERROR(xtrEngine->vvInitValues());
00224 
00225 
00226     // Set preprocessing and output options.
00227     cout << ".... setting preproessing and recognition options\n";
00228     vvERROR(xtrEngine->vvSetValue(dm_pp_auto_segment, vvYes));
00229     vvERROR(xtrEngine->vvSetValue(dm_pp_segment_lineart, vvYes));
00230     vvERROR(xtrEngine->vvSetValue(dm_format_analysis, vvYes));
00231 
00232 
00233     // Process an arbitrary number of input document images.
00234     strcpy( response, "y" );
00235     while ( strcmp(response, "y") == 0 )
00236     {
00237         vvERROR( processOneDocument(xtrEngine) );
00238 
00239         cout << "Process another input file? [y|n]" << endl << "> " ;
00240         std::cin >> response;
00241     }
00242 
00243 
00244     // End the OCR session.
00245     cout << ".... ending the OCR session" << endl ;
00246     vvERROR( xtrEngine->vvEndOCRSes() );
00247 
00248     // End the OCR engine instance.  At this point, nothing else can
00249     // be done unless the engine is initialized again.
00250     cout << ".... ending the engine instance" << endl ;
00251     vvERROR( xtrEngine->vvEndInstance() );
00252 
00253     return VV_OK;
00254 }
00255 
00268 vvxtrStatus processOneDocument(vvEngAPI * xtrEngine)
00269 {
00270     // Open an input image document.
00271     
00272     char inputFilename [200];
00273     cout << "Enter the input filename:" << endl << "> ";
00274     std::cin >> inputFilename;
00275 
00276     vvERROR(openImageDocument(xtrEngine, inputFilename));
00277 
00278     
00279     // Process pages from the input image.
00280 
00281     // Find the number of pages in the input document.
00282     int numPages = 0;
00283     vvERROR( xtrEngine->vvGetValue(dm_in_num_pages, &numPages) );
00284     cout << ".... " << inputFilename << " contains " << numPages << " page(s)" << endl ;
00285 
00286     bool readAnotherPage = true;
00287     while (readAnotherPage)
00288     {
00289         char response[40];
00290         cout << "Enter a valid page number to recognize:" << endl << "> ";
00291         std::cin >> response;
00292 
00293         // Select the page to read
00294         vvERROR( xtrEngine->vvSetValue(dm_in_curr_page, atol(response) ));
00295 
00296         // Load a page of image data into the OCR engine.
00297         cout << ".... opening page " << response << endl ;
00298         vvxtrStatus status = xtrEngine->vvReadImageData();
00299 
00300         // Handle invalid page numbers.
00301         if (status == VVXTR_ERR_INVALID_PAGE)
00302         {
00303             cout << ".... page " << response << 
00304                     " is not valid.  This image contains " << numPages << " page(s)." << endl ;
00305             readAnotherPage = true;
00306         }
00307         else if (status == VV_OK)
00308         {
00309             // Process a valid page.
00310             vvERROR( processOnePage(xtrEngine) );
00311 
00312             // Determine what to do next.
00313             cout << "Process another page? [y|n] " << endl << "> ";
00314             std::cin >> response;
00315 
00316             if ( strcmp(response, "y") == 0 ) readAnotherPage = true;
00317             else readAnotherPage = false;
00318     
00319             // Close the loaded image page.
00320             cout << ".... unloading the image data" << endl ;
00321             vvERROR(xtrEngine->vvUnloadImage());
00322         }
00323         else
00324         {
00325             vvERROR( status );
00326         }
00327     }
00328 
00329     
00330     // Close the input image file.
00331     
00332     cout << ".... closing input image file.\n";
00333     vvERROR( xtrEngine->vvCloseImageFile() );
00334 
00335     return VV_OK;
00336 }
00337 
00348 vvxtrStatus openImageDocument(vvEngAPI * xtrEngine, char * inputFilename)
00349 {
00350     if (xtrEngine == NULL)
00351         vvERROR( VVXTR_ERR );
00352 
00353 
00354     // Set the input filename.
00355     vvERROR( xtrEngine->vvSetValue(dm_in_filename, inputFilename) );
00356     
00357     // Open the file.
00358     cout << ".... opening the input image file " << inputFilename << endl ;
00359     vvxtrStatus status = xtrEngine->vvOpenImageFile();
00360 
00361     // Handle errors
00362     if (status != VV_OK)
00363     {
00364         if ( status == VVXTR_ERR_UNABLE_TO_LOAD_IMAGE_FILE )
00365             cout << ".... unable to load input file \"" << inputFilename << "\"; exiting." << endl ;
00366         else
00367             cout << ".... unknown error when trying to load input file \"" << inputFilename << "\"; exiting." << endl ;
00368     }
00369 
00370     return status;
00371 }
00372 
00390 vvxtrStatus processOnePage(vvEngAPI * xtrEngine)
00391 {
00392     char response[40];
00393 
00394     if (xtrEngine == NULL)
00395         vvERROR( VVXTR_ERR );
00396 
00397     cout << ".... preprocessing and recognizing the image data" << endl ;
00398         
00399     vvERROR( xtrEngine->vvPreprocess() );
00400     vvERROR( xtrEngine->vvRecognize() );
00401 
00402 
00403     // Output recognized text.
00404 
00405     cout << "Output a document? [y/n]" << endl << "> ";
00406     std::cin >> response;
00407 
00408     if ((strcmp(response,"y") == 0) || (strcmp(response, "Y") == 0))
00409         vvERROR( outputDoc(xtrEngine) );
00410 
00411     
00412     // Output a subimage.
00413     
00414     cout << "Output an image? [y/n]" << endl << "> ";
00415     std::cin >> response;
00416 
00417     if ((strcmp(response,"y") == 0) || (strcmp(response, "Y") == 0))
00418         vvERROR( outputImg(xtrEngine) );
00419 
00420     return VV_OK;
00421 }
00422 
00423 
00433 vvxtrStatus outputImg(vvEngAPI * xtrEngine)
00434 {
00435     vvxtrStatus status = VV_OK;
00436     char response [20];
00437 
00438     if (xtrEngine != NULL)
00439     {
00440 
00441         // Set up parameters for image output. A dm_focus_area of
00442         // vvFocusAreaRegion specifies that we want to get only one region from the
00443         // page. The alternative is to get the whole page.
00444         vvERROR( xtrEngine->vvSetValue(dm_focus_area, vvFocusAreaRegion));
00445 
00446         // User selects subimage output file format.
00447         {
00448             cout <<"Choose the subimage output format:" << endl <<
00449                    "    [t]iff, [g]if, [j]peg, [p]ng, p[d]f, [r]gb, [v]vxtrImage" << endl << "> ";
00450             std::cin >> response;
00451 
00452             int imgType = atol(response);
00453     
00454             if (strcmp(response,"t") == 0)
00455                 imgType = vvSubimageFormatTiffpack;
00456             else if (strcmp(response,"g") == 0)
00457                 imgType = vvSubimageFormatGif;
00458             else if (strcmp(response,"j") == 0)
00459                 imgType = vvSubimageFormatJpeg;
00460             else if (strcmp(response,"p") == 0)
00461                 imgType = vvSubimageFormatPng;
00462             else if (strcmp(response,"d") == 0)
00463                 imgType = vvSubimageFormatEPdf;
00464             else if (strcmp(response,"r") == 0)
00465                 imgType = vvSubimageFormatRgb;
00466             else
00467                 imgType = vvSubimageFormatVvxtrImage;
00468 
00469             vvERROR( xtrEngine->vvSetValue( dm_out_graphics_format, imgType ));
00470         }
00471 
00472         // List all available region ids, grouped by "image", "text" or "all".
00473         {
00474             char* regIds;
00475 
00476             // Get all image region ids
00477             vvERROR( xtrEngine->vvGetValue(dm_region_ids_image, &regIds));
00478             char * imageRegions = strdup(regIds);
00479 
00480             // Get all text region ids
00481             vvERROR( xtrEngine->vvGetValue(dm_region_ids_text, &regIds));
00482             char * textRegions = strdup(regIds);
00483 
00484             // Get all region ids
00485             vvERROR( xtrEngine->vvGetValue(dm_region_ids, &regIds));
00486             char * allRegions = strdup(regIds);
00487 
00488             cout << ".... image region ids: " << (char*)imageRegions << endl ;
00489             cout << ".... text region ids:  " << (char*)textRegions << endl ;
00490             cout << ".... all region ids:   " << (char*)allRegions << endl ;
00491 
00492             free(textRegions);
00493             free(allRegions);
00494             free(imageRegions);
00495         }
00496 
00497         // User selects region number.
00498         cout << "Specify a region to output as an image or [n]one:" << endl << "> ";
00499         std::cin >> response;
00500         while ((strcmp(response, "n") != 0)&&(strcmp(response,"none") != 0))
00501         {
00502             int regionID = atol(response);
00503 
00504             // Capture the subimage of region regionID.
00505             // Subimage data is extracted from the original input image.
00506             cout << ".... capturing subimage of region " << regionID << endl ;
00507             vvERROR(xtrEngine->vvCaptureSubimage(regionID));
00508 
00509             cout << "Output image to a [f]ile or [m]emory?" << endl << "> ";
00510             std::cin >> response;
00511 
00512             if(strcmp(response,"m") == 0)
00513             {
00514                 // Memory size describes size of the data only.
00515                 int memSize = 0;
00516                 vvERROR( xtrEngine->vvGetValue(dm_subimage_memory_size, &memSize));
00517 
00518                 char * data = new char[memSize];
00519                 
00520                 // Handle a vvxtrImage differently than file formats.
00521                 int imgFormat = 0;
00522                 vvERROR( xtrEngine->vvGetValue( dm_out_graphics_format, &imgFormat));
00523 
00524                 if (imgFormat == (int)vvSubimageFormatVvxtrImage)
00525                 {
00526                     vvxtrImage * img = vvxtrImage::FromRaw(data) ;
00527 
00528                     cout << ".... Write output image memory [" << memSize << "]\n";
00529                     vvERROR( xtrEngine->vvAcquireSubimageMemory(img,memSize));
00530 
00531 // TODO: Do something with the image we've just acquired!
00532 // This would be where in the code you would manipulate the image directly, if you
00533 // so desire.
00534 
00535                 }
00536                 else
00537                 {
00538                     cout << ".... Write output image memory [" << memSize << "]\n" ;
00539                     vvERROR( xtrEngine->vvAcquireSubimageMemory(data,memSize));
00540                     
00541                     string filename = "outImgMemory";
00542                     std::ofstream os (filename.c_str(), ios::binary);
00543 
00544 // TODO: Do something with the image we've just acquired!
00545 // This would be where in the code you would manipulate the image directly, if you
00546 // so desire. We just write it out to disk, which is silly, because why not just
00547 // use vvAcquireSubimageFile ?
00548 
00549                     if (os)
00550                     {
00551                         os.write(data,memSize);
00552                     }
00553                     else
00554                     {
00555                         cout << "Can't open output filename " << filename ;
00556                         status == VV_ERR ;
00557                     }
00558                 }
00559                 delete[] data;
00560                 if (status != VV_OK) return status;
00561             }   // end memory output
00562             else
00563             {
00564                 // Write image output to a file.
00565                 char outputFilename [30];
00566                 sprintf(outputFilename, "outImg_%d", regionID);
00567                 cout << ".... writing output subimage file " << outputFilename << endl ;
00568                 vvERROR( xtrEngine->vvAcquireSubimageFile(outputFilename));
00569             }
00570             vvERROR(status);
00571 
00572             cout << "Specify a region to output as an image or [n]one:" << endl << "> ";
00573             std::cin >> response;
00574         }
00575 
00576     } else
00577         status = VVXTR_ERR;
00578 
00579     // Reset focus area
00580     vvERROR( xtrEngine->vvSetValue(dm_focus_area, vvFocusAreaPage));
00581 
00582     return status;
00583 }
00584 
00585 
00594 vvxtrStatus outputDoc(vvEngAPI * xtrEngine)
00595 {
00596     if (xtrEngine != NULL)
00597     {
00598         char response [40];
00599 
00600         // Set up output values.
00601         cout << "Choose the document output format:" <<
00602                 "  [p]df, [a]scii, [u]nicode, [x]doc" << endl << "> ";
00603         std::cin >> response;
00604 
00605         int fileFormat ;
00606 
00607         // First we select the output text format
00608         if (strcmp(response,"p") == 0)
00609             fileFormat = vvTextFormatPdf ;
00610         else if (strcmp(response,"u") == 0)
00611             fileFormat = vvTextFormatUnicode ;
00612         else if (strcmp(response,"x") == 0)
00613             fileFormat = vvTextFormatXdoc ;
00614         else    // default ascii
00615             fileFormat = vvTextFormatIso ;
00616     
00617         // Start an output document with the correct file format.
00618         cout << ".... starting the output document" << endl ;
00619         vvERROR(xtrEngine->vvStartDoc(fileFormat));
00620 
00621         // Spool output from the engine based on the options already set.
00622         // Note that you can select different pages and/or regions and
00623         // call vvSpoolDoc multiple times before you call vvEndDoc. 
00624         cout << ".... writing to the output document" << endl ;
00625         vvERROR(xtrEngine->vvSpoolDoc());
00626 
00627         // End the output document.  At this point, we are done writing
00628         // output to this output doc. We can write more output, but would
00629         // need to start a new output document.
00630         cout << ".... ending the output document" << endl ;
00631         vvERROR(xtrEngine->vvEndDoc());
00632 
00633         cout << "Send output doc to a [f]ile or [m]emory?" << endl << "> ";
00634         std::cin >> response;
00635 
00636         if ((strcmp(response,"f") == 0) || (strcmp(response,"file") == 0))
00637         {
00638             // Write output to a file.
00639             
00640             cout << "Enter the output filename:" << endl << "> ";
00641             std::cin >> response;
00642             cout << ".... acquiring the document output as a file called " << response << endl ;
00643             vvERROR(xtrEngine->vvAcquireDocFile(response));
00644         }
00645         else
00646         {
00647             // Obtain output in memory.
00648             
00649             int dataSize;
00650 
00651             // Get file size in memory.
00652             vvERROR( xtrEngine->vvGetValue(dm_doc_memory_size, &dataSize));
00653             cout << ".... need to allocate " << dataSize <<
00654                     " bytes to store the output document." << endl ;
00655 
00656             // Allocate memory for doc data.
00657             char * memBuffer = (char*)malloc(dataSize);
00658 
00659             // Write output to memory.
00660             cout << ".... acquiring the output document in memory." << endl ;
00661             vvERROR(xtrEngine->vvAcquireDocMemory((void*) memBuffer, dataSize));
00662 
00663             cout << ".... the contents of the memory buffer are:" << endl << endl ;
00664             for(int i=0; i<dataSize; i++)
00665                 cout.put(memBuffer[i]) ;
00666 
00667             cout << endl << endl ;
00668 
00669             // Free allocated memory.
00670             free(memBuffer);
00671         }
00672 
00673     } else
00674         vvERROR( VVXTR_ERR );
00675 
00676     return VV_OK;
00677 }
00678 
00679 
00685 void CloseDown()
00686 {
00687     vvLog(VVLOG_INFO,"[%d] CloseDown()\n",getpid());
00688     
00689     if (g_xtrEngine != NULL) g_xtrEngine->vvKill();
00690 }
00691 
00697 void SigTermHandler(int param)
00698 {
00699     vvLog(VVLOG_ERROR,"[%d] SigTermHandler::Received signal %d. Exiting.\n",getpid(),param);
00700     exit(0);
00701 }
00702 
00708 void SigErrorHandler(int param)
00709 {
00710     vvLog(VVLOG_ERROR,"[%d] Received ERROR signal %d. Exiting.\n",getpid(),param);
00711     exit(0);
00712 }
00713 

Generated on Thu Dec 11 09:32:13 2003 for OCR Shop XTR/API User Documentation by doxygen 1.3.2