Main Page | Modules | Alphabetical List | Compound List | File List | Compound Members | File Members | Related Pages | Search

vvxtrSample2.cc

Go to the documentation of this file.
00001 
00029 #include <vvocr/vvxtrFactory.h>     // All OCR Shop XTR/API header files are included through here
00030 
00031 #include <string>
00032 #include <ios>
00033 #include <iostream>
00034 #include <fstream>
00035 
00036 #include <signal.h>
00037 
00038 //#include <vvocr/vvxtrAPI.h>       // These headers are used, but included from vvxtrFactory.h.
00039 //#include <vvutil/vvLog.h>
00040 //#include "stdio.h"
00041 
00042 using std::ios;
00043 using std::string;
00044 using std::cout ;
00045 using std::cerr ;
00046 using std::endl ;
00047 
00048 
00050 // Global variables
00051 
00052 string g_remoteEngine ;
00053 vvEngAPI * g_xtrEngine = NULL ;
00054 
00055 
00057 // Prototypes
00058 
00059 // Create an engine
00060 vvEngAPI * CreateEngine();
00061 
00062 // Main example routine
00063 vvxtrStatus runOCR(vvEngAPI * xtrEngine);
00064 
00065 // Process one document, from input to output
00066 vvxtrStatus processOneDocument(vvEngAPI * xtrEngine);
00067 
00068 // Run preprocessing and recognition on the currently loaded page of image data
00069 vvxtrStatus processOnePage(vvEngAPI * xtrEngine);
00070 
00071 // Output a document
00072 vvxtrStatus outputDoc(vvEngAPI * xtrEngine);
00073 
00074 // Output a subimage
00075 vvxtrStatus outputImg(vvEngAPI * xtrEngine);
00076 
00077 // Read in raw image data
00078 vvxtrStatus createImageStruct(unsigned char * buf, vvxtrImage** inputImage);
00079 vvxtrImage* readFromRawData();
00080 
00081 // Exit and error handling
00082 void CloseDown();
00083 void SigTermHandler(int param);
00084 void SigErrorHandler(int param);
00085 
00086 
00087 
00089 // Program entry point
00090 
00091 int main (int argc, char ** argv)
00092 {
00093     // Set up exit and error handling.
00094     atexit(CloseDown);
00095 
00096     signal(SIGTERM,SigTermHandler);
00097     signal(SIGINT,SigTermHandler);
00098     signal(SIGQUIT,SigTermHandler);
00099     signal(SIGSEGV,SigErrorHandler);
00100     signal(SIGBUS,SigErrorHandler);
00101 
00102     // Status returned from OCR engine calls
00103     int status;
00104 
00105     // Set the log level:
00106     //
00107     // If you are having serious trouble and don't mind being overrun with log messages,
00108     // you can set the log level to:
00109     //
00110     //  vvLogSetLevel(1000);
00111     //
00112     // Otherwise, it works well to set the log level to print error messages
00113     // only, unless the environment variable VV_DEBUG indicates otherwise:
00114     int level = VVLOG_ERROR ;
00115     char * debug = getenv("VV_DEBUG");
00116     if (debug)
00117     {
00118         level = atoi(debug);
00119     }
00120     vvLogSetLevel(level);
00121 
00122     if (argc==2)
00123     {
00124         // The first argument describes where to find the ocrxtrdaemon.
00125         g_remoteEngine = argv[1];
00126 
00127         // Instantiate an engine object.
00128         g_xtrEngine = CreateEngine();
00129         if (g_xtrEngine==NULL)
00130         {
00131             cerr << "Error creating engine" << endl ;
00132             status = -1;
00133         }
00134         else
00135         {
00136             const char * version ;
00137 
00138             // Print library and ocrxtrdaemon versions.
00139             
00140             g_xtrEngine->vvGetVersion(vvRemote, &version);
00141             cout << "Remote ocrxtrdaemon version number : " << version << endl ;
00142             
00143             g_xtrEngine->vvGetVersion(vvLocal, &version);
00144             cout << "Local client library version : " << version << endl ;
00145 
00146             // Run the sample program.
00147             
00148             status = runOCR( g_xtrEngine );
00149         }
00150     }
00151     else  // Handle an invalid number of parameters.
00152     {
00153         cout << "Usage: " << argv[0] << " host[:port]" << endl ;
00154         status = -1 ;
00155     }
00156 
00157     // Print returned error codes.
00158     if (status != VV_OK)
00159     {
00160         if (g_xtrEngine)
00161         {
00162             cerr << "Error: Status code " << status << ", which indicates " << g_xtrEngine->vvGetStatusString(status) << endl ;
00163             cerr << "Last server message was: '" << g_xtrEngine->vvGetStatusString() << "'" << endl ;
00164         }
00165         else
00166         {
00167             cerr << "Error: Status code: " << status << endl ;
00168             cerr << "Probably failed to connect with " << argv[1] << endl ;
00169         }
00170     }
00171 
00172     // Clean up and finish.
00173     if (g_xtrEngine != NULL)
00174     {
00175         delete g_xtrEngine;
00176         g_xtrEngine = NULL;
00177     }
00178     return status;
00179 }
00180 
00181 
00195 vvEngAPI * CreateEngine()
00196 {
00197     return vvxtrCreateRemoteEngine(g_remoteEngine.c_str());
00198 }
00199 
00215 vvxtrStatus runOCR(vvEngAPI * xtrEngine)
00216 {
00217     char response [40];
00218     char inputFilename [200];
00219     char inputSource [40];
00220 
00221     // If the daemon runs on the same filesystem as the client program,
00222     // communication will be much faster if we set this hint.
00223     //
00224     // Do not set this hint if you are running the ocrxtrdaemon on
00225     // a separate filesystem from the client program.
00226     vvERROR(xtrEngine->vvSetHint(vvHintLocalFilesystem));
00227     
00228 
00229     // Initialize the engine first.
00230     cout << ".... initializing the engine instance" << endl ;
00231     vvERROR(xtrEngine->vvInitInstance());
00232 
00233     // Start an OCR session to process a batch of input documents.
00234     // Some training takes place automatically within one OCR
00235     // session, so images processed in one session should come
00236     // from the same source.  See vvEngAPI::vvStartOCRSes.
00237     cout << ".... starting the OCR session" << endl ;
00238     vvERROR(xtrEngine->vvStartOCRSes());
00239 
00240     // Initialize the engine values.
00241     cout << ".... initializing the engine values" << endl ;
00242     vvERROR(xtrEngine->vvInitValues());
00243 
00244 
00245     // Set preprocessing and output options.
00246     cout << ".... setting preproessing and recognition options\n";
00247     vvERROR(xtrEngine->vvSetValue(dm_pp_auto_segment, vvYes));
00248     vvERROR(xtrEngine->vvSetValue(dm_pp_segment_lineart, vvYes));
00249     vvERROR(xtrEngine->vvSetValue(dm_format_analysis, vvYes));
00250     vvERROR(xtrEngine->vvSetValue(dm_pp_reverse_video, vvYes));
00251     vvERROR(xtrEngine->vvSetValue(dm_pp_photometric_interp , vvCorrect));
00252 
00253 
00254     // Process an arbitrary number of input document images.
00255     strcpy( response, "y" );
00256     while ( strcmp(response, "y") == 0 )
00257     {
00258         vvERROR( processOneDocument(xtrEngine) );
00259 
00260         cout << "Process another input image? [y|n]" << endl << "> " ;
00261         std::cin >> response;
00262     }
00263 
00264 
00265     // End the OCR session.
00266     cout << ".... ending the OCR session" << endl ;
00267     vvERROR( xtrEngine->vvEndOCRSes() );
00268 
00269     // End the OCR engine instance.  At this point, nothing else can
00270     // be done unless the engine is initialized again.
00271     cout << ".... ending the engine instance" << endl ;
00272     vvERROR( xtrEngine->vvEndInstance() );
00273 
00274     return VV_OK;
00275 }
00276 
00289 vvxtrStatus processOneDocument(vvEngAPI * xtrEngine)
00290 {
00291     vvxtrImage* inputImage = NULL;      // One page of image data
00292 
00293     // Read raw input data from a file and load it into the engine through memory:
00294     inputImage = readFromRawData();
00295 
00296     // Process pages from the input image.
00297     if (inputImage != NULL)
00298     {
00299         // Load a page of image data into the OCR engine.
00300         cout << ".... loading image data into the engine" << endl ;
00301         vvxtrStatus status = xtrEngine->vvReadImageData(inputImage);
00302 
00303         // Handle errors.
00304         if (status == VV_OK)
00305         {
00306             // Process a valid page.
00307             vvERROR( processOnePage(xtrEngine) );
00308 
00309             // Close the loaded image page.
00310             cout << ".... unloading the image data" << endl ;
00311             vvERROR(xtrEngine->vvUnloadImage());
00312         }
00313         else
00314         {
00315             vvERROR( status );
00316         }
00317 
00318         // Free allocated memory.
00319         if (inputImage->GetData() != NULL)
00320             free (inputImage->GetData());
00321         if (inputImage != NULL)
00322             delete inputImage;
00323     }
00324     else    // inputImage was not loaded successfully
00325     {
00326         vvERROR( VV_ERR );
00327     }
00328 
00329     return VV_OK;
00330 }
00331 
00332 
00350 vvxtrStatus processOnePage(vvEngAPI * xtrEngine)
00351 {
00352     char response[40];
00353 
00354     if (xtrEngine == NULL)
00355         vvERROR( VVXTR_ERR );
00356 
00357     cout << ".... preprocessing and recognizing the image data" << endl ;
00358         
00359     vvERROR( xtrEngine->vvPreprocess() );
00360     vvERROR( xtrEngine->vvRecognize() );
00361 
00362 
00363     // Output recognized text.
00364 
00365     cout << "Output a document? [y/n]" << endl << "> ";
00366     std::cin >> response;
00367 
00368     if ((strcmp(response,"y") == 0) || (strcmp(response, "Y") == 0))
00369         vvERROR( outputDoc(xtrEngine) );
00370 
00371     
00372     // Output a subimage.
00373     
00374     cout << "Output an image? [y/n]" << endl << "> ";
00375     std::cin >> response;
00376 
00377     if ((strcmp(response,"y") == 0) || (strcmp(response, "Y") == 0))
00378         vvERROR( outputImg(xtrEngine) );
00379 
00380     return VV_OK;
00381 }
00382 
00383 
00393 vvxtrStatus outputImg(vvEngAPI * xtrEngine)
00394 {
00395     vvxtrStatus status = VV_OK;
00396     char response [20];
00397 
00398     if (xtrEngine != NULL)
00399     {
00400 
00401         // Set up parameters for image output. A dm_focus_area of
00402         // vvFocusAreaRegion specifies that we want to get only one region from the
00403         // page. The alternative is to get the whole page.
00404         vvERROR( xtrEngine->vvSetValue(dm_focus_area, vvFocusAreaRegion));
00405 
00406         // User selects subimage output file format.
00407         {
00408             cout <<"Choose the subimage output format:" << endl <<
00409                    "    [t]iff, [g]if, [j]peg, [p]ng, p[d]f, [r]gb, [v]vxtrImage" << endl << "> ";
00410             std::cin >> response;
00411 
00412             int imgType = atol(response);
00413     
00414             if (strcmp(response,"t") == 0)
00415                 imgType = vvSubimageFormatTiffpack;
00416             else if (strcmp(response,"g") == 0)
00417                 imgType = vvSubimageFormatGif;
00418             else if (strcmp(response,"j") == 0)
00419                 imgType = vvSubimageFormatJpeg;
00420             else if (strcmp(response,"p") == 0)
00421                 imgType = vvSubimageFormatPng;
00422             else if (strcmp(response,"d") == 0)
00423                 imgType = vvSubimageFormatEPdf;
00424             else if (strcmp(response,"r") == 0)
00425                 imgType = vvSubimageFormatRgb;
00426             else
00427                 imgType = vvSubimageFormatVvxtrImage;
00428 
00429             vvERROR( xtrEngine->vvSetValue( dm_out_graphics_format, imgType ));
00430         }
00431 
00432         // List all available region ids, grouped by "image", "text" or "all".
00433         {
00434             char* regIds;
00435 
00436             // Get all image region ids
00437             vvERROR( xtrEngine->vvGetValue(dm_region_ids_image, &regIds));
00438             char * imageRegions = strdup(regIds);
00439 
00440             // Get all text region ids
00441             vvERROR( xtrEngine->vvGetValue(dm_region_ids_text, &regIds));
00442             char * textRegions = strdup(regIds);
00443 
00444             // Get all region ids
00445             vvERROR( xtrEngine->vvGetValue(dm_region_ids, &regIds));
00446             char * allRegions = strdup(regIds);
00447 
00448             cout << ".... image region ids: " << (char*)imageRegions << endl ;
00449             cout << ".... text region ids:  " << (char*)textRegions << endl ;
00450             cout << ".... all region ids:   " << (char*)allRegions << endl ;
00451 
00452             free(textRegions);
00453             free(allRegions);
00454             free(imageRegions);
00455         }
00456 
00457         // User selects region number.
00458         cout << "Specify a region to output as an image or [n]one:" << endl << "> ";
00459         std::cin >> response;
00460         while ((strcmp(response, "n") != 0)&&(strcmp(response,"none") != 0))
00461         {
00462             int regionID = atol(response);
00463 
00464             // Capture the subimage of region regionID.
00465             // Subimage data is extracted from the original input image.
00466             cout << ".... capturing subimage of region " << regionID << endl ;
00467             vvERROR(xtrEngine->vvCaptureSubimage(regionID));
00468 
00469             cout << "Output image to a [f]ile or [m]emory?" << endl << "> ";
00470             std::cin >> response;
00471 
00472             if(strcmp(response,"m") == 0)
00473             {
00474                 // Memory size describes size of the data only.
00475                 int memSize = 0;
00476                 vvERROR( xtrEngine->vvGetValue(dm_subimage_memory_size, &memSize));
00477 
00478                 char * data = new char[memSize];
00479                 
00480                 // Handle a vvxtrImage differently than file formats.
00481                 int imgFormat = 0;
00482                 vvERROR( xtrEngine->vvGetValue( dm_out_graphics_format, &imgFormat));
00483 
00484                 if (imgFormat == (int)vvSubimageFormatVvxtrImage)
00485                 {
00486                     vvxtrImage * img = vvxtrImage::FromRaw(data) ;
00487 
00488                     cout << ".... Write output image memory [" << memSize << "]\n";
00489                     vvERROR( xtrEngine->vvAcquireSubimageMemory(img,memSize));
00490 
00491 // TODO: Do something with the image we've just acquired!
00492 // This would be where in the code you would manipulate the image directly, if you
00493 // so desire.
00494 
00495                 }
00496                 else
00497                 {
00498                     cout << ".... Write output image memory [" << memSize << "]\n" ;
00499                     vvERROR( xtrEngine->vvAcquireSubimageMemory(data,memSize));
00500                     
00501                     string filename = "outImgMemory";
00502                     std::ofstream os (filename.c_str(), ios::binary);
00503 
00504 // TODO: Do something with the image we've just acquired!
00505 // This would be where in the code you would manipulate the image directly, if you
00506 // so desire. We just write it out to disk, which is silly, because why not just
00507 // use vvAcquireSubimageFile ?
00508 
00509                     if (os)
00510                     {
00511                         os.write(data,memSize);
00512                     }
00513                     else
00514                     {
00515                         cout << "Can't open output filename " << filename ;
00516                         status == VV_ERR ;
00517                     }
00518                 }
00519                 delete[] data;
00520                 if (status != VV_OK) return status;
00521             }   // end memory output
00522             else
00523             {
00524                 // Write image output to a file.
00525                 char outputFilename [30];
00526                 sprintf(outputFilename, "outImg_%d", regionID);
00527                 cout << ".... writing output subimage file " << outputFilename << endl ;
00528                 vvERROR( xtrEngine->vvAcquireSubimageFile(outputFilename));
00529             }
00530             vvERROR(status);
00531 
00532             cout << "Specify a region to output as an image or [n]one:" << endl << "> ";
00533             std::cin >> response;
00534         }
00535 
00536     } else
00537         status = VVXTR_ERR;
00538 
00539     // Reset focus area
00540     vvERROR( xtrEngine->vvSetValue(dm_focus_area, vvFocusAreaPage));
00541 
00542     return status;
00543 }
00544 
00545 
00554 vvxtrStatus outputDoc(vvEngAPI * xtrEngine)
00555 {
00556     if (xtrEngine != NULL)
00557     {
00558         char response [40];
00559 
00560         // Set up output values.
00561         cout << "Choose the document output format:" <<
00562                 "  [p]df, [a]scii, [u]nicode, [x]doc" << endl << "> ";
00563         std::cin >> response;
00564 
00565         int fileFormat ;
00566 
00567         // First we select the output text format
00568         if (strcmp(response,"p") == 0)
00569             fileFormat = vvTextFormatPdf ;
00570         else if (strcmp(response,"u") == 0)
00571             fileFormat = vvTextFormatUnicode ;
00572         else if (strcmp(response,"x") == 0)
00573             fileFormat = vvTextFormatXdoc;
00574         else    // default ascii
00575             fileFormat = vvTextFormatIso ;
00576     
00577         // Start an output document with the correct file format.
00578         cout << ".... starting the output document" << endl ;
00579         vvERROR(xtrEngine->vvStartDoc(fileFormat));
00580 
00581         // Spool output from the engine based on the options already set.
00582         // Note that you can select different pages and/or regions and
00583         // call vvSpoolDoc multiple times before you call vvEndDoc. 
00584         cout << ".... writing to the output document" << endl ;
00585         vvERROR(xtrEngine->vvSpoolDoc());
00586 
00587         // End the output document.  At this point, we are done writing
00588         // output to this output doc. We can write more output, but would
00589         // need to start a new output document.
00590         cout << ".... ending the output document" << endl ;
00591         vvERROR(xtrEngine->vvEndDoc());
00592 
00593         cout << "Send output doc to a [f]ile or [m]emory?" << endl << "> ";
00594         std::cin >> response;
00595 
00596         if ((strcmp(response,"f") == 0) || (strcmp(response,"file") == 0))
00597         {
00598             // Write output to a file.
00599             
00600             cout << "Enter the output filename:" << endl << "> ";
00601             std::cin >> response;
00602             cout << ".... acquiring the document output as a file called " << response << endl ;
00603             vvERROR(xtrEngine->vvAcquireDocFile(response));
00604         }
00605         else
00606         {
00607             // Obtain output in memory.
00608             
00609             int dataSize;
00610 
00611             // Get file size in memory.
00612             vvERROR( xtrEngine->vvGetValue(dm_doc_memory_size, &dataSize));
00613             cout << ".... need to allocate " << dataSize <<
00614                     " bytes to store the output document." << endl ;
00615 
00616             // Allocate memory for doc data.
00617             char * memBuffer = (char*)malloc(dataSize);
00618 
00619             // Write output to memory.
00620             cout << ".... acquiring the output document in memory." << endl ;
00621             vvERROR(xtrEngine->vvAcquireDocMemory((void*) memBuffer, dataSize));
00622 
00623             cout << ".... the contents of the memory buffer are:" << endl << endl ;
00624             for(int i=0; i<dataSize; i++)
00625                 cout.put(memBuffer[i]) ;
00626 
00627             cout << endl << endl ;
00628 
00629             // Free allocated memory.
00630             free(memBuffer);
00631         }
00632 
00633     } else
00634         vvERROR( VVXTR_ERR );
00635 
00636     return VV_OK;
00637 }
00638 
00648 vvxtrImage* readFromRawData()
00649 {
00650     char inputFilename [200];           // File containing raw image data
00651     FILE *fp = NULL;
00652 
00653     unsigned char * buf = NULL;         // Buffer of raw image data
00654 
00655     vvxtrImage* inputImage = NULL;      // Image data to return
00656 
00657     cout << "Enter the filename for the raw input image data:" << endl << "> ";
00658     std::cin >> inputFilename;
00659 
00660     // Open file containing raw image data.
00661     if (!(fp = fopen(inputFilename, "rb")))
00662     {
00663         cout << "readRawData:  Error opening file " << inputFilename << " for reading." << endl ;
00664         return NULL;
00665     }
00666 
00667     // Find number of bytes in file.
00668     if ((fseek(fp, 0, SEEK_END)) != 0)
00669     {
00670         printf("fseek problem!\n");
00671         return NULL;
00672     }
00673     int numBytes = ftell(fp);
00674     rewind(fp);
00675 
00676     // Read image data from the file into buf.
00677     if ((buf = (unsigned char *) (malloc)(numBytes)) == NULL )
00678     {
00679         cout << "readRawData:  Error in memory allocation (" << numBytes << " bytes)" << endl ;
00680         return NULL;
00681     }
00682 
00683     // Verify byte count.
00684     if ( (fread(buf, 1, numBytes, fp)) != numBytes)
00685     {
00686         cout << "readRawData:  Unable to read " << numBytes <<" bytes" << endl;
00687         free (buf);
00688         return NULL;
00689     }
00690 
00691     // Close input file.
00692     fclose(fp);
00693 
00694     cout << ".... read " << numBytes << " bytes of input data from " << inputFilename << "." << endl;
00695 
00696     // Convert the raw data in the image buffer into a vvxtrImage structure.
00697     if (createImageStruct(buf, &inputImage) != VV_OK)
00698     {
00699         free (buf);
00700         delete inputImage;
00701         return NULL;
00702     }
00703 
00704     return inputImage;
00705 }
00706 
00718 vvxtrStatus createImageStruct(unsigned char * buf, vvxtrImage** inputImage)
00719 {
00720     char response [40];
00721 
00722     (*inputImage) = new vvxtrImage();
00723 
00724     cout << "Enter the image data: " << endl ;
00725 
00726     cout << "width?  " ;
00727     std::cin >> response;
00728     (*inputImage)->SetWidth(atol(response));
00729 
00730     cout << "height?  " ;
00731     std::cin >> response;
00732     (*inputImage)->SetHeight(atol(response));
00733 
00734     cout << "bytes per line?  " ;
00735     std::cin >> response;
00736     (*inputImage)->SetBytesPerLine(atol(response));
00737 
00738     cout << "X dpi?  " ;
00739     std::cin >> response;
00740     (*inputImage)->SetXDPI(atol(response));
00741 
00742     cout << "Y dpi?  " ;
00743     std::cin >> response;
00744     (*inputImage)->SetYDPI(atol(response));
00745 
00746     cout << "bits per pixel (1, 8, or 24)?  " ;
00747     std::cin >> response;
00748     (*inputImage)->SetBitsPerPixel(atol(response));
00749 
00750     cout << "bits per sample (1 or 8)?  " ;
00751     std::cin >> response;
00752     (*inputImage)->SetBitsPerSample(atol(response));
00753 
00754     (*inputImage)->SetData(buf);
00755 
00756     cout << ".... Image data placed in a vvxtrImage structure:" << endl << endl;
00757     cout << "  width      " << (*inputImage)->GetWidth() << endl;
00758     cout << "  height     " << (*inputImage)->GetHeight() << endl;
00759     cout << "  bpl        " << (*inputImage)->GetBytesPerLine() << endl;
00760     cout << "  xdpi       " << (*inputImage)->GetXDPI() << endl;
00761     cout << "  ydpi       " << (*inputImage)->GetYDPI() << endl;
00762     cout << "  bpp        " << (*inputImage)->GetBitsPerPixel() << endl;
00763     cout << "  bps        " << (*inputImage)->GetBitsPerSample() << endl;
00764     cout << "  image size " << (*inputImage)->GetImageSize() << endl << endl;
00765 
00766     return VV_OK;
00767 }
00768 
00774 void CloseDown()
00775 {
00776     vvLog(VVLOG_INFO,"[%d] CloseDown()\n",getpid());
00777     
00778     if (g_xtrEngine != NULL) g_xtrEngine->vvKill();
00779 }
00780 
00786 void SigTermHandler(int param)
00787 {
00788     vvLog(VVLOG_ERROR,"[%d] SigTermHandler::Received signal %d. Exiting.\n",getpid(),param);
00789     exit(0);
00790 }
00791 
00797 void SigErrorHandler(int param)
00798 {
00799     vvLog(VVLOG_ERROR,"[%d] Received ERROR signal %d. Exiting.\n",getpid(),param);
00800     exit(0);
00801 }

Generated on Thu Dec 11 09:32:13 2003 for OCR Shop XTR/API User Documentation by doxygen 1.3.2