/************************************************************************/
/* */
/* ScanFile -- Manage RE search for a single file */
/* */
/* This module accepts the regular expression to be used */
/* plus various search and display options, and performs */
/* the requested search on each file given. It uses FastFile */
/* to bring the file into a memory buffer in a line-oriented */
/* fashion, and RETable and/or the self-tuned Boyer-Moore */
/* search algorithms to implement the search on each buffer. */
/* */
/* The search effort is partitioned into a fast file scan */
/* search and a slower match portion. This division is */
/* made so that the file may be searched with the least */
/* effort. However, the decision on what's appropriate */
/* as the scan RE is partially dependent on the nature of */
/* the file being searched: we may perform fairly poorly */
/* if our guesses are wrong. */
/* */
/* File offsets are handled by 32-bit integers, which is */
/* inadequate for really big files. */
/* */
/* The whole structure of this module, which in some */
/* distorted way extends to include matcheng.h, is a bit of */
/* a mess. It needs to be split into smaller and more */
/* coherent pieces, but exactly how isn't clear. */
/* */
/* Another problem is that while almost everything else in */
/* ggrep is reentrant, this module most certainly isn't. */
/* */
/* Copyright (C) Grouse Software 1995-2000. All rights reserved. */
/* Written for Grouse by behoffski (Brenton Hoff). */
/* */
/* Free software: no warranty; use anywhere is ok; spread the */
/* sources; note any mods; share variations and derivatives */
/* (including sending to behoffski@grouse.com.au). */
/* */
/************************************************************************/
#include "ascii.h"
#include <compdef.h>
#include <dirent.h>
#include <errno.h>
#include "fastfile.h"
#include "main.h"
#include "matcheng.h"
#include "memrchr.h"
#include <memory.h>
#include "platform.h"
#include "retable.h"
#include "scanfile.h"
#include "stbm.h"
#include "stbmshim.h"
#include <stdio.h>
#include <sys/types.h>
#include "tbldisp.h"
#include "tracery.h"
#include <stdarg.h>
/*Parameters for buffering file into lines*/
/*FILE_BUFFER_SIZE: buffer size handed to FastFile_Start/FastFile_Open*/
#define FILE_BUFFER_SIZE (4096uL * 14)
/*BYTES_BEFORE_BUFFER: space requested via FastFile_StartCondition*/
#define BYTES_BEFORE_BUFFER 8
/*BYTES_AFTER_BUFFER: size of the EndBytes array and the space*/
/*requested via FastFile_EndCondition*/
#define BYTES_AFTER_BUFFER (64 + 4)
/*Initial size of the block used to collect directory entry names*/
#define SCANFILE_DIR_NAME_SIZE_DEFAULT 16384
/*Note: BYTES_AFTER_BUFFER (above) must be >= BOYER_MOORE_LOOKAHEAD_MAX*/
/*Use behoffski's favourite byte value as an endmarker*/
/*(packed into the scan flags by ScanFile_Pattern)*/
#define SCANFILE_ENDMARKER_DEFAULT 0xee
/*File stats plus parent pointer so we may search for recursion loops*/
/*One of these lives on the stack of each ScanFile_Search call.*/
typedef struct {
/*Record of the directory we were reached from (NULL for external*/
/*callers); declared void * but walked as a ScanFile_Stats chain*/
void *pParent;
/*File status filled in through ScanFile_Open; st_mode, st_dev and*/
/*st_ino are the members this module reads*/
struct stat stat;
} ScanFile_Stats;
/*Function type: no arguments, BOOL result.*/
/*NOTE(review): not referenced in the part of the file seen here*/
typedef BOOL (FILE_SCANNER)(void);
/*All state for the module; a single instance (gScanFile) is shared by*/
/*every function in this file*/
typedef struct {
/*------------Variables controlling matching each buffer-----------*/
/*Tracery control block for this module*/
Tracery_ObjectInfo TraceInfo;
/*Function+context for fast buffer scanning*/
MatchEng_MatchFunction pScan;
MatchEng_Spec *pScanContext;
/*Search/match context shared between modules (used by fast scan)*/
MatchEng_Details Details;
/*Function+context for completing matching once scan text found*/
/*(pMatch is NULLFUNC when the scan alone decides the match)*/
MatchEng_MatchFunction pMatch;
MatchEng_Spec *pMatchContext;
/*Duplicate context used by slower match attempts*/
MatchEng_Details Details2;
/*Function to handle lines selected by search (may be NULLFUNC)*/
MatchEng_SelectFunction *pSelect;
/*Match sense -- line selection may be inverted by caller*/
BOOL SelectMatchingLines;
/*Flag recording that something matched; set during searches and*/
/*read-and-cleared by ScanFile_MatchedAny*/
BOOL MatchedAny;
/*Flag indicating whether inverted blocks need to be unpacked*/
/*into individual lines before being reported*/
BOOL UnpackBlocks;
/*Flag indicating whether to recurse directories*/
BOOL RecurseDir;
/*Flag naming if we want to find the line start*/
BOOL FindLineStart;
/*------------------File buffer conditioning---------------*/
/*FastFile file handle (NULL until the first file is opened)*/
FastFile_Context *pHandle;
/*Variables for conditioning start of memory buffer*/
CHAR PrecedingLF;
/*Memory specifying bytes after buffer to optimise search*/
/*(EndLength counts the bytes of EndBytes actually in use)*/
UINT EndLength;
CHAR EndBytes[BYTES_AFTER_BUFFER];
/*------------Module-wide configuration variables--------------*/
/*Platform-specific functions to display matches*/
MatchEng_SelectFunction *pNormalOut;
MatchEng_SelectFunction *pHighlightOut;
MatchEng_SelectFunction *pFilenameOut;
/*RE match function provided by client*/
MatchEng_MatchFunction pExternMatchFunc;
/*Debugging options*/
/*NOTE(review): not referenced in the part of the file seen here*/
LWORD Debug;
} SCANFILE_MODULE_CONTEXT;
/*The one and only instance of the module context*/
module_scope SCANFILE_MODULE_CONTEXT gScanFile;
/*Extra information for Tracery operation*/
/*(only compiled when tracing support is enabled)*/
#ifdef TRACERY_ENABLED
/*Trace control block used by the TRACERY macro in this file*/
#define TRACERY_MODULE_INFO (gScanFile.TraceInfo)
/*Debugging/tracing flags*/
/*One bit per area of the module that can be traced*/
#define SCANFILE_T_BUFFER BIT0
#define SCANFILE_T_SCAN BIT1
#define SCANFILE_T_MATCH BIT2
#define SCANFILE_T_DIR BIT3
/*Edit list handed to Tracery by ScanFile_TraceryLink.  Each flag has*/
/*an upper-case entry and a lower-case entry; presumably the fields*/
/*are {name, bit mask, new value, description} -- confirm against*/
/*the Tracery_EditEntry definition*/
module_scope Tracery_EditEntry gScanFile_TraceryEditDefs[] = {
{"B", SCANFILE_T_BUFFER, SCANFILE_T_BUFFER, "Trace buffer"},
{"b", SCANFILE_T_BUFFER, 0x00, "Ignore buffer"},
{"S", SCANFILE_T_SCAN, SCANFILE_T_SCAN, "Trace scanner"},
{"s", SCANFILE_T_SCAN, 0x00, "Ignore scanner"},
{"M", SCANFILE_T_MATCH, SCANFILE_T_MATCH, "Trace matcher"},
{"m", SCANFILE_T_MATCH, 0x00, "Ignore matcher"},
{"D", SCANFILE_T_DIR, SCANFILE_T_DIR, "Trace directory"},
{"d", SCANFILE_T_DIR, 0x00, "Ignore directory"},
TRACERY_EDIT_LIST_END
};
#endif /*TRACERY_ENABLED*/
/************************************************************************/
/* */
/* Start -- Begin managing what has to be managed */
/* */
/************************************************************************/
public_scope BOOL
ScanFile_Start(void)
{
    /*Our only start-up duty is to get FastFile going with our*/
    /*buffer size; its success or failure is ours as well*/
    return FastFile_Start(FILE_BUFFER_SIZE) ? TRUE : FALSE;

} /*Start*/
/************************************************************************/
/* */
/* NewScanContext -- Prepare blank scan context block */
/* */
/* Typically we get our scan context from RETable, as we set */
/* to use the table-driven architecture. However, in some */
/* cases we use an alternative scan engine (e.g. STBM). */
/* This function provides a basic scan context block for */
/* alternate searches to use. */
/* */
/* The whole implementation of scan context is rather klunky */
/* and would benefit from a careful restructuring. */
/* */
/************************************************************************/
module_scope BOOL
ScanFile_NewScanContext(MatchEng_Spec **ppScanContext)
{
    MatchEng_Spec *pFresh;

    /*Caller sees NULL unless we get all the way through*/
    *ppScanContext = (MatchEng_Spec *) NULL;

    /*Try to obtain storage for the context block*/
    pFresh = (MatchEng_Spec *) Platform_SmallMalloc(sizeof(*pFresh));
    if (pFresh != NULL) {
        /*Got it: start from an all-zero block and hand it over*/
        memset(pFresh, 0, sizeof(*pFresh));
        *ppScanContext = pFresh;
        return TRUE;
    }

    /*No memory available for the context*/
    return FALSE;

} /*NewScanContext*/
/************************************************************************/
/* */
/* MatchedAbandon -- Halt search if matching line found */
/* */
/* This function is used for the -L search option. */
/* */
/************************************************************************/
/*Select function installed by ScanFile_Configure when nonmatching*/
/*files are to be reported.  pDetails is not examined.  Always*/
/*returns FALSE, which the callers in this file treat as "stop*/
/*working on the current file".*/
module_scope BOOL
ScanFile_MatchedAbandon(MatchEng_Details *pDetails)
{
/*Search can abandon current file*/
return FALSE;
} /*MatchedAbandon*/
/************************************************************************/
/* */
/* Open -- Prepare file for scanning */
/* */
/************************************************************************/
module_scope BOOL
ScanFile_Open(CHAR *pFilename, struct stat *pStat)
{
    BOOL HaveHandle;
    BOOL Opened;

    /*A handle left over from an earlier file is reused (it's faster);*/
    /*otherwise this is the first file and a new handle is created*/
    HaveHandle = (gScanFile.pHandle != NULL) ? TRUE : FALSE;
    if (HaveHandle) {
        Opened = FastFile_Reopen(gScanFile.pHandle, pFilename, pStat);
    } else {
        Opened = FastFile_Open(pFilename,
                               FILE_BUFFER_SIZE,
                               FASTFILE_P_MODE_LINE,
                               pStat,
                               &gScanFile.pHandle);
    }

    /*Complain and let the caller move on if the file can't be opened*/
    if (! Opened) {
        fprintf(stderr, "%s: %s: %s\n",
                Platform_ProgramName(),
                pFilename,
                strerror(errno));
        /*Failure must show up in the program's exit value*/
        Main_ReturnCode(MAIN_RETURN_FAULT);
        return FALSE;
    }

    /*Note the name used when reporting; NULL means standard input*/
    if (pFilename == NULL) {
        gScanFile.Details.pFilename = "(standard input)";
    } else {
        gScanFile.Details.pFilename = pFilename;
    }

    /*A reused handle has already been conditioned: nothing more to do*/
    if (HaveHandle) {
        return TRUE;
    }

    /*First file: ask FastFile to reserve room before the buffer*/
    if (! FastFile_StartCondition(gScanFile.pHandle,
                                  BYTES_BEFORE_BUFFER,
                                  1, &gScanFile.PrecedingLF)) {
        /*Error configuring buffer: not enough memory, perhaps?*/
        fprintf(stderr, "%s: Unable to condition start\n",
                Platform_ProgramName());
        Main_ReturnCode(MAIN_RETURN_FAULT);
        return FALSE;
    }

    /*Work out which bytes are to follow the end of each buffer*/
    if (gScanFile.pScanContext->EndCondition !=
                        MATCHENG_CONDITION_TRAILING_LITERAL) {
        /*Plain case: a lone LF*/
        gScanFile.EndBytes[0] = LF;
        gScanFile.EndLength = 1;
    } else {
        /*LF followed by a run of the pattern's trailing literal*/
        /*NOTE(review): PatternLength is not checked against the*/
        /*size of EndBytes here -- confirm it is bounded elsewhere*/
        gScanFile.EndBytes[0] = LF;
        memset(&gScanFile.EndBytes[1],
               gScanFile.pScanContext->TrailingLiteral,
               gScanFile.pScanContext->PatternLength);
        gScanFile.EndLength =
                gScanFile.pScanContext->PatternLength + 1;
    }

    /*Hand the trailing bytes to FastFile*/
    if (! FastFile_EndCondition(gScanFile.pHandle,
                                BYTES_AFTER_BUFFER,
                                gScanFile.EndLength,
                                gScanFile.EndBytes)) {
        /*Error configuring buffer: not enough memory, perhaps?*/
        fprintf(stderr, "%s: Unable to condition end\n",
                Platform_ProgramName());
        Main_ReturnCode(MAIN_RETURN_FAULT);
        return FALSE;
    }

    /*File is open and the handle is fully configured*/
    return TRUE;

} /*Open*/
/************************************************************************/
/* */
/* ExpandNames -- Build a list of all files in a directory */
/* */
/* Prepares a list of all files in the specified directory, */
/* with a NUL terminating each name and consecutive NULs */
/* (a zero-length string) marking the end of the list. No */
/* attempt is made to sort names into alphabetical order. */
/* */
/* Returns the start of the memory block holding the list, or */
/* NULL if the function was unable to build the list */
/* successfully. The block is allocated out of the heap, so if */
/* a pointer is returned, the caller must free the memory to */
/* avoid memory leaks. */
/* */
/* We expect that the caller will want to prepend the */
/* directory name, and possibly a trailing slash, to the */
/* file, so we add space at the start of the list to allow */
/* for this case, and report the start of the prepended area */
/* as our return value. The caller must add */
/* strlen(pDirname) + 1 bytes to the returned pointer to find */
/* the first name. We return this earlier pointer so that */
/* the caller can free the memory block correctly. */
/* */
/************************************************************************/
/*
 * ExpandNames -- collect the entry names of pDirname in one heap block.
 *
 * Block layout: strlen(pDirname) + 1 bytes reserved for the caller to
 * prepend the directory path (left uninitialised), then each entry name
 * with its NUL, then an empty string marking the end of the list.
 * "." and ".." are omitted; names appear in readdir() order.
 *
 * Returns the start of the block, which the caller must free(), or NULL
 * if the directory could not be opened or memory ran out.  The directory
 * stream is closed on every path, so recursive searches do not run out
 * of file descriptors.
 */
module_scope CHAR *
ScanFile_ExpandNames(CHAR *pDirname)
{
    DIR *pDir;
    struct dirent *pEntry;
    UINT BlockSize;
    UINT PrefixSize;
    UINT NameSize;
    UINT Used;
    CHAR *pMem;
    CHAR *pBiggerMem;
    CHAR *pFile;

    /*Open the directory for enumeration*/
    pDir = opendir(pDirname);
    if (pDir == NULL) {
        /*Failed to access directory*/
        return NULL;
    }

    /*Size the first block so that the prefix area and the list*/
    /*terminator fit even when the directory turns out to be empty*/
    PrefixSize = strlen(pDirname) + 1;
    BlockSize = SCANFILE_DIR_NAME_SIZE_DEFAULT;
    if (BlockSize < PrefixSize + 2) {
        BlockSize += PrefixSize + 2;
    }

    /*Acquire initial space to store names*/
    pMem = malloc(BlockSize);
    if (pMem == NULL) {
        /*Sorry, unable to acquire space to store names*/
        closedir(pDir);
        return NULL;
    }

    /*Names start after the space left for the directory prefix*/
    pFile = pMem + PrefixSize;

    /*Work through each entry in the directory*/
    for (;;) {
        /*Read next entry of the directory, if any*/
        pEntry = readdir(pDir);
        if (pEntry == NULL) {
            /*Finished enumerating files*/
            break;
        }

        /*Skip "." and ".." entries if found*/
        if ((strcmp(pEntry->d_name, ".") == 0) ||
            (strcmp(pEntry->d_name, "..") == 0)) {
            continue;
        }

        /*Grow the block until name, its NUL and the list terminator fit*/
        NameSize = strlen(pEntry->d_name);
        Used = (UINT) (pFile - pMem);
        while ((Used + NameSize + 2) >= BlockSize) {
            BlockSize *= 2;
            pBiggerMem = realloc(pMem, BlockSize);
            if (pBiggerMem == NULL) {
                /*Sorry, ran out of memory to store names*/
                free(pMem);
                closedir(pDir);
                return NULL;
            }

            /*Switch to the new block; the offset was taken before*/
            /*realloc so the old pointer is never touched again*/
            pMem = pBiggerMem;
            pFile = pMem + Used;
        }

        /*Add the name (including its NUL) to the block*/
        memcpy(pFile, pEntry->d_name, NameSize + 1);
        pFile += NameSize + 1;
    }

    /*Terminate the list with a 0-length entry*/
    *pFile = NUL;

    /*Finished with the directory stream*/
    closedir(pDir);

    /*Okay, report results to caller*/
    return pMem;

} /*ExpandNames*/
/************************************************************************/
/* */
/* RecurseDir -- Enumerate and search files in directory */
/* */
/* Finds the names of all the files in the specified directory, */
/* and executes the search on each file found. This routine */
/* is modelled closely on the directory recursion facility */
/* in GNU Grep, including checking for circular references */
/* in the directory hierarchy. */
/* */
/* Returns FALSE if multi-file searches are to be skipped. */
/* */
/************************************************************************/
module_scope BOOL
ScanFile_RecurseDir(CHAR *pDirname, ScanFile_Stats *pStats)
{
    ScanFile_Stats *pAncestor;
    CHAR *pList;
    CHAR *pName;
    CHAR *pPath;
    UINT DirLen;
    UINT PrefixLen;
    BOOL KeepGoing;

    TRACERY(SCANFILE_T_DIR, {
        printf("ScanFile_RecurseDir(%s, ...)\n", pDirname);
    });

    /*Walk up the chain of directories that led here: meeting the*/
    /*same inode on the same device again means we are in a loop*/
    pAncestor = pStats->pParent;
    while (pAncestor != NULL) {
        if ((pAncestor->stat.st_ino == pStats->stat.st_ino) &&
            (pAncestor->stat.st_dev == pStats->stat.st_dev)) {
            /*Loop found: quietly give up on this directory*/
            return TRUE;
        }
        pAncestor = pAncestor->pParent;
    }

    /*No loop, so obtain the names of the directory's entries*/
    pList = ScanFile_ExpandNames(pDirname);
    if (pList == NULL) {
        /*Could not build the list of names*/
        return FALSE;
    }

    /*The prefix is the directory name, plus a slash if it lacks one*/
    DirLen = strlen(pDirname);
    PrefixLen = DirLen;
    if (pDirname[DirLen - 1] != '/') {
        PrefixLen++;
    }

    /*Visit each name; the empty string marks the end of the list*/
    KeepGoing = TRUE;
    pName = pList + DirLen + 1;
    while (*pName != '\0') {
        /*Build the full path in place, just ahead of the name*/
        pPath = pName - PrefixLen;
        memcpy(pPath, pDirname, DirLen);
        if (PrefixLen != DirLen) {
            pName[-1] = '/';
        }

        /*Search it; FALSE means no further files are wanted*/
        if (! ScanFile_Search(pPath, pStats)) {
            KeepGoing = FALSE;
            break;
        }

        pName += strlen(pName) + 1;
    }

    /*Release the list and tell the caller whether to carry on*/
    free(pList);
    return KeepGoing;

} /*RecurseDir*/
/************************************************************************/
/* */
/* DisplayBlock -- Display block of lines (for inverted match) */
/* */
/* Displays a block of lines up to but not including the */
/* matching line specified in pDetails. Also calculates */
/* match counts and buffer line counts if these details are */
/* to be reported. */
/* */
/* The function returns TRUE if the file scan may continue, */
/* and returns FALSE if we're no longer interested in the */
/* remainder of the file. */
/* */
/************************************************************************/
public_scope BOOL
ScanFile_DisplayBlock(MatchEng_Details *pDetails,
                      BYTE *pBlockStart,
                      BYTE *pBlockEnd)
{
    BYTE *pLineLast;

    TRACERY(SCANFILE_T_MATCH, {
        printf("\nScanfile_DisplayBlock(%p, %p (%d))\n",
               pBlockStart,
               pBlockEnd,
               pBlockEnd - pBlockStart);
    });

    /*Any text at all in the block counts as a match*/
    if (pBlockStart != pBlockEnd) {
        gScanFile.MatchedAny = TRUE;
    }

    /*Must the block be taken apart line by line?*/
    if (gScanFile.UnpackBlocks) {
        /*Yes, step through it one line at a time*/
        for (; pBlockStart < pBlockEnd; pBlockStart = pLineLast + 1) {
            TRACERY(SCANFILE_T_MATCH, {
                printf("\nmemchr(%p, LF, %u)",
                       pBlockStart,
                       pBlockEnd - pBlockStart);
            });

            gScanFile.Details.LineMatchCount++;

            /*The line's last byte is its LF, or failing that the*/
            /*final byte of the block*/
            pLineLast = memchr(pBlockStart, LF,
                               pBlockEnd - pBlockStart);
            if (pLineLast == NULL) {
                pLineLast = pBlockEnd - 1;
            }

            /*Nothing further to do if nobody wants to see the line*/
            if (gScanFile.pSelect == NULLFUNC) {
                continue;
            }

            /*Describe the line and pass it to the select function*/
            gScanFile.Details.pLineStart = pBlockStart;
            gScanFile.Details.pMatchStart = pBlockStart;
            gScanFile.Details.pMatchEnd = pLineLast;
            gScanFile.Details.pLineEnd = pLineLast + 1;
            if (! gScanFile.pSelect(&gScanFile.Details)) {
                /*We may abandon this file*/
                return FALSE;
            }

            /*Line number*/
            gScanFile.Details.LineNr++;
        }

        return TRUE;
    }

    /*No per-line work wanted: write the block out in one piece*/
    fwrite(pBlockStart,
           1,
           pBlockEnd - pBlockStart,
           stdout);
    return TRUE;

} /*DisplayBlock*/
/************************************************************************/
/* */
/* SearchBuffer -- Search one buffer of file */
/* */
/************************************************************************/
/*Searches the buffer starting at pInBuf; the end of the buffer is*/
/*taken from gScanFile.pScanContext->pAfterEndOfBuffer.  Each line*/
/*that passes the fast scan (and the slower match, if one is*/
/*configured) is either handed to the select function or, for*/
/*inverted searches, used to delimit blocks of non-matching text*/
/*passed to ScanFile_DisplayBlock.  Returns FALSE when the select or*/
/*display function reports that the rest of the file is of no*/
/*interest, and TRUE otherwise.*/
module_scope BOOL
ScanFile_SearchBuffer(BYTE *pInBuf)
{
BYTE *pBufCurr;
BOOL Found;
UINT32 BufferLines;
/*Start scan at first byte of buffer*/
pBufCurr = pInBuf;
/*Lines counted since LineNr was last brought up to date*/
BufferLines = 0;
/*Loop through buffer, looking for RE matches*/
for (;;) {
/*Search buffer for matching text*/
Found = gScanFile.pScan(gScanFile.pScanContext,
pBufCurr,
&gScanFile.Details);
/*Add the line count reported by the scan to our running total*/
BufferLines += gScanFile.Details.BufLineNr;
if (! Found) {
/*Scan portion of RE not found within buffer*/
TRACERY(SCANFILE_T_SCAN, {
printf("Scan not found, buflines: %u\n",
gScanFile.Details.BufLineNr);
});
BufferLines++;
break;
}
/*Do we need the line start but haven't found it?*/
if ((gScanFile.Details.pLineStart == NULL) &&
gScanFile.FindLineStart) {
/*Yes, find start of matching line*/
/*The backward search has no length limit; presumably it*/
/*relies on the LF conditioned before the buffer -- confirm*/
gScanFile.Details.pLineStart =
((CHAR *) memrchr(gScanFile.Details.pMatchStart - 1,
LF, ~0)) + 1;
}
TRACERY(SCANFILE_T_SCAN, {
CHAR s[42];
Tracery_Decode(&gScanFile.TraceInfo,
TRACERY_FLAGS, s, sizeof(s));
printf("\n%s (%s): ",
Tracery_Name(&gScanFile.TraceInfo), s);
printf("Found: %p, %p(%02x)..%p(%02x) ",
gScanFile.Details.pLineStart,
gScanFile.Details.pMatchStart,
*gScanFile.Details.pMatchStart,
gScanFile.Details.pMatchEnd,
*gScanFile.Details.pMatchEnd);
});
/*Find the end of the line found by the scan*/
/*The forward search has no length limit either; presumably it*/
/*relies on the LF conditioned after the buffer -- confirm*/
gScanFile.Details.pLineEnd = memchr(
gScanFile.Details.pMatchEnd, LF, ~0);
TRACERY(SCANFILE_T_SCAN, {
printf(" End: %p\n", gScanFile.Details.pLineEnd);
});
/*Fast scan succeeded: is there a slow match as well?*/
if (gScanFile.pMatch == NULLFUNC) {
/*No slow match: search is complete*/
goto Matched;
}
/*Confine the slow match to the line found by the scan, then*/
/*run it from the start of that line (results go to Details2)*/
gScanFile.pMatchContext->pAfterEndOfBuffer =
gScanFile.Details.pLineEnd;
Found = gScanFile.pMatch(gScanFile.pMatchContext,
gScanFile.Details.pLineStart,
&gScanFile.Details2);
/*Did we match the harder (starting) bit?*/
if (! Found) {
/*No, revert to scanning for easier bit*/
/*Resume just past this line's LF and count the line*/
pBufCurr = gScanFile.Details.pLineEnd + 1;
BufferLines++;
/*Have we hit the end of the buffer?*/
if (pBufCurr >=
gScanFile.pScanContext->pAfterEndOfBuffer) {
/*Yes, finished this buffer*/
break;
}
continue;
}
/*Copy full details of match into main buffer*/
gScanFile.Details.pMatchStart =
gScanFile.Details2.pMatchStart;
gScanFile.Details.pMatchEnd =
gScanFile.Details2.pMatchEnd;
Matched:
/*Are we reporting a line with normal termination?*/
if (gScanFile.Details.pLineEnd !=
gScanFile.pScanContext->pAfterEndOfBuffer) {
/*Yes, include the terminator in the display*/
/*NOTE(review): pLineEnd was located by searching for LF, so*/
/*it is unclear when the CR test can succeed -- confirm*/
if (*gScanFile.Details.pLineEnd++ == CR) {
gScanFile.Details.pLineEnd++;
}
}
/*Are we selecting matching lines?*/
if (gScanFile.SelectMatchingLines) {
/*Yes, remember that we've found at least one match*/
gScanFile.Details.LineMatchCount++;
/*Bring the file line number up to date; the restarted*/
/*count of 1 allows for the matching line itself*/
gScanFile.Details.LineNr += BufferLines;
BufferLines = 1;
/*Handle selected line*/
if ((gScanFile.pSelect) &&
(! gScanFile.pSelect(&gScanFile.Details))) {
/*Function advises we may skip to next file*/
return FALSE;
}
} else {
/*No, inverted match: are there preceding lines?*/
if (pInBuf != gScanFile.Details.pLineStart) {
/*Yes, display them*/
if (! ScanFile_DisplayBlock(
&gScanFile.Details,
pInBuf,
gScanFile.Details.pLineStart)) {
/*We may skip to next file*/
return FALSE;
}
} else {
/*No, still count this line*/
gScanFile.Details.LineNr++;
}
}
/*Update search to start of next line*/
/*(pInBuf now marks the start of text not yet reported)*/
pBufCurr = gScanFile.Details.pLineEnd;
pInBuf = pBufCurr;
/*Have we hit the end of the buffer?*/
if (gScanFile.Details.pLineEnd >=
gScanFile.pScanContext->pAfterEndOfBuffer) {
/*Yes, finished this buffer*/
break;
}
}
/*Is match sense inverted?*/
if (! gScanFile.SelectMatchingLines) {
/*Yes, is there any unmatched text at the end of the buffer?*/
TRACERY(SCANFILE_T_SCAN, {
printf("InvAtEnd: pInBuf, pAfterEnd: %p %p\n",
pInBuf,
gScanFile.pScanContext->pAfterEndOfBuffer);
});
if (pInBuf < gScanFile.pScanContext->pAfterEndOfBuffer) {
/*Yes, display it now*/
if (! ScanFile_DisplayBlock(&gScanFile.Details,
pInBuf,
gScanFile.pScanContext->pAfterEndOfBuffer)) {
/*Display advises we may skip to next file*/
return FALSE;
}
}
} else {
/*Add in any remaining lines we counted at the end*/
gScanFile.Details.LineNr += BufferLines;
}
/*Buffer dealt with; caller may go on to the next one*/
return TRUE;
} /*SearchBuffer*/
/************************************************************************/
/* */
/* Search -- Perform specified search on a file */
/* */
/* This function searches the specified file (or stdin if */
/* pFilename is NULL) using the search options specified */
/* by Configure. The function returns FALSE if the search */
/* has determined that there's no benefit in examining any */
/* more files. */
/* */
/* Parameter pParent is used for recursive searches, so that */
/* circular loops in the directory hierarchy can be detected */
/* and avoided. External callers must specify NULL for this */
/* parameter. */
/* */
/************************************************************************/
public_scope BOOL
ScanFile_Search(CHAR *pFilename, void *pParent)
{
    ScanFile_Stats Stats;
    BYTE *pInBuf;
    UINT32 NrChars;
    BOOL Matched;

    /*A file we cannot open is skipped; later files are still wanted*/
    if (! ScanFile_Open(pFilename, &Stats.stat)) {
        return TRUE;
    }

    /*Directories are either recursed into or silently passed over*/
    if (S_ISDIR(Stats.stat.st_mode)) {
        if (! gScanFile.RecurseDir) {
            /*Sorry, we don't grep directories as binary files (yet),*/
            /*and we don't report skipped directories, either*/
            return TRUE;
        }

        /*Link our stats into the chain used for loop detection*/
        /*?? Should close the opened handle?*/
        Stats.pParent = pParent;
        return ScanFile_RecurseDir(pFilename, &Stats);
    }

    /*Fresh file: line numbering starts at 1 with no matches so far*/
    gScanFile.Details.LineNr = 1;
    gScanFile.Details.LineMatchCount = 0;

    /*Work through the file one buffer at a time*/
    for (;;) {
        /*Fetch the next buffer*/
        if (! FastFile_Read(gScanFile.pHandle, &pInBuf, &NrChars,
                            &gScanFile.Details.BufferOffset)) {
            /*Error while reading buffer*/
            printf("?? ScanFile_Search: FastFile read error\n");
            return FALSE;
        }

        /*An empty buffer means there is nothing further to search*/
        if (NrChars == 0) {
            break;
        }

        TRACERY(SCANFILE_T_BUFFER, {
            UINT i;
            printf("ScanFile: Buffer %p..%p, %lu chars:", pInBuf,
                   &pInBuf[NrChars], NrChars);
            for (i = 0; i < 6; i++) {
                printf(" %02x", pInBuf[i]);
            }
            printf("...\n");
        });

        /*Describe the buffer: its start, its end (byte search and/or*/
        /*inverted match), and no line end until a match is found*/
        gScanFile.Details.pBufferStart = pInBuf;
        gScanFile.pScanContext->pAfterEndOfBuffer = &pInBuf[NrChars];
        gScanFile.Details.pLineEnd = NULL;

        /*Have RETable enlarge the backtracking space if the buffer*/
        /*is bigger than what is currently set aside*/
        if ((NrChars > gScanFile.pScanContext->BacktrackSize) &&
            (! RETable_AllocBacktrack(gScanFile.pScanContext,
                                      NrChars))) {
            /*Sorry, ran out of resources*/
            fprintf(stderr,
                    "%s: Not enough backtrack memory\n",
                    Platform_ProgramName());
            return TRUE;
        }

        /*Search the buffer; FALSE means the rest of the file is of*/
        /*no further interest*/
        if (! ScanFile_SearchBuffer(pInBuf)) {
            break;
        }
    }

    Matched = gScanFile.Details.LineMatchCount != 0;

    /*Asked to list files without matches?  This is one if nothing*/
    /*matched: name it and carry on with the next file*/
    if ((gScanFile.Details.ReportingOptions &
                 MATCHENG_RPT_NONMATCH_FILES) && ! Matched) {
        gScanFile.pFilenameOut(&gScanFile.Details);
        gScanFile.MatchedAny = TRUE;
        return TRUE;
    }

    /*Fold this file's result into the overall match status*/
    if (Matched) {
        gScanFile.MatchedAny = TRUE;
    }

    /*Report the count (and filename, if selected) when counting lines*/
    if (gScanFile.Details.ReportingOptions &
                 MATCHENG_RPT_LINECOUNT) {
        gScanFile.pFilenameOut(&gScanFile.Details);
    }

    /*Request that enumeration continue*/
    return TRUE;

} /*Search*/
/************************************************************************/
/* */
/* Pattern -- Specify RE to be searched */
/* */
/* pPattern is the "compiled" version created by RegExp. */
/* ScanOptions allows modifications to the pattern such as */
/* case insensitivity, word match and inverted match sense */
/* to be specified. */
/* */
/* Pattern expands the RE into a version optimised for speed, */
/* and return FALSE if it is unable to handle the RE */
/* (for example, if it runs out of RAM). */
/* */
/************************************************************************/
/*Turns the compiled RE pPattern into the scan (and, where an easier*/
/*part can be split off, match) machinery stored in gScanFile,*/
/*guided by the bits of ScanOptions.  The scan is carried out either*/
/*by an STBMShim search function or by the client's match function*/
/*working from RETable's expansion.  Returns FALSE if pPattern is*/
/*NULL or if a table/context cannot be created; TRUE otherwise.*/
public_scope BOOL
ScanFile_Pattern(RegExp_Specification *pPattern, LWORD ScanOptions)
{
RegExp_Specification *pEasyBit = NULL;
RegExp_Specification *pHarderBit = NULL;
/*IgnoreCase, PatternLength and TrailingLiteral are only filled in*/
/*by STBMShim_Pattern, and are only read when pSTBM is non-NULL*/
BOOL IgnoreCase;
STBM_SearchSpec *pSTBM;
LWORD ScanFlags = MATCHENG_SPEC_SKIP_BYTES |
MATCHENG_SPEC_ENDMARKER(SCANFILE_ENDMARKER_DEFAULT);
LWORD MatchFlags = 0;
UINT PatternLength;
BYTE TrailingLiteral;
/*Does the main search reference a valid RE specification?*/
if (pPattern == NULL) {
/*No, fault in configuration specification*/
return FALSE;
}
/*Allowable optimisations given in ScanOptions*/
if (ScanOptions & SCANFILE_DEBUG_COMPILED) {
RegExp_ShowCodes("RE.Original: ", pPattern);
}
/*Does the client want CR/LF line termination as well as LF?*/
if (ScanOptions & SCANFILE_OPT_CR_IS_TERMINATOR) {
/*Yes, tell match engine to set up to support this*/
MatchFlags |= MATCHENG_SPEC_CR_IS_TERMINATOR;
ScanFlags |= MATCHENG_SPEC_CR_IS_TERMINATOR;
}
/*Does the client want to recurse directories?*/
gScanFile.RecurseDir = FALSE;
if (ScanOptions & SCANFILE_OPT_RECURSE_DIRECTORIES) {
/*Yes, remember this for when we're dealing with files*/
gScanFile.RecurseDir = TRUE;
}
/*Default to no "easy" search before match search*/
gScanFile.pMatchContext = NULL;
/*Does the client want us to count matching lines?*/
if (ScanOptions & SCANFILE_NUMBER_MATCHING_LINES) {
/*Yes, can't use search optimisations that skip bytes*/
ScanOptions &= ~SCANFILE_OPT_SKIP;
/*Tell RE engine to include line counting and disallow skip*/
ScanFlags &= ~MATCHENG_SPEC_SKIP_BYTES;
ScanFlags |= MATCHENG_SPEC_COUNT_LINES;
}
/*Does the client want us to count nonmatching lines?*/
if (ScanOptions & SCANFILE_NUMBER_NONMATCH_LINES) {
/*Need to break blocks of text into lines*/
/*(the flag is only ever set here, never cleared)*/
gScanFile.UnpackBlocks = TRUE;
}
/*Is the client happy with only an approximate match?*/
if (ScanOptions & SCANFILE_OPT_APPROXIMATE) {
/*Yes, remove optional first/last elements that slow us down*/
if (RegExp_SlashEnds(pPattern)) {
/*RE has been modified to simplify things*/
if (ScanOptions & SCANFILE_DEBUG_COMPILED) {
RegExp_ShowCodes("RE.Approx: ", pPattern);
}
}
}
/*Default to no match function*/
gScanFile.pMatch = NULLFUNC;
/*Can the entire RE be searched using STBM?*/
pSTBM = NULL;
if ((ScanOptions & SCANFILE_OPT_SKIP) &&
(ScanOptions & SCANFILE_OPT_SELF_TUNED_BM)) {
pSTBM = STBMShim_Pattern(pPattern,
&PatternLength,
&IgnoreCase,
&TrailingLiteral);
}
if (pSTBM != NULL) {
/*Yes, does the caller want to display the tables?*/
if (! (ScanOptions & SCANFILE_DEBUG_DISPLAY)) {
/*No, can skip a lot of unnecessary setup code*/
/*(pScan is assigned in the STBM set-up further down)*/
goto AfterTableAnalysis;
}
}
/*Is there an easier bit to search in the middle of the RE?*/
/*Until decided otherwise, the client's function does the scan*/
gScanFile.pScan = gScanFile.pExternMatchFunc;
if ((ScanOptions & SCANFILE_OPT_EASIEST_FIRST) &&
RegExp_EasiestFirst(pPattern, &pEasyBit)) {
/*Yes, modify search to scan for that part first*/
/*From here on pPattern names the easy part; the original RE*/
/*is kept in pHarderBit for the follow-up match*/
pHarderBit = pPattern;
pPattern = pEasyBit;
gScanFile.FindLineStart = TRUE;
if (ScanOptions & SCANFILE_DEBUG_COMPILED) {
RegExp_ShowCodes("RE.Easy: ", pPattern);
}
/*Use function to check for full match after easy bit found*/
gScanFile.pMatch = gScanFile.pExternMatchFunc;
/*Expand hard bit to line-based search*/
MatchFlags |= MATCHENG_SPEC_ENDMARKER(LF);
if (! RETable_Expand(pHarderBit,
MatchFlags,
&gScanFile.pMatchContext)) {
/*Expansion failed for some reason*/
/*NOTE(review): this message goes to stdout, whereas the*/
/*scan expansion failure below is written to stderr*/
printf("RETable.Expand (match) failed\n");
return FALSE;
}
}
/*Are we allowed to attempt optimisations that skip bytes?*/
if (ScanOptions & SCANFILE_OPT_SKIP) {
/*Yes, may we try to use self-tuning Boyer-Moore algorithm?*/
if (ScanOptions & SCANFILE_OPT_SELF_TUNED_BM) {
/*Yes, see if the algorithm can handle the search*/
/*NOTE(review): replaces any spec obtained from the first*/
/*STBMShim_Pattern call -- confirm nothing needs releasing*/
pSTBM = STBMShim_Pattern(pPattern,
&PatternLength,
&IgnoreCase,
&TrailingLiteral);
}
}
/*FALLTHROUGH*/
AfterTableAnalysis:
/*If no STBM or if table display, expand RE into table-driven format*/
if ((pSTBM == NULL) || (ScanOptions & SCANFILE_DEBUG_DISPLAY)) {
/*Expand compact RE spec into table-driven version*/
if (! RETable_Expand(pPattern, ScanFlags,
&gScanFile.pScanContext)) {
/*Expansion failed for some reason*/
fprintf(stderr, "RETable.Expand (scan) failed\n");
return FALSE;
}
} else {
/*Using STBM, allocate scan context*/
if (! ScanFile_NewScanContext(&gScanFile.pScanContext)) {
/*Sorry, unable to set up scan context*/
fprintf(stderr, "STBM scan context error\n");
return FALSE;
}
}
/*Are we using STBM?*/
if (pSTBM != NULL) {
/*Yes, set up scan context*/
/*(ScanFile_Open uses these three fields to build the bytes*/
/*placed after the end of each buffer)*/
gScanFile.pScanContext->PatternLength = PatternLength;
gScanFile.pScanContext->TrailingLiteral =
TrailingLiteral;
gScanFile.pScanContext->EndCondition =
MATCHENG_CONDITION_TRAILING_LITERAL;
/*Configure search to use STBM interface*/
if (IgnoreCase) {
gScanFile.pScan = STBMShim_SearchInCase;
} else {
/*Select STBM or TBM as appropriate*/
if (ScanOptions & SCANFILE_OPT_TUNED_BM) {
/*Caller wants Tuned BM for comparison*/
gScanFile.pScan = STBMShim_SearchTBM;
} else {
/*Use behoffski's self-tuned BM*/
gScanFile.pScan = STBMShim_Search;
}
}
/*Stash the STBM search spec in the context's spare pointer*/
gScanFile.pScanContext->pSpare1 = pSTBM;
TRACERY(SCANFILE_T_SCAN, {
printf("\nScanfile: Using STBM");
});
}
/*Display tables if requested*/
if (ScanOptions & SCANFILE_DEBUG_DISPLAY) {
TblDisp_Describe(gScanFile.pScanContext, "Scan");
/*Does the RE have a match component as well?*/
if (gScanFile.pMatchContext != NULL) {
/*Yes, display it*/
TblDisp_Describe(gScanFile.pMatchContext,
"Match");
}
}
/*Report success to caller*/
return TRUE;
} /*Pattern*/
/************************************************************************/
/* */
/* Configure -- Define how the module searches and reports matches */
/* */
/************************************************************************/
public_scope BOOL
ScanFile_Configure(LWORD ReportingOptions)
{
    MatchEng_SelectFunction *pChosen;
    BOOL NeedLineStart;

    TRACERY(SCANFILE_T_MATCH, {
        printf("Scanfile_Configure(%08lx)\n", ReportingOptions);
    });

    /*Keep the options where the output functions can see them*/
    gScanFile.Details.ReportingOptions = ReportingOptions;

    /*The decisions below are taken in order: a later option*/
    /*overrides whatever an earlier one selected*/

    /*Lines wanted: show them.  Otherwise just name the file.*/
    if (ReportingOptions & MATCHENG_RPT_LINE) {
        pChosen = gScanFile.pNormalOut;
        NeedLineStart = TRUE;
    } else {
        pChosen = gScanFile.pFilenameOut;
        NeedLineStart = FALSE;
    }

    /*Counting lines: nothing is reported per line*/
    if (ReportingOptions & MATCHENG_RPT_LINECOUNT) {
        pChosen = NULLFUNC;
    }

    /*Highlighted matches use the platform's highlight function*/
    if (ReportingOptions & MATCHENG_RPT_HIGHLIGHT) {
        pChosen = gScanFile.pHighlightOut;
    }

    /*Delimited matches use it as well, plus the delimiter char*/
    if (ReportingOptions & MATCHENG_RPT_MARKER_FLAG) {
        pChosen = gScanFile.pHighlightOut;
        gScanFile.Details.MarkerChar =
                MATCHENG_RPT_MARKER_UNPACK(ReportingOptions);
    }

    /*Listing nonmatching files: first match abandons the file*/
    if (ReportingOptions & MATCHENG_RPT_NONMATCH_FILES) {
        pChosen = ScanFile_MatchedAbandon;
        NeedLineStart = FALSE;
    }

    /*Inverted sense reports the lines that don't match, and*/
    /*needs line starts to find the edges of each block*/
    if (ReportingOptions & MATCHENG_RPT_INVERT_MATCH_SENSE) {
        gScanFile.SelectMatchingLines = FALSE;
        NeedLineStart = TRUE;
    } else {
        gScanFile.SelectMatchingLines = TRUE;
    }

    /*Per-line extras mean blocks must be unpacked if sense inverted*/
    if (ReportingOptions & (MATCHENG_RPT_LINENUMBER |
                            MATCHENG_RPT_BYTEOFFSET |
                            MATCHENG_RPT_FILENAME |
                            MATCHENG_RPT_LINECOUNT |
                            MATCHENG_RPT_REMOVE_TRAILING_CR)) {
        gScanFile.UnpackBlocks = TRUE;
        NeedLineStart = TRUE;
    }

    /*Commit the outcome to the module context*/
    gScanFile.pSelect = pChosen;
    gScanFile.FindLineStart = NeedLineStart;

    /*Configured module successfully*/
    return TRUE;

} /*Configure*/
/************************************************************************/
/* */
/* MatchedAny -- Report if any files matched search criteria */
/* */
/************************************************************************/
public_scope BOOL
ScanFile_MatchedAny(void)
{
    /*Nothing recorded since the last enquiry?*/
    if (! gScanFile.MatchedAny) {
        return FALSE;
    }

    /*Something matched: reset the flag so that each report is*/
    /*delivered only once, then tell the caller*/
    gScanFile.MatchedAny = FALSE;
    return TRUE;

} /*MatchedAny*/
/************************************************************************/
/* */
/* MatchFunction -- Define routine to perform match */
/* */
/* ScanFile wishes to provide extremely high-performance */
/* searches, and to do so in a very portable fashion. This */
/* function is the result: ScanFile receives the address of */
/* the function that implements the match from an outsider */
/* (usually Platform). */
/* This function must be called before Pattern. */
/* */
/************************************************************************/
/*Records pMatchFunc as the client-supplied match function.*/
/*ScanFile_Pattern later copies it into the scan and/or match slots,*/
/*which is why it has to be registered before Pattern is called.*/
public_scope void
ScanFile_MatchFunction(MatchEng_MatchFunction pMatchFunc)
{
/*Remember function for operation*/
gScanFile.pExternMatchFunc = pMatchFunc;
} /*MatchFunction*/
/************************************************************************/
/* */
/* NoMatchFunction -- Place-holder to warn of incorrect config */
/* */
/* This function is called if ScanFile does not receive a */
/* match function appropriate to the platform. */
/* */
/************************************************************************/
module_scope BOOL
ScanFile_NoMatchFunction(MatchEng_Spec *pTable,
                         BYTE *pText,
                         MatchEng_Details *pDetails)
{
    /*None of the arguments are looked at: all we can do is*/
    /*grumble about the configuration and report "no match"*/
    fputs("Scanfile: No match function provided!", stderr);
    return FALSE;

} /*NoMatchFunction*/
/************************************************************************/
/* */
/* OutputFunctions -- Specify functions to perform match output */
/* */
/* In order to keep ScanFile as portable as possible, the */
/* match and filename display functions are provided by */
/* an external party, since generation and display of */
/* output (and especially highlighting) is platform-specific. */
/* */
/* This function must be called after Init but before any */
/* RE specification or module configuration. */
/* */
/************************************************************************/
public_scope void
ScanFile_OutputFunctions(MatchEng_SelectFunction *pNormal,
                         MatchEng_SelectFunction *pHighlight,
                         MatchEng_SelectFunction *pFilenameOut)
{
    /*Stash the three platform-supplied output routines in the*/
    /*module context; Configure chooses between them later*/
    gScanFile.pFilenameOut = pFilenameOut;
    gScanFile.pHighlightOut = pHighlight;
    gScanFile.pNormalOut = pNormal;

} /*OutputFunctions*/
#ifdef TRACERY_ENABLED
/************************************************************************/
/* */
/* TraceryLink -- Tell Tracery how to deal with us */
/* */
/* This procedure is used by Tracery to find out how to */
/* manipulate the trace flags for this module and/or object. */
/* The platform should be able to hand this routine to */
/* Tracery when setting up the system without needing to */
/* know too many details about how the traces are to be */
/* set up. */
/* */
/* This function may be used to get the flags for the */
/* module, or for any object created by the module. */
/* If the pObject parameter is NULL, the module information */
/* is returned; otherwise, the object's info is returned. */
/* Currently we report our flag register, our preferred */
/* set of default flags, and a list of edit specifiers and */
/* bits to edit in the flag register. In the future this */
/* may change: Tracery is still rather tentative. */
/* */
/************************************************************************/
public_scope BOOL
ScanFile_TraceryLink(void *pObject, UINT Opcode, ...)
{
        BOOL Supported = TRUE;
        va_list Args;

        /*pObject is never examined here: only module-level */
        /*information is reported, whatever the caller passes.*/

        va_start(Args, Opcode);

        /*Each supported opcode carries one extra argument: the */
        /*address of the variable that receives the answer.     */
        switch (Opcode) {
        case TRACERY_REGCMD_GET_INFO_BLOCK: {
                /*Hand back the module's trace information block*/
                Tracery_ObjectInfo **ppInfo =
                        va_arg(Args, Tracery_ObjectInfo **);
                *ppInfo = &gScanFile.TraceInfo;
                break;
        }

        case TRACERY_REGCMD_GET_DEFAULT_FLAGS: {
                /*Report the set of flags we'd like on by default*/
                LWORD *pFlags = va_arg(Args, LWORD *);
                *pFlags = SCANFILE_T_BUFFER
                        | SCANFILE_T_SCAN
                        | SCANFILE_T_MATCH
                        | SCANFILE_T_DIR;
                break;
        }

        case TRACERY_REGCMD_GET_EDIT_LIST: {
                /*Report the table of flag edit definitions*/
                Tracery_EditEntry **ppEdits =
                        va_arg(Args, Tracery_EditEntry **);
                *ppEdits = gScanFile_TraceryEditDefs;
                break;
        }

        default:
                /*Opcode not recognised: nothing is written back*/
                Supported = FALSE;
                break;
        }

        /*Single exit: the argument list is always closed off*/
        va_end(Args);
        return Supported;

} /*TraceryLink*/
#endif /*TRACERY_ENABLED*/
/************************************************************************/
/* */
/* Init -- Prepare module for operation */
/* */
/************************************************************************/
public_scope void
ScanFile_Init(void)
{
        /*Start with every trace flag for this module switched off*/
        TRACERY_CLEAR_ALL_FLAGS(&TRACERY_MODULE_INFO);

        /*Diagnostics: no debug output*/
        gScanFile.Debug = 0;

        /*Match reporting starts from a zero line count*/
        gScanFile.Details.LineMatchCount = 0;

        /*Until a real matcher is installed, route match calls to */
        /*the place-holder that warns of the missing configuration.*/
        /*NOTE(review): pExternMatchFunc (written by               */
        /*ScanFile_MatchFunction) is not initialised here --       */
        /*confirm that this is intended.                           */
        gScanFile.pMatch       = ScanFile_NoMatchFunction;
        gScanFile.pScanContext = NIL;

        /*Output hooks: none registered yet*/
        gScanFile.pSelect       = NULLFUNC;
        gScanFile.pFilenameOut  = NULLFUNC;
        gScanFile.pHighlightOut = NULLFUNC;
        gScanFile.pNormalOut    = NULLFUNC;

        /*Search option defaults: select matching lines, stay out */
        /*of directories, and don't bother locating line starts.  */
        gScanFile.SelectMatchingLines = TRUE;
        gScanFile.RecurseDir          = FALSE;
        gScanFile.FindLineStart       = FALSE;

        /*Buffer handling: LF constant kept in memory for FastFile */
        /*configuration, and blocks are not unpacked by default.   */
        gScanFile.PrecedingLF  = LF;
        gScanFile.UnpackBlocks = FALSE;

} /*Init*/