{* Copyright 1999-2005 The Apache Software Foundation or its licensors, as
 * applicable.
*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *}
library mod_spelling;

{$i define.inc}

uses SysUtils, Classes, httpd, apr;

var
  { Module record; its fields are filled in by the initialization block at
    the end of this file. }
  speling_module: module; {$ifdef Unix} cvar; public; {$endif}
  { Set to @speling_module by the initialization block. }
  default_module_ptr: Pmodule;

const
  { Stored in speling_module.name and passed as the first argument of every
    ap_log_rerror call in this file. }
  MODULE_NAME = 'mod_speling.so';

{********************************************************************
  Free Pascal only supports exporting variables on Windows
*******************************************************************}
{$ifdef WINDOWS}
{ NOTE(review): the exported symbol is spelled 'spelling_module' (double l)
  while the variable is speling_module - confirm this is the name the
  LoadModule directive is expected to use. }
exports
  speling_module name 'spelling_module';
{$endif}

{ Headers included by the C original, kept for reference:

#include "apr.h"
#include "apr_file_io.h"
#include "apr_strings.h"
#include "apr_lib.h"

#define APR_WANT_STRFUNC
#include "apr_want.h"

#define WANT_BASENAME_MATCH

#include "httpd.h"
#include "http_core.h"
#include "http_config.h"
#include "http_request.h"
#include "http_log.h"
}

{* mod_speling.c - by Alexei Kosut <[email protected]> June, 1996
 *
 * Translated to pascal by Felipe Monteiro de Carvalho - July, 2006
 *
 * This module is transparent, and simple. It attempts to correct
 * misspellings of URLs that users might have entered, namely by checking
 * capitalizations. If it finds a match, it sends a redirect.
 *
 * 08-Aug-1997 <[email protected]>
 * o Upgraded module interface to apache_1.3a2-dev API (more NULL's in
 *   speling_module).
 * o Integrated tcsh's "spelling correction" routine which allows one
 *   misspelling (character insertion/omission/typo/transposition).
 *   Rewrote it to ignore case as well. This ought to catch the majority
 *   of misspelled requests.
* o Commented out the second pass where files' suffixes are stripped.
 *   Given the better hit rate of the first pass, this rather ugly
 *   (request index.html, receive index.db ?!?!) solution can be
 *   omitted.
 * o wrote a "kind of" html page for mod_speling
 *
 * Activate it with "CheckSpelling On"
 }

type
  { Settings record of this module (one per server or directory scope). }
  spconfig = record
    enabled: Integer;   { 0 = spelling correction off, set by CheckSpelling }
  end;

  Pspconfig = ^spconfig;

{
 * Allocates a settings record from pool p and initialises it with the
 * defaults (spelling correction disabled).
 *
 * No merge function is registered for this module; per the API notes of
 * the original author, the record of the closest ancestor is then used
 * exclusively, which is the desired behaviour.
 }
function mkconfig(p: Papr_pool_t): Pointer;
var
  settings: Pspconfig;
begin
  settings := Pspconfig(apr_pcalloc(p, SizeOf(spconfig)));
  with settings^ do
    enabled := 0;
  Result := settings;
end;

{
 * Server / vhost configuration callback: hands back a default record.
 }
function create_mconfig_for_server(p: Papr_pool_t; s: Pserver_rec): Pointer; cdecl;
begin
  Result := mkconfig(p);
end;

{
 * Per-directory configuration callback: hands back a default record.
 }
function create_mconfig_for_directory(p: Papr_pool_t; dir: PAnsiChar): Pointer; cdecl;
begin
  Result := mkconfig(p);
end;

{
 * Handler of the CheckSpelling directive (a FLAG directive): stores the
 * flag value in the record passed as mconfig.  Returns nil (no error text).
 }
function set_speling(cmd: Pcmd_parms; mconfig: Pointer; arg: Integer): PAnsiChar; cdecl;
begin
  Pspconfig(mconfig)^.enabled := arg;
  Result := nil;
end;

{* Define the directives specific to this module.  This structure is referenced
 * later by the 'module' structure.
}
var
  { Command record for the CheckSpelling directive; populated by the
    initialization block at the end of this file. }
  speling_cmds: command_rec;

type
  { How a directory entry relates to the requested name.  Lower ordinal
    values sort first in sort_by_quality. }
  sp_reason = (
    SP_IDENTICAL = 0,
    SP_MISCAPITALIZED = 1,
    SP_TRANSPOSITION = 2,
    SP_MISSINGCHAR = 3,
    SP_EXTRACHAR = 4,
    SP_SIMPLETYPO = 5,
    SP_VERYDIFFERENT = 6
  );

const
  { Human-readable text for each sp_reason, indexed by the ordinal value of
    the enumeration; the last slot is a nil terminator. }
  sp_reason_str: array [0..7] of PAnsiChar =
  (
    'identical',
    'miscapitalized',
    'transposed characters',
    'character missing',
    'extra character',
    'mistyped character',
    'common basename',
    nil
  );

type
  { One candidate found while scanning the directory. }
  misspelled_file = record
    name: PAnsiChar;      { name of the directory entry }
    quality: sp_reason;   { how it differs from the requested name }
  end;

  Pmisspelled_file = ^misspelled_file;

{
 * spdist() is taken from Kernighan & Pike,
 *  _The_UNIX_Programming_Environment_
 * and adapted somewhat to correspond better to psychological reality.
 * (Note the changes to the return values)
 *
 * According to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
 * page 363, the correct order for this is:
 * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
 * thus, it was exactly backwards in the old version. -- PWP
 *
 * This routine was taken out of tcsh's spelling correction code
 * (tcsh-6.07.04) and re-converted to apache data types ("AnsiChar" type
 * instead of tcsh's NLS'ed "AnsiChar"). Plus it now ignores the case
 * during comparisons, so it is an "approximate strcasecmp()".
 * NOTE that it still allows only _one_ real "typo",
 * it does NOT try to correct multiple errors.
}

{
 * Classifies how ct differs from cs, ignoring case.
 *
 *   cs - the name that was requested
 *   ct - the candidate name to compare against
 *
 * Returns
 *   SP_MISCAPITALIZED  the strings are equal when case is ignored
 *   SP_TRANSPOSITION   two adjacent characters are swapped
 *   SP_SIMPLETYPO      exactly one character differs
 *   SP_EXTRACHAR       cs has one character more than ct
 *   SP_MISSINGCHAR     cs has one character less than ct
 *   SP_VERYDIFFERENT   anything else
 *
 * Only a single error is tolerated; everything after the first point of
 * difference must match (case-insensitively).
 }
function spdist(const cs, ct: PAnsiChar): sp_reason;
var
  s, t: PAnsiChar;
begin
  s := cs;
  t := ct;

  { Skip the common (case-insensitive) prefix. }
  while apr_tolower(s^) = apr_tolower(t^) do
  begin
    if t^ = #0 then
    begin
      Result := SP_MISCAPITALIZED;   { exact match (sans case) }
      Exit;
    end;
    Inc(s);
    Inc(t);
  end;

  { s and t now point at the first differing character of each string. }
  if s^ <> #0 then
  begin
    if t^ <> #0 then
    begin
      { Nested tests so that s + 2 / t + 2 are only touched when both
        strings really have a second character left. }
      if (s[1] <> #0) and (t[1] <> #0) then
        if (apr_tolower(s^) = apr_tolower(t[1]))
           and (apr_tolower(t^) = apr_tolower(s[1])) then
          if stricomp(s + 2, t + 2) = 0 then
          begin
            Result := SP_TRANSPOSITION;        { transposition }
            Exit;
          end;

      if stricomp(s + 1, t + 1) = 0 then
      begin
        Result := SP_SIMPLETYPO;   { 1 AnsiChar mismatch }
        Exit;
      end;
    end;

    if stricomp(s + 1, t) = 0 then
    begin
      Result := SP_EXTRACHAR;        { extra character }
      Exit;
    end;
  end;

  if t^ <> #0 then
    if stricomp(s, t + 1) = 0 then
    begin
      Result := SP_MISSINGCHAR;  { missing character }
      Exit;
    end;

  Result := SP_VERYDIFFERENT;    { distance too large to fix. }
end;

{
 * Comparison callback: orders two misspelled_file records by quality.
 * Returns <0, 0 or >0 when left is better than, equal to or worse than rite.
 }
function sort_by_quality(left, rite: Pointer): Integer;
begin
  Result := Integer(Pmisspelled_file(left)^.quality) - Integer(Pmisspelled_file(rite)^.quality);
end;

{
 * Sorts the misspelled_file records stored in candidates in place, best
 * quality first, using sort_by_quality.  Insertion sort: the list holds
 * one entry per near-match in a single directory.
 }
procedure sort_candidates(candidates: Papr_array_header_t);
var
  items: Pmisspelled_file;
  key: misspelled_file;
  i, j: Integer;
begin
  items := Pmisspelled_file(candidates^.elts);
  for i := 1 to candidates^.nelts - 1 do
  begin
    key := items[i];
    j := i - 1;
    while j >= 0 do
    begin
      if sort_by_quality(@items[j], @key) <= 0 then
        Break;
      items[j + 1] := items[j];
      Dec(j);
    end;
    items[j + 1] := key;
  end;
end;

{
 * Fixup hook.  When CheckSpelling is enabled and a GET request did not map
 * to an existing file, scans the directory the request points into for
 * entries whose name is close to the requested one.
 *
 * Returns
 *   DECLINED                 the request is not handled by this module
 *   OK                       nothing to correct
 *   HTTP_MOVED_PERMANENTLY   one unambiguous match; Location is set
 *   HTTP_MULTIPLE_CHOICES    several matches; the list is stored in the
 *                            'variant-list' note and the VARIANTS
 *                            environment variable
 }
function check_speling(r: Prequest_rec): Integer; cdecl;
var
  cfg: Pspconfig;
  good, bad, postgood, url: PAnsiChar;
  dirent: apr_finfo_t;
  filoc, dotloc, urlen, pglen: Integer;
  candidates: Papr_array_header_t = nil;
  dir: Papr_dir_t;
  q: sp_reason;
  sp_new, variant_: Pmisspelled_file;
  nuri, ref, vuri, reason: PAnsiChar;
  i, entloc: Integer;
  unique_best: Boolean;
  p, sub_pool: Papr_pool_t;
  notes: Papr_table_t;
  t, v: Papr_array_header_t;
begin
  cfg := Pspconfig(ap_get_module_config(r^.per_dir_config, @speling_module));

  if (cfg^.enabled = 0) then
  begin
    Result := DECLINED;
    Exit;
  end;

  { We only want to worry about GETs }
  if (r^.method_number <> M_GET) then
  begin
    Result := DECLINED;
    Exit;
  end;

  { We've already got a file of some kind or another }
  if (Integer(r^.finfo.filetype) <> 0) then
  begin
    Result := DECLINED;
    Exit;
  end;

  { Not a file request }
  if (r^.proxyreq > 0) or not assigned(r^.filename) then
  begin
    Result := DECLINED;
    Exit;
  end;

  { This is a sub request - don't mess with it }
  if (r^.main <> nil) then
  begin
    Result := DECLINED;
    Exit;
  end;

  {
   * The request should end up looking like this:
   * r->uri: /correct-url/mispelling/more
   * r->filename: /correct-file/mispelling r->path_info: /more
   *
   * So we do this in steps. First break r->filename into two pieces
   }
  filoc := ap_rind(r^.filename, '/');

  {
   * Don't do anything if the request doesn't contain a slash, or
   * requests "/"
   }
  if (filoc = -1) or (strcomp(r^.uri, '/') = 0) then
  begin
    Result := DECLINED;
    Exit;
  end;

  { good = /correct-file }
  good := apr_pstrndup(r^.pool, r^.filename, filoc);
  { bad = mispelling }
  bad := apr_pstrdup(r^.pool, r^.filename + filoc + 1);
  { postgood = mispelling/more }
  postgood := apr_pstrcat(r^.pool, [bad, r^.path_info, nil]);

  urlen := strlen(r^.uri);
  pglen := strlen(postgood);

  { Check to see if the URL pieces add up }
  if (strcomp(postgood, r^.uri + (urlen - pglen))) <> 0 then
  begin
    Result := DECLINED;
    Exit;
  end;

  { url = /correct-url }
  url := apr_pstrndup(r^.pool, r^.uri, (urlen - pglen));

  { Now open the directory and do ourselves a check... }
  if (apr_dir_open(@dir, good, r^.pool) <> APR_SUCCESS) then
  begin
    { Oops, not a directory... }
    Result := DECLINED;
    Exit;
  end;

  candidates := apr_array_make(r^.pool, 2, sizeof(misspelled_file));

  dotloc := ap_ind(bad, '.');
  if (dotloc = -1) then dotloc := strlen(bad);

  while (apr_dir_read(@dirent, APR_FINFO_DIRENT, dir) = APR_SUCCESS) do
  begin
    {
     * If we end up with a "fixed" URL which is identical to the
     * requested one, we must have found a broken symlink or some such.
     * Do _not_ try to redirect this, it causes a loop!
     }
    if (strcomp(bad, dirent.name) = 0) then
    begin
      apr_dir_close(dir);
      Result := OK;
      Exit;   { the directory is closed - reading on would be invalid }
    end
    {
     * miscapitalization errors are checked first (like, e.g., lower case
     * file, upper case request)
     }
    else if (stricomp(bad, dirent.name) = 0) then
    begin
      sp_new := Pmisspelled_file(apr_array_push(candidates));
      sp_new^.name := apr_pstrdup(r^.pool, dirent.name);
      sp_new^.quality := SP_MISCAPITALIZED;
    end
    else
    begin
      {
       * simple typing errors are checked next (like, e.g.,
       * missing/extra/transposed AnsiChar)
       }
      q := spdist(bad, dirent.name);

      if (q <> SP_VERYDIFFERENT) then
      begin
        sp_new := Pmisspelled_file(apr_array_push(candidates));
        sp_new^.name := apr_pstrdup(r^.pool, dirent.name);
        sp_new^.quality := q;
      end
      {
       * The spdist() should have found the majority of the misspelled
       * requests.  It is of questionable use to continue looking for
       * files with the same base name, but potentially of totally wrong
       * type (index.html <-> index.db).
       * I would propose to not set the WANT_BASENAME_MATCH define.
       *      08-Aug-1997 <[email protected]>
       *
       * However, Alexei replied giving some reasons to add it anyway:
       * > Oh, by the way, I remembered why having the
       * > extension-stripping-and-matching stuff is a good idea:
       * >
       * > If you're using MultiViews, and have a file named foobar.html,
       * > which you refer to as "foobar", and someone tried to access
       * > "Foobar", mod_speling won't find it, because it won't find
       * > anything matching that spelling. With the extension-munging,
       * > it would locate "foobar.html". Not perfect, but I ran into
       * > that problem when I first wrote the module.
       }
      else
      begin
{$ifdef WANT_BASENAME_MATCH}
        {
         * Okay... we didn't find anything. Now we take out the hard-core
         * power tools. There are several cases here. Someone might have
         * entered a wrong extension (.htm instead of .html or vice
         * versa) or the document could be negotiated. At any rate, now
         * we just compare stuff before the first dot. If it matches, we
         * figure we got us a match. This can result in wrong things if
         * there are files of different content types but the same prefix
         * (e.g. foo.gif and foo.html) This code will pick the first one
         * it finds. Better than a Not Found, though.
         }
        entloc := ap_ind(dirent.name, '.');
        if (entloc = -1) then entloc := strlen(dirent.name);

        if (dotloc = entloc) and (StrLIComp(bad, dirent.name, dotloc) = 0) then
        begin
          sp_new := Pmisspelled_file(apr_array_push(candidates));
          sp_new^.name := apr_pstrdup(r^.pool, dirent.name);
          sp_new^.quality := SP_VERYDIFFERENT;
        end;
{$endif}
      end;
    end;
  end;

  apr_dir_close(dir);

  if (candidates^.nelts <> 0) then
  begin
    { Wow... we found us a mispelling. Construct a fixed url }
    variant_ := Pmisspelled_file(candidates^.elts);

    ref := apr_table_get(r^.headers_in, 'Referer');

    { Best matches first; variant_ keeps pointing at the (sorted) array. }
    sort_candidates(candidates);

    {
     * Conditions for immediate redirection:
     *     a) the first candidate was not found by stripping the suffix
     * AND b) there exists only one candidate OR the best match is not
     *         ambiguous
     * then return a redirection right away.
     }
    unique_best := (candidates^.nelts = 1);
    if not unique_best then
      unique_best := (variant_[0].quality <> variant_[1].quality);

    if (variant_^.quality <> SP_VERYDIFFERENT) and unique_best then
    begin
      nuri := ap_escape_uri(r^.pool, apr_pstrcat(r^.pool, [url,
        variant_^.name, r^.path_info, nil]));

      { query is nil when the request carried no query string }
      if (r^.parsed_uri.query <> nil) then
        nuri := apr_pstrcat(r^.pool, [nuri, PAnsiChar('?'), r^.parsed_uri.query, nil]);

      apr_table_setn(r^.headers_out, 'Location',
        ap_construct_url(r^.pool, nuri, r));

      { ref is nil when the client sent no Referer header }
      if ref <> nil then
        ap_log_rerror(MODULE_NAME, 506, APLOG_INFO, APR_SUCCESS,
          r, 'Fixed spelling: %s to %s from %s', [r^.uri, nuri, ref])
      else
        ap_log_rerror(MODULE_NAME, 506, APLOG_INFO, APR_SUCCESS,
          r, 'Fixed spelling: %s to %s', [r^.uri, nuri]);

      Result := HTTP_MOVED_PERMANENTLY;
      Exit;
    end
    {
     * Otherwise, a "[300] Multiple Choices" list with the variants is
     * returned.
     }
    else
    begin
      if (r^.main = nil) then
      begin
        p := r^.pool;
        notes := r^.notes;
      end
      else
      begin
        p := r^.main^.pool;
        notes := r^.main^.notes;
      end;

      if (apr_pool_create(@sub_pool, p) <> APR_SUCCESS) then
      begin
        Result := DECLINED;
        Exit;
      end;

      t := apr_array_make(sub_pool, candidates^.nelts * 8 + 8, sizeof(PAnsiChar));
      v := apr_array_make(sub_pool, candidates^.nelts * 5, sizeof(PAnsiChar));

      { Generate the response text. }
      PPAnsiChar(apr_array_push(t))^ := 'The document name you requested (<code>';
      PPAnsiChar(apr_array_push(t))^ := ap_escape_html(sub_pool, r^.uri);
      PPAnsiChar(apr_array_push(t))^ :=
        '</code>) could not be found on this server.' + LineEnding +
        'However, we found documents with names similar ' +
        'to the one you requested.<p>' +
        'Available documents:' + LineEnding + '<ul>' + LineEnding;

      for i := 0 to candidates^.nelts - 1 do
      begin
        reason := sp_reason_str[Ord(variant_[i].quality)];

        { The format isn't very neat... }
        if r^.parsed_uri.query <> nil then
          vuri := apr_pstrcat(sub_pool, [url, variant_[i].name, r^.path_info,
            PAnsiChar('?'), r^.parsed_uri.query, nil])
        else
          vuri := apr_pstrcat(sub_pool, [url, variant_[i].name, r^.path_info,
            nil]);

        PPAnsiChar(apr_array_push(v))^ := '"';
        PPAnsiChar(apr_array_push(v))^ := ap_escape_uri(sub_pool, vuri);
        PPAnsiChar(apr_array_push(v))^ := '";"';
        PPAnsiChar(apr_array_push(v))^ := reason;
        PPAnsiChar(apr_array_push(v))^ := '"';

        PPAnsiChar(apr_array_push(t))^ := '<li><a href="';
        PPAnsiChar(apr_array_push(t))^ := ap_escape_uri(sub_pool, vuri);
        PPAnsiChar(apr_array_push(t))^ := '">';
        PPAnsiChar(apr_array_push(t))^ := ap_escape_html(sub_pool, vuri);
        PPAnsiChar(apr_array_push(t))^ := '</a> (';
        PPAnsiChar(apr_array_push(t))^ := reason;
        PPAnsiChar(apr_array_push(t))^ := ')' + LineEnding;

        {
         * when we have printed the "close matches" and there are
         * more "distant matches" (matched by stripping the suffix),
         * then we insert an additional separator text to suggest
         * that the user LOOK CLOSELY whether these are really the
         * files she wanted.
         }
        if (i > 0) and (i < candidates^.nelts - 1) then
          if (variant_[i].quality <> SP_VERYDIFFERENT)
             and (variant_[i + 1].quality = SP_VERYDIFFERENT) then
            PPAnsiChar(apr_array_push(t))^ :=
              '</ul>' + LineEnding + 'Furthermore, the following related ' +
              'documents were found:' + LineEnding + '<ul>' + LineEnding;
      end;

      PPAnsiChar(apr_array_push(t))^ := '</ul>' + LineEnding;

      { If we know there was a referring page, add a note: }
      if (ref <> nil) then
      begin
        PPAnsiChar(apr_array_push(t))^ :=
          'Please consider informing the owner of the <a href="';
        PPAnsiChar(apr_array_push(t))^ := ap_escape_uri(sub_pool, ref);
        PPAnsiChar(apr_array_push(t))^ := '">referring page</a> about the broken link.' + LineEnding;
      end;

      { Pass our apr_table_t to http_protocol.c (see mod_negotiation): }
      apr_table_setn(notes, 'variant-list', apr_array_pstrcat(p, t, #0));

      apr_table_mergen(r^.subprocess_env, 'VARIANTS', apr_array_pstrcat(p, v, ','));

      { Both strings above were built in p, so the scratch pool can go. }
      apr_pool_destroy(sub_pool);

      if ref <> nil then
        ap_log_rerror(MODULE_NAME, 609, APLOG_INFO, 0, r,
          'Spelling fix: %s: %d candidates from %s', [r^.uri, candidates^.nelts, ref])
      else
        ap_log_rerror(MODULE_NAME, 609, APLOG_INFO, 0, r,
          'Spelling fix: %s: %d candidates', [r^.uri, candidates^.nelts]);

      Result := HTTP_MULTIPLE_CHOICES;
      Exit;   { without this the trailing Result := OK would win }
    end;
  end;

  Result := OK;
end;

{
 * Registers check_speling as a fixup hook with APR_HOOK_LAST ordering.
 }
procedure register_hooks_(p: Papr_pool_t); cdecl;
begin
  ap_hook_fixups(@check_speling, nil, nil, APR_HOOK_LAST);
end;

{ Library initialization: zero the module record and fill in the directive
  table and the module callbacks. }
begin
  default_module_ptr := @speling_module;
  FillChar(default_module_ptr^, SizeOf(default_module_ptr^), 0);

  STANDARD20_MODULE_STUFF(default_module_ptr^);

  {* Define the directives specific to this module.  This structure is referenced
   * later by the 'module' structure.
   }
  with speling_cmds do
  begin
    name := 'CheckSpelling';
    func := cmd_func(@set_speling);
    cmd_data := nil;
    req_override := OR_OPTIONS;
    args_how := FLAG;
    errmsg := 'whether or not to fix miscapitalized/misspelled requests';
  end;

  with speling_module do
  begin
    name := MODULE_NAME;
    magic := MODULE_MAGIC_COOKIE;
    create_dir_config := @create_mconfig_for_directory;    { per-directory config creator }
    merge_dir_config := nil;     { dir config merger }
    create_server_config := @create_mconfig_for_server; { server config creator }
    merge_server_config := nil;  { server config merger }
    cmds := @speling_cmds;                 { command table }
    register_hooks := @register_hooks_; { set up other request processing hooks }
  end;
end.