#
# You can use this CREATE statement for the "url" table instead of the
# default one. This structure is useful for huge "cache mode" databases
# with several million URLs.
#
#   New features of this scheme:
#
# * Support for MySQL RAID to break 2/4G data file size limit.
# * Relatively small "url.MYI" index file size:
#     there is no unique index on "url" field.
# * Quick search for expired documents at indexing time using 
#     "key_next_index_time" index. It significantly improves 
#     indexing speed for big databases.
# * UNIQUE rec_id is generated in indexer using CRC32(url)
# * It turns on large file MySQL support for "url" table.
#
# Disadvantage:
#  * This scheme will probably lose some documents, because the CRC32
#  algorithm which is used for rec_id generation can give the same value for
#  different URLs. According to our tests it gives approximately
#  100 URL pairs with the same CRC32 within 3.5 million unique URLs.
#  It means that about 0.0028% of documents will be lost.
#
# Requires:
# * Specify "--with-raid" and omit "--disable-large-files" when
#      installing MySQL.
# * Use "UseCRC32UrlID yes" command in your indexer.conf
#


# Remove any previous "url" table so that it can be re-created by the
# CREATE TABLE statement in this script.
# IF EXISTS keeps the script from failing on a fresh database where the
# table has not been created yet.
DROP TABLE IF EXISTS url;

# "url" table variant for huge "cache mode" databases.
#
#   * rec_id is the primary key. It has no AUTO_INCREMENT attribute: the
#     value is expected to be supplied by the indexer (CRC32 of the URL
#     when "UseCRC32UrlID yes" is set in indexer.conf).
#   * There is deliberately no index on the "url" column itself; only
#     crc32 and next_index_time are indexed in addition to the primary key.
#   * The RAID_* table options require a MySQL server built with RAID
#     support.
#   * MAX_ROWS and AVG_ROW_LENGTH are sizing hints for the table
#     (presumably what enables data files beyond the 2/4G limit -
#     verify against the MySQL manual for the server version in use).
#
# NOTE(review): the MySQL manual appears to spell the striped RAID type
# as "STRIPED" (one P). The value below is kept exactly as it was -
# confirm that the target server accepts this spelling.
CREATE TABLE url (
  rec_id          INT(11)      NOT NULL DEFAULT '0',
  status          INT(11)      NOT NULL DEFAULT '0',
  url             VARCHAR(128) NOT NULL DEFAULT '',
  content_type    VARCHAR(48)  NOT NULL DEFAULT '',
  title           VARCHAR(128) NOT NULL DEFAULT '',
  txt             VARCHAR(255) NOT NULL DEFAULT '',
  docsize         INT(11)      NOT NULL DEFAULT '0',
  last_index_time INT          NOT NULL,
  next_index_time INT          NOT NULL,
  last_mod_time   INT          NOT NULL,
  referrer        INT(11)      NOT NULL DEFAULT '0',
  tag             VARCHAR(11)  NOT NULL DEFAULT '0',
  hops            INT(11)      NOT NULL DEFAULT '0',
  category        VARCHAR(11)  NOT NULL DEFAULT '',
  keywords        VARCHAR(255) NOT NULL DEFAULT '',
  description     VARCHAR(100) NOT NULL DEFAULT '',
  crc32           INT(11)      NOT NULL DEFAULT '0',
  lang            VARCHAR(2)   NOT NULL DEFAULT '',
  PRIMARY KEY (rec_id),
  KEY key_crc (crc32),
  KEY key_next_index_time (next_index_time)
)
  RAID_TYPE=STRIPPED
  RAID_CHUNKS=16
  RAID_CHUNKSIZE=256
  MAX_ROWS=100000000
  AVG_ROW_LENGTH=512
;
