| 1 | --- a/Documentation/Configure.help |
| 2 | +++ b/Documentation/Configure.help |
| 3 | @@ -29252,6 +29252,18 @@ CONFIG_SOUND_WM97XX |
| 4 | |
| 5 | If unsure, say N. |
| 6 | |
| 7 | +CONFIG_IP_NF_MATCH_LAYER7 |
| 8 | + Say Y if you want to be able to classify connections (and their |
| 9 | + packets) based on regular expression matching of their application |
| 10 | + layer data. This is one way to classify applications such as |
| 11 | + peer-to-peer filesharing systems that do not always use the same |
| 12 | + port. |
| 13 | + |
| 14 | + To compile it as a module, choose M here. If unsure, say N. |
| 15 | + |
| 16 | +CONFIG_IP_NF_MATCH_LAYER7_DEBUG |
| 17 | + Say Y to get lots of debugging output. |
| 18 | + |
| 19 | # |
| 20 | # A couple of things I keep forgetting: |
| 21 | # capitalize: AppleTalk, Ethernet, DOS, DMA, FAT, FTP, Internet, |
| 22 | --- a/include/linux/netfilter_ipv4/ip_conntrack.h |
| 23 | +++ b/include/linux/netfilter_ipv4/ip_conntrack.h |
| 24 | @@ -207,6 +207,17 @@ struct ip_conntrack |
| 25 | } nat; |
| 26 | #endif /* CONFIG_IP_NF_NAT_NEEDED */ |
| 27 | |
| 28 | +#if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) |
| 29 | + struct { |
| 30 | + unsigned int numpackets; /* packets of this connection counted so far by the layer7 match */ |
| 31 | + char * app_proto; /* "http", "ftp", etc. NULL if unclassified */ |
| 32 | + |
| 33 | + /* the application layer data so far. NULL once the layer7 match has stopped collecting data */ |
| 34 | + char * app_data; |
| 35 | + |
| 36 | + unsigned int app_data_len; /* length of the string in app_data, not counting its terminating NUL */ |
| 37 | + } layer7; |
| 38 | +#endif |
| 39 | }; |
| 40 | |
| 41 | /* get master conntrack via master expectation */ |
| 42 | --- /dev/null |
| 43 | +++ b/include/linux/netfilter_ipv4/ipt_layer7.h |
| 44 | @@ -0,0 +1,26 @@ |
| 45 | +/* |
| 46 | + By Matthew Strait <quadong@users.sf.net>, Dec 2003. |
| 47 | + http://l7-filter.sf.net |
| 48 | + |
| 49 | + This program is free software; you can redistribute it and/or |
| 50 | + modify it under the terms of the GNU General Public License |
| 51 | + as published by the Free Software Foundation; either version |
| 52 | + 2 of the License, or (at your option) any later version. |
| 53 | + http://www.gnu.org/licenses/gpl.txt |
| 54 | +*/ |
| 55 | + |
| 56 | +#ifndef _IPT_LAYER7_H |
| 57 | +#define _IPT_LAYER7_H |
| 58 | + |
| 59 | +#define MAX_PATTERN_LEN 8192 |
| 60 | +#define MAX_PROTOCOL_LEN 256 |
| 61 | + |
| 62 | +typedef char *(*proc_ipt_search) (char *, char, char *); |
| 63 | + |
| 64 | +struct ipt_layer7_info { /* per-rule data that match() receives as matchinfo */ |
| 65 | + char protocol[MAX_PROTOCOL_LEN]; /* name recorded in app_proto when the pattern matches */ |
| 66 | + unsigned char invert:1; /* must be unsigned: a signed 1-bit field reads back as -1, which breaks "result ^ invert" */ |
| 67 | + char pattern[MAX_PATTERN_LEN]; /* regexp source text, compiled by compile_and_cache() */ |
| 68 | +}; |
| 69 | + |
| 70 | +#endif /* _IPT_LAYER7_H */ |
| 71 | --- a/net/ipv4/netfilter/Config.in |
| 72 | +++ b/net/ipv4/netfilter/Config.in |
| 73 | @@ -44,6 +44,9 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; |
| 74 | if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then |
| 75 | dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES |
| 76 | dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES |
| 77 | + dep_tristate ' Layer 7 match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7 $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES |
| 78 | + dep_mbool ' Layer 7 debugging output (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7_DEBUG $CONFIG_IP_NF_MATCH_LAYER7 |
| 79 | + |
| 80 | fi |
| 81 | # The targets |
| 82 | dep_tristate ' Packet filtering' CONFIG_IP_NF_FILTER $CONFIG_IP_NF_IPTABLES |
| 83 | --- a/net/ipv4/netfilter/Makefile |
| 84 | +++ b/net/ipv4/netfilter/Makefile |
| 85 | @@ -87,6 +87,7 @@ obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_s |
| 86 | obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o |
| 87 | obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o |
| 88 | obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o |
| 89 | +obj-$(CONFIG_IP_NF_MATCH_LAYER7) += ipt_layer7.o |
| 90 | |
| 91 | # targets |
| 92 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o |
| 93 | --- a/net/ipv4/netfilter/ip_conntrack_core.c |
| 94 | +++ b/net/ipv4/netfilter/ip_conntrack_core.c |
| 95 | @@ -346,6 +346,14 @@ destroy_conntrack(struct nf_conntrack *n |
| 96 | } |
| 97 | kfree(ct->master); |
| 98 | } |
| 99 | + |
| 100 | + #if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) |
| 101 | + if(ct->layer7.app_proto) |
| 102 | + kfree(ct->layer7.app_proto); |
| 103 | + if(ct->layer7.app_data) |
| 104 | + kfree(ct->layer7.app_data); |
| 105 | + #endif |
| 106 | + |
| 107 | WRITE_UNLOCK(&ip_conntrack_lock); |
| 108 | |
| 109 | if (master) |
| 110 | --- a/net/ipv4/netfilter/ip_conntrack_standalone.c |
| 111 | +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c |
| 112 | @@ -107,6 +107,13 @@ print_conntrack(char *buffer, struct ip_ |
| 113 | len += sprintf(buffer + len, "[ASSURED] "); |
| 114 | len += sprintf(buffer + len, "use=%u ", |
| 115 | atomic_read(&conntrack->ct_general.use)); |
| 116 | + |
| 117 | + #if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) |
| 118 | + if(conntrack->layer7.app_proto) |
| 119 | + len += sprintf(buffer + len, "l7proto=%s ", |
| 120 | + conntrack->layer7.app_proto); |
| 121 | + #endif |
| 122 | + |
| 123 | len += sprintf(buffer + len, "\n"); |
| 124 | |
| 125 | return len; |
| 126 | --- /dev/null |
| 127 | +++ b/net/ipv4/netfilter/ipt_layer7.c |
| 128 | @@ -0,0 +1,570 @@ |
| 129 | +/* |
| 130 | + Kernel module to match application layer (OSI layer 7) |
| 131 | + data in connections. |
| 132 | + |
| 133 | + http://l7-filter.sf.net |
| 134 | + |
| 135 | + By Matthew Strait and Ethan Sommer, 2003-2005. |
| 136 | + |
| 137 | + This program is free software; you can redistribute it and/or |
| 138 | + modify it under the terms of the GNU General Public License |
| 139 | + as published by the Free Software Foundation; either version |
| 140 | + 2 of the License, or (at your option) any later version. |
| 141 | + http://www.gnu.org/licenses/gpl.txt |
| 142 | + |
| 143 | + Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be> |
| 144 | + and cls_layer7.c (C) 2003 Matthew Strait, Ethan Sommer, Justin Levandoski |
| 145 | +*/ |
| 146 | + |
| 147 | +#include <linux/module.h> |
| 148 | +#include <linux/skbuff.h> |
| 149 | +#include <linux/netfilter_ipv4/ip_conntrack.h> |
| 150 | +#include <linux/proc_fs.h> |
| 151 | +#include <linux/ctype.h> |
| 152 | +#include <net/ip.h> |
| 153 | +#include <net/tcp.h> |
| 154 | +#include <linux/netfilter_ipv4/lockhelp.h> |
| 155 | + |
| 156 | +#include "regexp/regexp.c" |
| 157 | + |
| 158 | +#include <linux/netfilter_ipv4/ipt_layer7.h> |
| 159 | +#include <linux/netfilter_ipv4/ip_tables.h> |
| 160 | + |
| 161 | +MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>"); |
| 162 | +MODULE_LICENSE("GPL"); |
| 163 | +MODULE_DESCRIPTION("iptables application layer match module"); |
| 164 | + |
| 165 | +static int maxdatalen = 2048; // this is the default |
| 166 | +MODULE_PARM(maxdatalen,"i"); |
| 167 | +MODULE_PARM_DESC(maxdatalen,"maximum bytes of data looked at by l7-filter"); |
| 168 | + |
| 169 | +#if defined(CONFIG_IP_NF_MATCH_LAYER7_DEBUG) |
| 170 | + #define DPRINTK(format,args...) printk(format,##args) |
| 171 | +#else |
| 172 | + #define DPRINTK(format,args...) |
| 173 | +#endif |
| 174 | + |
| 175 | +#define TOTAL_PACKETS master_conntrack->layer7.numpackets |
| 176 | + |
| 177 | +/* Number of packets whose data we look at. |
| 178 | +This can be modified through /proc/net/layer7_numpackets */ |
| 179 | +static int num_packets = 10; |
| 180 | + |
| 181 | +static struct pattern_cache { /* singly linked list of compiled regexps, filled by compile_and_cache() */ |
| 182 | + char * regex_string; /* regexp source text; the lookup key */ |
| 183 | + regexp * pattern; /* compiled form; NULL if compilation failed */ |
| 184 | + struct pattern_cache * next; /* NULL at the end of the list */ |
| 185 | +} * first_pattern_cache = NULL; /* list head; NULL while the cache is empty */ |
| 186 | + |
| 187 | +/* I'm new to locking. Here are my assumptions: |
| 188 | + |
| 189 | +- No one will write to /proc/net/layer7_numpackets over and over very fast; |
| 190 | + if they did, nothing awful would happen. |
| 191 | + |
| 192 | +- This code will never be processing the same packet twice at the same time, |
| 193 | + because iptables rules are traversed in order. |
| 194 | + |
| 195 | +- It doesn't matter if two packets from different connections are in here at |
| 196 | + the same time, because they don't share any data. |
| 197 | + |
| 198 | +- It _does_ matter if two packets from the same connection are here at the same |
| 199 | + time. In this case, we have to protect the conntracks and the list of |
| 200 | + compiled patterns. |
| 201 | +*/ |
| 202 | +DECLARE_RWLOCK(ct_lock); |
| 203 | +DECLARE_LOCK(list_lock); |
| 204 | + |
| 205 | +#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG |
| 206 | +/* Returns a kmalloc'd copy of s in which whitespace is replaced by " " and other unprintable |
| 207 | +bytes by periods, or NULL if out of memory. The caller must kfree() the result. */ |
| 208 | +static char * friendly_print(unsigned char * s) |
| 209 | +{ |
| 210 | + int i, len = strlen((char *)s); /* measure once instead of on every loop iteration */ |
| 211 | + char * f = kmalloc(len + 1, GFP_ATOMIC); |
| 212 | + |
| 213 | + if(!f) { |
| 214 | + if (net_ratelimit()) |
| 215 | + printk(KERN_ERR "layer7: out of memory in friendly_print, bailing.\n"); |
| 216 | + return NULL; |
| 217 | + } |
| 218 | + |
| 219 | + for(i = 0; i < len; i++){ |
| 220 | + if(isprint(s[i]) && s[i] < 128) f[i] = s[i]; |
| 221 | + else if(isspace(s[i])) f[i] = ' '; |
| 222 | + else f[i] = '.'; |
| 223 | + } |
| 224 | + f[i] = '\0'; |
| 225 | + return f; |
| 226 | +} |
| 227 | + |
| 228 | +static char dec2hex(int i) |
| 229 | +{ |
| 230 | + /* Maps a nibble value (0..15) to its lower case hex digit. */ |
| 231 | + static const char digits[] = "0123456789abcdef"; |
| 232 | + |
| 233 | + if (i >= 0 && i <= 15) |
| 234 | + return digits[i]; |
| 235 | + |
| 236 | + /* Any other value is outside the range of a nibble: |
| 237 | + complain (rate limited) and return NUL instead of |
| 238 | + a digit. */ |
| 239 | + if (net_ratelimit()) |
| 240 | + printk("Problem in dec2hex\n"); |
| 241 | + return '\0'; |
| 242 | +} |
| 243 | + |
| 244 | +static char * hex_print(unsigned char * s) |
| 245 | +{ |
| 246 | + /* Returns a kmalloc'd "xx xx ..." hex dump of the string s, or NULL if out of memory. The caller must kfree() it. */ |
| 247 | + int i, len = strlen((char *)s); /* measure once instead of on every loop iteration */ |
| 248 | + char * g = kmalloc(len*3 + 1, GFP_ATOMIC); |
| 249 | + if(!g) { |
| 250 | + if (net_ratelimit()) |
| 251 | + printk(KERN_ERR "layer7: out of memory in hex_print, bailing.\n"); |
| 252 | + return NULL; |
| 253 | + } |
| 254 | + |
| 255 | + for(i = 0; i < len; i++) { |
| 256 | + g[i*3 ] = dec2hex(s[i]/16); |
| 257 | + g[i*3 + 1] = dec2hex(s[i]%16); |
| 258 | + g[i*3 + 2] = ' '; |
| 259 | + } |
| 260 | + g[i*3] = '\0'; |
| 261 | + |
| 262 | + return g; |
| 263 | +} |
| 264 | +#endif // DEBUG |
| 265 | + |
| 266 | +/* Use instead of regcomp. As we expect to be seeing the same regexps over and |
| 267 | +over again, it makes sense to cache the results. Returns NULL on failure. */ |
| 268 | +static regexp * compile_and_cache(char * regex_string, char * protocol) |
| 269 | +{ |
| 270 | + struct pattern_cache * node = first_pattern_cache; |
| 271 | + struct pattern_cache * last_pattern_cache = first_pattern_cache; |
| 272 | + struct pattern_cache * tmp; |
| 273 | + unsigned int len; |
| 274 | + |
| 275 | + while (node != NULL) { |
| 276 | + if (!strcmp(node->regex_string, regex_string)) |
| 277 | + return node->pattern; |
| 278 | + |
| 279 | + last_pattern_cache = node;/* points at the last non-NULL node */ |
| 280 | + node = node->next; |
| 281 | + } |
| 282 | + |
| 283 | + /* If we reach the end of the list, then we have not yet cached |
| 284 | + the pattern for this regex. Let's do that now. |
| 285 | + Be paranoid about running out of memory to avoid list corruption. */ |
| 286 | + tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC); |
| 287 | + |
| 288 | + if(!tmp) { |
| 289 | + if (net_ratelimit()) |
| 290 | + printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n"); |
| 291 | + return NULL; |
| 292 | + } |
| 293 | + |
| 294 | + tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC); |
| 295 | + tmp->pattern = NULL; /* set from regcomp() below; a buffer allocated here would be overwritten and leaked */ |
| 296 | + tmp->next = NULL; |
| 297 | + |
| 298 | + if(!tmp->regex_string) { |
| 299 | + if (net_ratelimit()) |
| 300 | + printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n"); |
| 301 | + /* only the node itself was allocated successfully */ |
| 302 | + kfree(tmp); |
| 303 | + return NULL; |
| 304 | + } |
| 305 | + |
| 306 | + |
| 307 | + /* Fill in the key before the node becomes reachable from the list. */ |
| 308 | + strcpy(tmp->regex_string, regex_string); |
| 309 | + node = tmp; |
| 310 | + |
| 311 | + if(first_pattern_cache == NULL) /* list is empty */ |
| 312 | + first_pattern_cache = node; /* make node the beginning */ |
| 313 | + else |
| 314 | + last_pattern_cache->next = node; /* attach node to the end */ |
| 315 | + |
| 316 | + /* compile the regex; a NULL result means it failed to compile */ |
| 317 | + len = strlen(regex_string); |
| 318 | + DPRINTK("About to compile this: \"%s\"\n", regex_string); |
| 319 | + node->pattern = regcomp(regex_string, &len); |
| 320 | + if ( !node->pattern ) { |
| 321 | + if (net_ratelimit()) |
| 322 | + printk(KERN_ERR "layer7: Error compiling regexp \"%s\" (%s)\n", regex_string, protocol); |
| 323 | + /* pattern is now cached as NULL, so we won't try again. */ |
| 324 | + } |
| 325 | + |
| 326 | + return node->pattern; |
| 327 | +} |
| 328 | + |
| 329 | +static int can_handle(const struct sk_buff *skb) |
| 330 | +{ |
| 331 | + const struct iphdr * iph = skb->nh.iph; |
| 332 | + |
| 333 | + /* Only IP packets carrying TCP, UDP or ICMP are examined. */ |
| 334 | + if(iph == NULL) |
| 335 | + return 0; |
| 336 | + return iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || |
| 337 | + iph->protocol == IPPROTO_ICMP; |
| 338 | +} |
| 339 | + |
| 340 | +/* Returns the offset into skb->data at which the application data starts */ |
| 341 | +static int app_data_offset(const struct sk_buff *skb) |
| 342 | +{ |
| 343 | + /* In case we are ported somewhere (ebtables?) where skb->nh.iph |
| 344 | + isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */ |
| 345 | + const int ip_hl = 4*skb->nh.iph->ihl; |
| 346 | + |
| 347 | + switch(skb->nh.iph->protocol) { |
| 348 | + case IPPROTO_TCP: |
| 349 | + /* 12 == offset into TCP header for the header length field. |
| 350 | + Can't get this with skb->h.th->doff because the tcphdr |
| 351 | + struct doesn't get set when routing (this is confirmed to be |
| 352 | + true in Netfilter as well as QoS.) */ |
| 353 | + return ip_hl + 4*(skb->data[ip_hl + 12] >> 4); |
| 354 | + case IPPROTO_UDP: /* UDP header is always 8 bytes */ |
| 355 | + case IPPROTO_ICMP: /* ICMP header is 8 bytes */ |
| 356 | + return ip_hl + 8; |
| 357 | + default: |
| 358 | + break; |
| 359 | + } |
| 360 | + |
| 361 | + if (net_ratelimit()) |
| 362 | + printk(KERN_ERR "layer7: tried to handle unknown protocol!\n"); |
| 363 | + return ip_hl + 8; /* something reasonable */ |
| 364 | +} |
| 365 | + |
| 366 | +/* Decides the match once no more data is being appended: frees the collected data, then compares the recorded protocol with the rule's. Returns nonzero on a match. */ |
| 367 | +static int match_no_append(struct ip_conntrack * conntrack, struct ip_conntrack * master_conntrack, |
| 368 | + enum ip_conntrack_info ctinfo, enum ip_conntrack_info master_ctinfo, |
| 369 | + struct ipt_layer7_info * info) |
| 370 | +{ |
| 371 | + /* If we're in here, throw the app data away */ |
| 372 | + WRITE_LOCK(&ct_lock); |
| 373 | + if(master_conntrack->layer7.app_data != NULL) { |
| 374 | + |
| 375 | + #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG |
| 376 | + if(!master_conntrack->layer7.app_proto) { /* f and g are NULL if the helpers ran out of memory */ |
| 377 | + char * f = friendly_print(master_conntrack->layer7.app_data); |
| 378 | + char * g = hex_print(master_conntrack->layer7.app_data); |
| 379 | + if(f) DPRINTK("\nl7-filter gave up after %d bytes (%d packets):\n%s\n", |
| 380 | + (int)strlen(f), |
| 381 | + TOTAL_PACKETS, f); |
| 382 | + kfree(f); |
| 383 | + if(g) DPRINTK("In hex: %s\n", g); |
| 384 | + kfree(g); |
| 385 | + } |
| 386 | + #endif |
| 387 | + |
| 388 | + kfree(master_conntrack->layer7.app_data); |
| 389 | + master_conntrack->layer7.app_data = NULL; /* don't free again */ |
| 390 | + } |
| 391 | + WRITE_UNLOCK(&ct_lock); |
| 392 | + |
| 393 | + if(master_conntrack->layer7.app_proto){ |
| 394 | + /* Here child connections set their .app_proto (for /proc/net/ip_conntrack) */ |
| 395 | + WRITE_LOCK(&ct_lock); |
| 396 | + if(!conntrack->layer7.app_proto) { |
| 397 | + conntrack->layer7.app_proto = kmalloc(strlen(master_conntrack->layer7.app_proto)+1, GFP_ATOMIC); |
| 398 | + if(!conntrack->layer7.app_proto){ |
| 399 | + if (net_ratelimit()) |
| 400 | + printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n"); |
| 401 | + WRITE_UNLOCK(&ct_lock); |
| 402 | + return 1; /* NOTE(review): reports a match on allocation failure - confirm this is intended */ |
| 403 | + } |
| 404 | + strcpy(conntrack->layer7.app_proto, master_conntrack->layer7.app_proto); |
| 405 | + } |
| 406 | + WRITE_UNLOCK(&ct_lock); |
| 407 | + |
| 408 | + return (!strcmp(master_conntrack->layer7.app_proto, info->protocol)); |
| 409 | + } |
| 410 | + else { |
| 411 | + /* If not classified, set to "unknown" to distinguish from |
| 412 | + connections that are still being tested. */ |
| 413 | + WRITE_LOCK(&ct_lock); |
| 414 | + master_conntrack->layer7.app_proto = kmalloc(strlen("unknown")+1, GFP_ATOMIC); |
| 415 | + if(!master_conntrack->layer7.app_proto){ |
| 416 | + if (net_ratelimit()) |
| 417 | + printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n"); |
| 418 | + WRITE_UNLOCK(&ct_lock); |
| 419 | + return 1; /* NOTE(review): reports a match on allocation failure - confirm this is intended */ |
| 420 | + } |
| 421 | + strcpy(master_conntrack->layer7.app_proto, "unknown"); |
| 422 | + WRITE_UNLOCK(&ct_lock); |
| 423 | + return 0; |
| 424 | + } |
| 425 | +} |
| 426 | + |
| 427 | +/* Appends the packet's payload to the data collected for the connection. Returns the number of bytes stored. */ |
| 428 | +static int add_data(struct ip_conntrack * master_conntrack, |
| 429 | + char * app_data, int appdatalen) |
| 430 | +{ |
| 431 | + const int oldlength = master_conntrack->layer7.app_data_len; |
| 432 | + char * dst = master_conntrack->layer7.app_data + oldlength; /* current end of the stored data */ |
| 433 | + int length = 0, i; |
| 434 | + |
| 435 | + /* NUL bytes are dropped and ASCII letters are lowercased (our regex lib |
| 436 | + doesn't do case insensitivity). At most maxdatalen-oldlength-1 input |
| 437 | + bytes are examined, which leaves room for the terminator. */ |
| 438 | + for(i = 0; i < appdatalen && i < maxdatalen-oldlength-1; i++) { |
| 439 | + const char c = app_data[i]; |
| 440 | + if(c == '\0') |
| 441 | + continue; |
| 442 | + /* the kernel version of tolower mungs 'upper ascii' */ |
| 443 | + dst[length++] = isascii(c)? tolower(c) : c; |
| 444 | + } |
| 445 | + |
| 446 | + dst[length] = '\0'; |
| 447 | + master_conntrack->layer7.app_data_len = oldlength + length; |
| 448 | + return length; |
| 449 | +} |
| 450 | + |
| 451 | +/* iptables match hook. Returns true on match and false otherwise, after applying info->invert. */ |
| 452 | +static int match(/* const */struct sk_buff *skb, const struct net_device *in, |
| 453 | + const struct net_device *out, const void *matchinfo, |
| 454 | + int offset, int *hotdrop) |
| 455 | +{ |
| 456 | + struct ipt_layer7_info * info = (struct ipt_layer7_info *)matchinfo; |
| 457 | + enum ip_conntrack_info master_ctinfo, ctinfo; |
| 458 | + struct ip_conntrack *master_conntrack, *conntrack; |
| 459 | + unsigned char * app_data; |
| 460 | + unsigned int pattern_result, appdatalen; |
| 461 | + regexp * comppattern; |
| 462 | + |
| 463 | + if(!can_handle(skb)){ |
| 464 | + DPRINTK("layer7: This is some protocol I can't handle.\n"); |
| 465 | + return info->invert; |
| 466 | + } |
| 467 | + |
| 468 | + /* Treat the parent and all its children together as one connection, |
| 469 | + except for the purpose of setting conntrack->layer7.app_proto in the |
| 470 | + actual connection. This makes /proc/net/ip_conntrack somewhat more |
| 471 | + satisfying. */ |
| 472 | + if(!(conntrack = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) || |
| 473 | + !(master_conntrack = ip_conntrack_get((struct sk_buff *)skb, &master_ctinfo))) { |
| 474 | + DPRINTK("layer7: packet is not from a known connection, giving up.\n"); |
| 475 | + return info->invert; |
| 476 | + } |
| 477 | + |
| 478 | + /* Try to get a master conntrack (and its master etc) for FTP, etc. */ |
| 479 | + while (master_ct(master_conntrack) != NULL) |
| 480 | + master_conntrack = master_ct(master_conntrack); |
| 481 | + |
| 482 | + if(!skb->cb[0]){ |
| 483 | + WRITE_LOCK(&ct_lock); |
| 484 | + master_conntrack->layer7.numpackets++;/*starts at 0 via memset*/ |
| 485 | + WRITE_UNLOCK(&ct_lock); |
| 486 | + } |
| 487 | + |
| 488 | + /* if we've classified it or seen too many packets */ |
| 489 | + if(TOTAL_PACKETS > num_packets || |
| 490 | + master_conntrack->layer7.app_proto) { |
| 491 | + |
| 492 | + pattern_result = match_no_append(conntrack, master_conntrack, ctinfo, master_ctinfo, info); |
| 493 | + |
| 494 | + /* skb->cb[0] == seen. Avoid doing things twice if there are two l7 |
| 495 | + rules. I'm not sure that using cb for this purpose is correct, although |
| 496 | + it says "put your private variables there". But it doesn't look like it |
| 497 | + is being used for anything else in the skbs that make it here. How can |
| 498 | + I write to cb without making the compiler angry? */ |
| 499 | + skb->cb[0] = 1; /* marking it seen here is probably irrelevant, but consistent */ |
| 500 | + |
| 501 | + return (pattern_result ^ info->invert); |
| 502 | + } |
| 503 | + |
| 504 | + if(skb_is_nonlinear(skb)){ |
| 505 | + if(skb_linearize(skb, GFP_ATOMIC) != 0){ |
| 506 | + if (net_ratelimit()) |
| 507 | + printk(KERN_ERR "layer7: failed to linearize packet, bailing.\n"); |
| 508 | + return info->invert; |
| 509 | + } |
| 510 | + } |
| 511 | + |
| 512 | + /* now that the skb is linearized, it's safe to set these. */ |
| 513 | + app_data = skb->data + app_data_offset(skb); |
| 514 | + appdatalen = skb->tail - app_data; |
| 515 | + |
| 516 | + LOCK_BH(&list_lock); |
| 517 | + /* the return value gets checked later, when we're ready to use it */ |
| 518 | + comppattern = compile_and_cache(info->pattern, info->protocol); |
| 519 | + UNLOCK_BH(&list_lock); |
| 520 | + |
| 521 | + /* On the first packet of a connection, allocate space for app data */ |
| 522 | + WRITE_LOCK(&ct_lock); |
| 523 | + if(TOTAL_PACKETS == 1 && !skb->cb[0] && !master_conntrack->layer7.app_data) { |
| 524 | + master_conntrack->layer7.app_data = kmalloc(maxdatalen, GFP_ATOMIC); |
| 525 | + if(!master_conntrack->layer7.app_data){ |
| 526 | + if (net_ratelimit()) |
| 527 | + printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); |
| 528 | + WRITE_UNLOCK(&ct_lock); |
| 529 | + return info->invert; |
| 530 | + } |
| 531 | + |
| 532 | + master_conntrack->layer7.app_data[0] = '\0'; |
| 533 | + } |
| 534 | + WRITE_UNLOCK(&ct_lock); |
| 535 | + |
| 536 | + /* Can be here, but unallocated, if num_packets is increased near |
| 537 | + the beginning of a connection */ |
| 538 | + if(master_conntrack->layer7.app_data == NULL) |
| 539 | + return (info->invert); /* unmatched */ |
| 540 | + |
| 541 | + if(!skb->cb[0]){ |
| 542 | + int newbytes; |
| 543 | + WRITE_LOCK(&ct_lock); |
| 544 | + newbytes = add_data(master_conntrack, app_data, appdatalen); |
| 545 | + WRITE_UNLOCK(&ct_lock); |
| 546 | + |
| 547 | + if(newbytes == 0) { /* didn't add any data */ |
| 548 | + skb->cb[0] = 1; |
| 549 | + /* Didn't match before, not going to match now */ |
| 550 | + return info->invert; |
| 551 | + } |
| 552 | + } |
| 553 | + |
| 554 | + /* If looking for "unknown", then never match. "Unknown" means that |
| 555 | + we've given up; we're still trying with these packets. */ |
| 556 | + if(!strcmp(info->protocol, "unknown")) { |
| 557 | + pattern_result = 0; |
| 558 | + /* If the regexp failed to compile, don't bother running it */ |
| 559 | + } else if(comppattern && regexec(comppattern, master_conntrack->layer7.app_data)) { |
| 560 | + DPRINTK("layer7: regexec positive: %s!\n", info->protocol); |
| 561 | + pattern_result = 1; |
| 562 | + } else pattern_result = 0; |
| 563 | + |
| 564 | + if(pattern_result) { |
| 565 | + WRITE_LOCK(&ct_lock); |
| 566 | + master_conntrack->layer7.app_proto = kmalloc(strlen(info->protocol)+1, GFP_ATOMIC); |
| 567 | + if(!master_conntrack->layer7.app_proto){ |
| 568 | + if (net_ratelimit()) |
| 569 | + printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); |
| 570 | + WRITE_UNLOCK(&ct_lock); |
| 571 | + return (pattern_result ^ info->invert); |
| 572 | + } |
| 573 | + strcpy(master_conntrack->layer7.app_proto, info->protocol); |
| 574 | + WRITE_UNLOCK(&ct_lock); |
| 575 | + } |
| 576 | + |
| 577 | + /* mark the packet seen */ |
| 578 | + skb->cb[0] = 1; |
| 579 | + |
| 580 | + return (pattern_result ^ info->invert); |
| 581 | +} |
| 582 | + |
| 583 | +static int checkentry(const char *tablename, const struct ipt_ip *ip, |
| 584 | + void *matchinfo, unsigned int matchsize, unsigned int hook_mask) |
| 585 | +{ |
| 586 | + /* Accept the rule only if the supplied match data has exactly |
| 587 | + the (aligned) size of struct ipt_layer7_info. */ |
| 588 | + return matchsize == IPT_ALIGN(sizeof(struct ipt_layer7_info)); |
| 589 | +} |
| 590 | + |
| 591 | +static struct ipt_match layer7_match = { /* registered in init(), unregistered in fini() */ |
| 592 | + .name = "layer7", |
| 593 | + .match = &match, |
| 594 | + .checkentry = &checkentry, |
| 595 | + .me = THIS_MODULE |
| 596 | +}; |
| 597 | + |
| 598 | +/* taken from drivers/video/modedb.c */ |
| 599 | +static int my_atoi(const char *s) |
| 600 | +{ |
| 601 | + /* Parses the run of decimal digits at the start of s; there is no |
| 602 | + sign or whitespace handling, and the result is 0 if s does not |
| 603 | + begin with a digit. */ |
| 604 | + int result = 0; |
| 605 | + |
| 606 | + while (*s >= '0' && *s <= '9') { |
| 607 | + result = 10*result + (*s - '0'); |
| 608 | + s++; |
| 609 | + } |
| 610 | + |
| 611 | + return result; |
| 612 | +} |
| 613 | + |
| 614 | +/* Reports num_packets to userland as two decimal digits followed by a newline. */ |
| 615 | +static int layer7_read_proc(char* page, char ** start, off_t off, int count, |
| 616 | + int* eof, void * data) |
| 617 | +{ |
| 618 | + char * out = page; |
| 619 | + |
| 620 | + /* layer7_write_proc() limits num_packets to 99, so two digits suffice */ |
| 621 | + if(num_packets > 99 && net_ratelimit()) |
| 622 | + printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n"); |
| 623 | + *out++ = '0' + num_packets/10; |
| 624 | + *out++ = '0' + num_packets%10; |
| 625 | + *out++ = '\n'; |
| 626 | + *out = '\0'; |
| 627 | + *eof = 1; /* the whole value is delivered in this one call */ |
| 628 | + return (int)(out - page); |
| 629 | +} |
| 630 | + |
| 631 | +/* Read in num_packets from userland. Returns count (the write is consumed whole), or -EFAULT if the user buffer can't be read. */ |
| 632 | +static int layer7_write_proc(struct file* file, const char* buffer, |
| 633 | + unsigned long count, void *data) |
| 634 | +{ |
| 635 | + char * foo = kmalloc(count + 1, GFP_ATOMIC); /* +1 for the terminating NUL */ |
| 636 | + int val; |
| 637 | + if(!foo){ |
| 638 | + if (net_ratelimit()) |
| 639 | + printk(KERN_ERR "layer7: out of memory, bailing. num_packets unchanged.\n"); |
| 640 | + return count; |
| 641 | + } |
| 642 | + |
| 643 | + if(copy_from_user(foo, buffer, count)) { kfree(foo); return -EFAULT; } |
| 644 | + foo[count] = '\0'; /* my_atoi() scans up to the first non-digit, so the buffer must be terminated */ |
| 645 | + val = my_atoi(foo); |
| 646 | + kfree (foo); |
| 647 | + |
| 648 | + /* This has an arbitrary limit to make the math easier. I'm lazy. |
| 649 | + But anyway, 99 is a LOT! If you want more, you're doing it wrong! */ |
| 650 | + if(val > 99) { |
| 651 | + printk(KERN_WARNING "layer7: num_packets can't be > 99.\n"); |
| 652 | + val = 99; |
| 653 | + } else if(val < 1) { |
| 654 | + printk(KERN_WARNING "layer7: num_packets can't be < 1.\n"); |
| 655 | + val = 1; |
| 656 | + } |
| 657 | + num_packets = val; /* publish only the clamped value, never an out-of-range one */ |
| 658 | + return count; |
| 659 | +} |
| 660 | + |
| 661 | +/* register the proc file; if the entry can't be created the module simply runs without it */ |
| 662 | +static void layer7_init_proc(void) |
| 663 | +{ |
| 664 | + struct proc_dir_entry* entry = create_proc_entry("layer7_numpackets", 0644, proc_net); |
| 665 | + if(!entry) return; /* creation failed: there is nothing to hook the handlers onto */ |
| 666 | + entry->read_proc = layer7_read_proc; |
| 667 | + entry->write_proc = layer7_write_proc; |
| 668 | +} |
| 669 | + |
| 670 | +static void layer7_cleanup_proc(void) |
| 671 | +{ |
| 672 | + remove_proc_entry("layer7_numpackets", proc_net); /* removes the entry created by layer7_init_proc() */ |
| 673 | +} |
| 674 | + |
| 675 | +static int __init init(void) |
| 676 | +{ |
| 677 | + int ret; |
| 678 | + /* Clamp maxdatalen. The upper bound is not a hard limit; it's just here to prevent people from bringing their slow machines to a grinding halt. */ |
| 679 | + if(maxdatalen < 1) { |
| 680 | + printk(KERN_WARNING "layer7: maxdatalen can't be < 1, using 1\n"); |
| 681 | + maxdatalen = 1; |
| 682 | + } else if(maxdatalen > 65536) { |
| 683 | + printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, using 65536\n"); |
| 684 | + maxdatalen = 65536; |
| 685 | + } |
| 686 | + ret = ipt_register_match(&layer7_match); |
| 687 | + if(!ret) layer7_init_proc(); /* only after successful registration, so a failed load leaves no proc entry behind */ |
| 688 | + return ret; |
| 689 | +} |
| 690 | + |
| 691 | +static void __exit fini(void) |
| 692 | +{ |
| 693 | + layer7_cleanup_proc(); |
| 694 | + ipt_unregister_match(&layer7_match); /* NOTE(review): the pattern_cache list built by compile_and_cache() is not freed here */ |
| 695 | +} |
| 696 | + |
| 697 | +module_init(init); |
| 698 | +module_exit(fini); |
| 699 | --- /dev/null |
| 700 | +++ b/net/ipv4/netfilter/regexp/regexp.c |
| 701 | @@ -0,0 +1,1195 @@ |
| 702 | +/* |
| 703 | + * regcomp and regexec -- regsub and regerror are elsewhere |
| 704 | + * @(#)regexp.c 1.3 of 18 April 87 |
| 705 | + * |
| 706 | + * Copyright (c) 1986 by University of Toronto. |
| 707 | + * Written by Henry Spencer. Not derived from licensed software. |
| 708 | + * |
| 709 | + * Permission is granted to anyone to use this software for any |
| 710 | + * purpose on any computer system, and to redistribute it freely, |
| 711 | + * subject to the following restrictions: |
| 712 | + * |
| 713 | + * 1. The author is not responsible for the consequences of use of |
| 714 | + * this software, no matter how awful, even if they arise |
| 715 | + * from defects in it. |
| 716 | + * |
| 717 | + * 2. The origin of this software must not be misrepresented, either |
| 718 | + * by explicit claim or by omission. |
| 719 | + * |
| 720 | + * 3. Altered versions must be plainly marked as such, and must not |
| 721 | + * be misrepresented as being the original software. |
| 722 | + * |
| 723 | + * Beware that some of this code is subtly aware of the way operator |
| 724 | + * precedence is structured in regular expressions. Serious changes in |
| 725 | + * regular-expression syntax might require a total rethink. |
| 726 | + * |
| 727 | + * This code was modified by Ethan Sommer to work within the kernel |
| 728 | + * (it now uses kmalloc etc..) |
| 729 | + * |
| 730 | + * Modified slightly by Matthew Strait to use more modern C. |
| 731 | + */ |
| 732 | + |
| 733 | +#include "regexp.h" |
| 734 | +#include "regmagic.h" |
| 735 | + |
| 736 | +/* added by ethan and matt. Lets it work in both kernel and user space. |
| 737 | +(So iptables can use it, for instance.) Yea, it goes both ways... */ |
| 738 | +#if __KERNEL__ |
| 739 | + #define malloc(foo) kmalloc(foo,GFP_ATOMIC) |
| 740 | +#else |
| 741 | + #define printk(format,args...) printf(format,##args) |
| 742 | +#endif |
| 743 | + |
| 744 | +void regerror(char * s) |
| 745 | +{ |
| 746 | + printk("<3>Regexp: %s\n", s); |
| 747 | + /* this only reports s and then returns to the caller */ |
| 748 | +} |
| 749 | + |
| 750 | +/* |
| 751 | + * The "internal use only" fields in regexp.h are present to pass info from |
| 752 | + * compile to execute that permits the execute phase to run lots faster on |
| 753 | + * simple cases. They are: |
| 754 | + * |
| 755 | + * regstart char that must begin a match; '\0' if none obvious |
| 756 | + * reganch is the match anchored (at beginning-of-line only)? |
| 757 | + * regmust string (pointer into program) that match must include, or NULL |
| 758 | + * regmlen length of regmust string |
| 759 | + * |
| 760 | + * Regstart and reganch permit very fast decisions on suitable starting points |
| 761 | + * for a match, cutting down the work a lot. Regmust permits fast rejection |
| 762 | + * of lines that cannot possibly match. The regmust tests are costly enough |
| 763 | + * that regcomp() supplies a regmust only if the r.e. contains something |
| 764 | + * potentially expensive (at present, the only such thing detected is * or + |
| 765 | + * at the start of the r.e., which can involve a lot of backup). Regmlen is |
| 766 | + * supplied because the test in regexec() needs it and regcomp() is computing |
| 767 | + * it anyway. |
| 768 | + */ |
| 769 | + |
| 770 | +/* |
| 771 | + * Structure for regexp "program". This is essentially a linear encoding |
| 772 | + * of a nondeterministic finite-state machine (aka syntax charts or |
| 773 | + * "railroad normal form" in parsing technology). Each node is an opcode |
| 774 | + * plus a "next" pointer, possibly plus an operand. "Next" pointers of |
| 775 | + * all nodes except BRANCH implement concatenation; a "next" pointer with |
| 776 | + * a BRANCH on both ends of it is connecting two alternatives. (Here we |
| 777 | + * have one of the subtle syntax dependencies: an individual BRANCH (as |
| 778 | + * opposed to a collection of them) is never concatenated with anything |
| 779 | + * because of operator precedence.) The operand of some types of node is |
| 780 | + * a literal string; for others, it is a node leading into a sub-FSM. In |
| 781 | + * particular, the operand of a BRANCH node is the first node of the branch. |
| 782 | + * (NB this is *not* a tree structure: the tail of the branch connects |
| 783 | + * to the thing following the set of BRANCHes.) The opcodes are: |
| 784 | + */ |
| 785 | + |
/*
 * Opcode values stored in the first byte of every node.  The "opnd?"
 * column says what kind of operand, if any, follows the node.
 */
/* definition	number	opnd?	meaning */
#define END	0	/* no	End of program. */
#define BOL	1	/* no	Match "" at beginning of line. */
#define EOL	2	/* no	Match "" at end of line. */
#define ANY	3	/* no	Match any one character. */
#define ANYOF	4	/* str	Match any character in this string. */
#define ANYBUT	5	/* str	Match any character not in this string. */
#define BRANCH	6	/* node	Match this alternative, or the next... */
#define BACK	7	/* no	Match "", "next" ptr points backward. */
#define EXACTLY	8	/* str	Match this string. */
#define NOTHING	9	/* no	Match empty string. */
#define STAR	10	/* node	Match this (simple) thing 0 or more times. */
#define PLUS	11	/* node	Match this (simple) thing 1 or more times. */
#define OPEN	20	/* no	Mark this point in input as start of #n. */
			/*	OPEN+1 is number 1, etc. */
#define CLOSE	30	/* no	Analogous to OPEN. */
| 802 | + |
| 803 | +/* |
| 804 | + * Opcode notes: |
| 805 | + * |
| 806 | + * BRANCH The set of branches constituting a single choice are hooked |
| 807 | + * together with their "next" pointers, since precedence prevents |
| 808 | + * anything being concatenated to any individual branch. The |
| 809 | + * "next" pointer of the last BRANCH in a choice points to the |
| 810 | + * thing following the whole choice. This is also where the |
| 811 | + * final "next" pointer of each individual branch points; each |
| 812 | + * branch starts with the operand node of a BRANCH node. |
| 813 | + * |
| 814 | + * BACK Normal "next" pointers all implicitly point forward; BACK |
| 815 | + * exists to make loop structures possible. |
| 816 | + * |
| 817 | + * STAR,PLUS '?', and complex '*' and '+', are implemented as circular |
| 818 | + * BRANCH structures using BACK. Simple cases (one character |
| 819 | + * per match) are implemented with STAR and PLUS for speed |
| 820 | + * and to minimize recursive plunges. |
| 821 | + * |
| 822 | + * OPEN,CLOSE ...are numbered at compile time. |
| 823 | + */ |
| 824 | + |
| 825 | +/* |
| 826 | + * A node is one char of opcode followed by two chars of "next" pointer. |
| 827 | + * "Next" pointers are stored as two 8-bit pieces, high order first. The |
| 828 | + * value is a positive offset from the opcode of the node containing it. |
| 829 | + * An operand, if any, simply follows the node. (Note that much of the |
| 830 | + * code generation knows about this implicit relationship.) |
| 831 | + * |
| 832 | + * Using two bytes for the "next" pointer is vast overkill for most things, |
| 833 | + * but allows patterns to get big without disasters. |
| 834 | + */ |
/* Opcode byte of the node at p. */
#define OP(p) (*(p))
/* Unsigned 16-bit "next" offset of the node at p (high byte first). */
#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
/* Address of the operand, which follows the 3-byte node header. */
#define OPERAND(p) ((p) + 3)
| 838 | + |
| 839 | +/* |
| 840 | + * See regmagic.h for one further detail of program structure. |
| 841 | + */ |
| 842 | + |
| 843 | + |
| 844 | +/* |
| 845 | + * Utility definitions. |
| 846 | + */ |
/*
 * Read the char at p as an int: through an unsigned char cast by default,
 * or masked with CHARBITS when that macro is defined.
 */
#ifndef CHARBITS
#define UCHARAT(p) ((int)*(unsigned char *)(p))
#else
#define UCHARAT(p) ((int)*(p)&CHARBITS)
#endif

/* Report m through regerror() and return NULL from the enclosing function. */
#define FAIL(m) { regerror(m); return(NULL); }
/* True if c is one of the repetition operators '*', '+' or '?'. */
#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
/* Characters that end a run of ordinary characters (see regatom()). */
#define META "^$.[()|?+*\\"

/*
 * Flags to be passed up and down.
 */
#define HASWIDTH 01 /* Known never to match null string. */
#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
#define SPSTART 04 /* Starts with * or +. */
#define WORST 0 /* Worst case. */
| 864 | + |
| 865 | +/* |
| 866 | + * Global work variables for regcomp(). |
| 867 | + */ |
static char *regparse; /* Input-scan pointer. */
static int regnpar; /* () count. */
static char regdummy; /* Placeholder target used while only sizing. */
static char *regcode; /* Code-emit pointer; &regdummy = don't. */
static long regsize; /* Code size. */
| 873 | + |
| 874 | +/* |
| 875 | + * Forward declarations for regcomp()'s friends. |
| 876 | + */ |
/* STATIC defaults to "static" unless it was already defined. */
#ifndef STATIC
#define STATIC static
#endif
/* Parser levels, outermost first. */
STATIC char *reg(int paren,int *flagp);
STATIC char *regbranch(int *flagp);
STATIC char *regpiece(int *flagp);
STATIC char *regatom(int *flagp);
/* Code emission and node-chain helpers. */
STATIC char *regnode(char op);
STATIC char *regnext(char *p);
STATIC void regc(char b);
STATIC void reginsert(char op, char *opnd);
STATIC void regtail(char *p, char *val);
STATIC void regoptail(char *p, char *val);
| 890 | + |
| 891 | + |
| 892 | +__kernel_size_t my_strcspn(const char *s1,const char *s2) |
| 893 | +{ |
| 894 | + char *scan1; |
| 895 | + char *scan2; |
| 896 | + int count; |
| 897 | + |
| 898 | + count = 0; |
| 899 | + for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) { |
| 900 | + for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */ |
| 901 | + if (*scan1 == *scan2++) |
| 902 | + return(count); |
| 903 | + count++; |
| 904 | + } |
| 905 | + return(count); |
| 906 | +} |
| 907 | + |
| 908 | +/* |
| 909 | + - regcomp - compile a regular expression into internal code |
| 910 | + * |
| 911 | + * We can't allocate space until we know how big the compiled form will be, |
| 912 | + * but we can't compile it (and thus know how big it is) until we've got a |
| 913 | + * place to put the code. So we cheat: we compile it twice, once with code |
| 914 | + * generation turned off and size counting turned on, and once "for real". |
| 915 | + * This also means that we don't allocate space until we are sure that the |
| 916 | + * thing really will compile successfully, and we never have to move the |
| 917 | + * code and thus invalidate pointers into it. (Note that it has to be in |
| 918 | + * one piece because free() must be able to free it all.) |
| 919 | + * |
| 920 | + * Beware that the optimization-preparation code in here knows about some |
| 921 | + * of the structure of the compiled regexp. |
| 922 | + */ |
| 923 | +regexp * |
| 924 | +regcomp(char *exp,int *patternsize) |
| 925 | +{ |
| 926 | + register regexp *r; |
| 927 | + register char *scan; |
| 928 | + register char *longest; |
| 929 | + register int len; |
| 930 | + int flags; |
| 931 | + /* commented out by ethan |
| 932 | + extern char *malloc(); |
| 933 | + */ |
| 934 | + |
| 935 | + if (exp == NULL) |
| 936 | + FAIL("NULL argument"); |
| 937 | + |
| 938 | + /* First pass: determine size, legality. */ |
| 939 | + regparse = exp; |
| 940 | + regnpar = 1; |
| 941 | + regsize = 0L; |
| 942 | + regcode = ®dummy; |
| 943 | + regc(MAGIC); |
| 944 | + if (reg(0, &flags) == NULL) |
| 945 | + return(NULL); |
| 946 | + |
| 947 | + /* Small enough for pointer-storage convention? */ |
| 948 | + if (regsize >= 32767L) /* Probably could be 65535L. */ |
| 949 | + FAIL("regexp too big"); |
| 950 | + |
| 951 | + /* Allocate space. */ |
| 952 | + *patternsize=sizeof(regexp) + (unsigned)regsize; |
| 953 | + r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); |
| 954 | + if (r == NULL) |
| 955 | + FAIL("out of space"); |
| 956 | + |
| 957 | + /* Second pass: emit code. */ |
| 958 | + regparse = exp; |
| 959 | + regnpar = 1; |
| 960 | + regcode = r->program; |
| 961 | + regc(MAGIC); |
| 962 | + if (reg(0, &flags) == NULL) |
| 963 | + return(NULL); |
| 964 | + |
| 965 | + /* Dig out information for optimizations. */ |
| 966 | + r->regstart = '\0'; /* Worst-case defaults. */ |
| 967 | + r->reganch = 0; |
| 968 | + r->regmust = NULL; |
| 969 | + r->regmlen = 0; |
| 970 | + scan = r->program+1; /* First BRANCH. */ |
| 971 | + if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ |
| 972 | + scan = OPERAND(scan); |
| 973 | + |
| 974 | + /* Starting-point info. */ |
| 975 | + if (OP(scan) == EXACTLY) |
| 976 | + r->regstart = *OPERAND(scan); |
| 977 | + else if (OP(scan) == BOL) |
| 978 | + r->reganch++; |
| 979 | + |
| 980 | + /* |
| 981 | + * If there's something expensive in the r.e., find the |
| 982 | + * longest literal string that must appear and make it the |
| 983 | + * regmust. Resolve ties in favor of later strings, since |
| 984 | + * the regstart check works with the beginning of the r.e. |
| 985 | + * and avoiding duplication strengthens checking. Not a |
| 986 | + * strong reason, but sufficient in the absence of others. |
| 987 | + */ |
| 988 | + if (flags&SPSTART) { |
| 989 | + longest = NULL; |
| 990 | + len = 0; |
| 991 | + for (; scan != NULL; scan = regnext(scan)) |
| 992 | + if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { |
| 993 | + longest = OPERAND(scan); |
| 994 | + len = strlen(OPERAND(scan)); |
| 995 | + } |
| 996 | + r->regmust = longest; |
| 997 | + r->regmlen = len; |
| 998 | + } |
| 999 | + } |
| 1000 | + |
| 1001 | + return(r); |
| 1002 | +} |
| 1003 | + |
| 1004 | +/* |
| 1005 | + - reg - regular expression, i.e. main body or parenthesized thing |
| 1006 | + * |
| 1007 | + * Caller must absorb opening parenthesis. |
| 1008 | + * |
| 1009 | + * Combining parenthesis handling with the base level of regular expression |
| 1010 | + * is a trifle forced, but the need to tie the tails of the branches to what |
| 1011 | + * follows makes it hard to avoid. |
| 1012 | + */ |
| 1013 | +static char * |
| 1014 | +reg(int paren, int *flagp /* Parenthesized? */ ) |
| 1015 | +{ |
| 1016 | + register char *ret; |
| 1017 | + register char *br; |
| 1018 | + register char *ender; |
| 1019 | + register int parno = 0; /* 0 makes gcc happy */ |
| 1020 | + int flags; |
| 1021 | + |
| 1022 | + *flagp = HASWIDTH; /* Tentatively. */ |
| 1023 | + |
| 1024 | + /* Make an OPEN node, if parenthesized. */ |
| 1025 | + if (paren) { |
| 1026 | + if (regnpar >= NSUBEXP) |
| 1027 | + FAIL("too many ()"); |
| 1028 | + parno = regnpar; |
| 1029 | + regnpar++; |
| 1030 | + ret = regnode(OPEN+parno); |
| 1031 | + } else |
| 1032 | + ret = NULL; |
| 1033 | + |
| 1034 | + /* Pick up the branches, linking them together. */ |
| 1035 | + br = regbranch(&flags); |
| 1036 | + if (br == NULL) |
| 1037 | + return(NULL); |
| 1038 | + if (ret != NULL) |
| 1039 | + regtail(ret, br); /* OPEN -> first. */ |
| 1040 | + else |
| 1041 | + ret = br; |
| 1042 | + if (!(flags&HASWIDTH)) |
| 1043 | + *flagp &= ~HASWIDTH; |
| 1044 | + *flagp |= flags&SPSTART; |
| 1045 | + while (*regparse == '|') { |
| 1046 | + regparse++; |
| 1047 | + br = regbranch(&flags); |
| 1048 | + if (br == NULL) |
| 1049 | + return(NULL); |
| 1050 | + regtail(ret, br); /* BRANCH -> BRANCH. */ |
| 1051 | + if (!(flags&HASWIDTH)) |
| 1052 | + *flagp &= ~HASWIDTH; |
| 1053 | + *flagp |= flags&SPSTART; |
| 1054 | + } |
| 1055 | + |
| 1056 | + /* Make a closing node, and hook it on the end. */ |
| 1057 | + ender = regnode((paren) ? CLOSE+parno : END); |
| 1058 | + regtail(ret, ender); |
| 1059 | + |
| 1060 | + /* Hook the tails of the branches to the closing node. */ |
| 1061 | + for (br = ret; br != NULL; br = regnext(br)) |
| 1062 | + regoptail(br, ender); |
| 1063 | + |
| 1064 | + /* Check for proper termination. */ |
| 1065 | + if (paren && *regparse++ != ')') { |
| 1066 | + FAIL("unmatched ()"); |
| 1067 | + } else if (!paren && *regparse != '\0') { |
| 1068 | + if (*regparse == ')') { |
| 1069 | + FAIL("unmatched ()"); |
| 1070 | + } else |
| 1071 | + FAIL("junk on end"); /* "Can't happen". */ |
| 1072 | + /* NOTREACHED */ |
| 1073 | + } |
| 1074 | + |
| 1075 | + return(ret); |
| 1076 | +} |
| 1077 | + |
| 1078 | +/* |
| 1079 | + - regbranch - one alternative of an | operator |
| 1080 | + * |
| 1081 | + * Implements the concatenation operator. |
| 1082 | + */ |
| 1083 | +static char * |
| 1084 | +regbranch(int *flagp) |
| 1085 | +{ |
| 1086 | + register char *ret; |
| 1087 | + register char *chain; |
| 1088 | + register char *latest; |
| 1089 | + int flags; |
| 1090 | + |
| 1091 | + *flagp = WORST; /* Tentatively. */ |
| 1092 | + |
| 1093 | + ret = regnode(BRANCH); |
| 1094 | + chain = NULL; |
| 1095 | + while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { |
| 1096 | + latest = regpiece(&flags); |
| 1097 | + if (latest == NULL) |
| 1098 | + return(NULL); |
| 1099 | + *flagp |= flags&HASWIDTH; |
| 1100 | + if (chain == NULL) /* First piece. */ |
| 1101 | + *flagp |= flags&SPSTART; |
| 1102 | + else |
| 1103 | + regtail(chain, latest); |
| 1104 | + chain = latest; |
| 1105 | + } |
| 1106 | + if (chain == NULL) /* Loop ran zero times. */ |
| 1107 | + (void) regnode(NOTHING); |
| 1108 | + |
| 1109 | + return(ret); |
| 1110 | +} |
| 1111 | + |
| 1112 | +/* |
| 1113 | + - regpiece - something followed by possible [*+?] |
| 1114 | + * |
| 1115 | + * Note that the branching code sequences used for ? and the general cases |
| 1116 | + * of * and + are somewhat optimized: they use the same NOTHING node as |
| 1117 | + * both the endmarker for their branch list and the body of the last branch. |
| 1118 | + * It might seem that this node could be dispensed with entirely, but the |
| 1119 | + * endmarker role is not redundant. |
| 1120 | + */ |
| 1121 | +static char * |
| 1122 | +regpiece(int *flagp) |
| 1123 | +{ |
| 1124 | + register char *ret; |
| 1125 | + register char op; |
| 1126 | + register char *next; |
| 1127 | + int flags; |
| 1128 | + |
| 1129 | + ret = regatom(&flags); |
| 1130 | + if (ret == NULL) |
| 1131 | + return(NULL); |
| 1132 | + |
| 1133 | + op = *regparse; |
| 1134 | + if (!ISMULT(op)) { |
| 1135 | + *flagp = flags; |
| 1136 | + return(ret); |
| 1137 | + } |
| 1138 | + |
| 1139 | + if (!(flags&HASWIDTH) && op != '?') |
| 1140 | + FAIL("*+ operand could be empty"); |
| 1141 | + *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); |
| 1142 | + |
| 1143 | + if (op == '*' && (flags&SIMPLE)) |
| 1144 | + reginsert(STAR, ret); |
| 1145 | + else if (op == '*') { |
| 1146 | + /* Emit x* as (x&|), where & means "self". */ |
| 1147 | + reginsert(BRANCH, ret); /* Either x */ |
| 1148 | + regoptail(ret, regnode(BACK)); /* and loop */ |
| 1149 | + regoptail(ret, ret); /* back */ |
| 1150 | + regtail(ret, regnode(BRANCH)); /* or */ |
| 1151 | + regtail(ret, regnode(NOTHING)); /* null. */ |
| 1152 | + } else if (op == '+' && (flags&SIMPLE)) |
| 1153 | + reginsert(PLUS, ret); |
| 1154 | + else if (op == '+') { |
| 1155 | + /* Emit x+ as x(&|), where & means "self". */ |
| 1156 | + next = regnode(BRANCH); /* Either */ |
| 1157 | + regtail(ret, next); |
| 1158 | + regtail(regnode(BACK), ret); /* loop back */ |
| 1159 | + regtail(next, regnode(BRANCH)); /* or */ |
| 1160 | + regtail(ret, regnode(NOTHING)); /* null. */ |
| 1161 | + } else if (op == '?') { |
| 1162 | + /* Emit x? as (x|) */ |
| 1163 | + reginsert(BRANCH, ret); /* Either x */ |
| 1164 | + regtail(ret, regnode(BRANCH)); /* or */ |
| 1165 | + next = regnode(NOTHING); /* null. */ |
| 1166 | + regtail(ret, next); |
| 1167 | + regoptail(ret, next); |
| 1168 | + } |
| 1169 | + regparse++; |
| 1170 | + if (ISMULT(*regparse)) |
| 1171 | + FAIL("nested *?+"); |
| 1172 | + |
| 1173 | + return(ret); |
| 1174 | +} |
| 1175 | + |
/*
 - regatom - the lowest level
 *
 * Parses one atom at regparse: an anchor, '.', a [] class, a parenthesized
 * group, a backslashed character, or a run of ordinary characters.
 * *flagp starts at WORST and gains HASWIDTH/SIMPLE/SPSTART as appropriate.
 * Returns the node emitted for the atom, or NULL on a syntax error.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom(int *flagp)
{
	register char *ret;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	switch (*regparse++) {
	case '^':
		ret = regnode(BOL);
		break;
	case '$':
		ret = regnode(EOL);
		break;
	case '.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			register int class;
			register int classend;

			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				regparse++;
			} else
				ret = regnode(ANYOF);
			/* A leading ']' or '-' is taken literally. */
			if (*regparse == ']' || *regparse == '-')
				regc(*regparse++);
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					regparse++;
					/* A '-' just before ']' (or the end) is literal. */
					if (*regparse == ']' || *regparse == '\0')
						regc('-');
					else {
						/*
						 * Range: the low end was already emitted
						 * as an ordinary character, so start one
						 * past it.
						 */
						class = UCHARAT(regparse-2)+1;
						classend = UCHARAT(regparse);
						if (class > classend+1)
							FAIL("invalid [] range");
						for (; class <= classend; class++)
							regc(class);
						regparse++;
					}
				} else
					regc(*regparse++);
			}
			/* The class operand is stored as a NUL-terminated string. */
			regc('\0');
			if (*regparse != ']')
				FAIL("unmatched []");
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
		FAIL("?+* follows nothing");
		break;
	case '\\':
		if (*regparse == '\0')
			FAIL("trailing \\");
		/* The escaped character becomes a one-character EXACTLY node. */
		ret = regnode(EXACTLY);
		regc(*regparse++);
		regc('\0');
		*flagp |= HASWIDTH|SIMPLE;
		break;
	default: {
			register int len;
			register char ender;

			/* Undo the switch's increment: the run starts here. */
			regparse--;
			len = my_strcspn((const char *)regparse, (const char *)META);
			if (len <= 0)
				FAIL("internal disaster");
			ender = *(regparse+len);
			if (len > 1 && ISMULT(ender))
				len--;		/* Back off clear of ?+* operand. */
			*flagp |= HASWIDTH;
			if (len == 1)
				*flagp |= SIMPLE;
			ret = regnode(EXACTLY);
			while (len > 0) {
				regc(*regparse++);
				len--;
			}
			regc('\0');
		}
		break;
	}

	return(ret);
}
| 1288 | + |
| 1289 | +/* |
| 1290 | + - regnode - emit a node |
| 1291 | + */ |
| 1292 | +static char * /* Location. */ |
| 1293 | +regnode(char op) |
| 1294 | +{ |
| 1295 | + register char *ret; |
| 1296 | + register char *ptr; |
| 1297 | + |
| 1298 | + ret = regcode; |
| 1299 | + if (ret == ®dummy) { |
| 1300 | + regsize += 3; |
| 1301 | + return(ret); |
| 1302 | + } |
| 1303 | + |
| 1304 | + ptr = ret; |
| 1305 | + *ptr++ = op; |
| 1306 | + *ptr++ = '\0'; /* Null "next" pointer. */ |
| 1307 | + *ptr++ = '\0'; |
| 1308 | + regcode = ptr; |
| 1309 | + |
| 1310 | + return(ret); |
| 1311 | +} |
| 1312 | + |
| 1313 | +/* |
| 1314 | + - regc - emit (if appropriate) a byte of code |
| 1315 | + */ |
| 1316 | +static void |
| 1317 | +regc(char b) |
| 1318 | +{ |
| 1319 | + if (regcode != ®dummy) |
| 1320 | + *regcode++ = b; |
| 1321 | + else |
| 1322 | + regsize++; |
| 1323 | +} |
| 1324 | + |
| 1325 | +/* |
| 1326 | + - reginsert - insert an operator in front of already-emitted operand |
| 1327 | + * |
| 1328 | + * Means relocating the operand. |
| 1329 | + */ |
| 1330 | +static void |
| 1331 | +reginsert(char op, char* opnd) |
| 1332 | +{ |
| 1333 | + register char *src; |
| 1334 | + register char *dst; |
| 1335 | + register char *place; |
| 1336 | + |
| 1337 | + if (regcode == ®dummy) { |
| 1338 | + regsize += 3; |
| 1339 | + return; |
| 1340 | + } |
| 1341 | + |
| 1342 | + src = regcode; |
| 1343 | + regcode += 3; |
| 1344 | + dst = regcode; |
| 1345 | + while (src > opnd) |
| 1346 | + *--dst = *--src; |
| 1347 | + |
| 1348 | + place = opnd; /* Op node, where operand used to be. */ |
| 1349 | + *place++ = op; |
| 1350 | + *place++ = '\0'; |
| 1351 | + *place++ = '\0'; |
| 1352 | +} |
| 1353 | + |
| 1354 | +/* |
| 1355 | + - regtail - set the next-pointer at the end of a node chain |
| 1356 | + */ |
| 1357 | +static void |
| 1358 | +regtail(char *p, char *val) |
| 1359 | +{ |
| 1360 | + register char *scan; |
| 1361 | + register char *temp; |
| 1362 | + register int offset; |
| 1363 | + |
| 1364 | + if (p == ®dummy) |
| 1365 | + return; |
| 1366 | + |
| 1367 | + /* Find last node. */ |
| 1368 | + scan = p; |
| 1369 | + for (;;) { |
| 1370 | + temp = regnext(scan); |
| 1371 | + if (temp == NULL) |
| 1372 | + break; |
| 1373 | + scan = temp; |
| 1374 | + } |
| 1375 | + |
| 1376 | + if (OP(scan) == BACK) |
| 1377 | + offset = scan - val; |
| 1378 | + else |
| 1379 | + offset = val - scan; |
| 1380 | + *(scan+1) = (offset>>8)&0377; |
| 1381 | + *(scan+2) = offset&0377; |
| 1382 | +} |
| 1383 | + |
| 1384 | +/* |
| 1385 | + - regoptail - regtail on operand of first argument; nop if operandless |
| 1386 | + */ |
| 1387 | +static void |
| 1388 | +regoptail(char *p, char *val) |
| 1389 | +{ |
| 1390 | + /* "Operandless" and "op != BRANCH" are synonymous in practice. */ |
| 1391 | + if (p == NULL || p == ®dummy || OP(p) != BRANCH) |
| 1392 | + return; |
| 1393 | + regtail(OPERAND(p), val); |
| 1394 | +} |
| 1395 | + |
| 1396 | +/* |
| 1397 | + * regexec and friends |
| 1398 | + */ |
| 1399 | + |
| 1400 | +/* |
| 1401 | + * Global work variables for regexec(). |
| 1402 | + */ |
/* These are set by regexec()/regtry() and read by the matching routines. */
static char *reginput; /* String-input pointer. */
static char *regbol; /* Beginning of input, for ^ check. */
static char **regstartp; /* Pointer to startp array. */
static char **regendp; /* Ditto for endp. */

/*
 * Forwards.
 */
STATIC int regtry(regexp *prog, char *string);
STATIC int regmatch(char *prog);
STATIC int regrepeat(char *p);

/* Declarations used only when DEBUG is defined. */
#ifdef DEBUG
int regnarrate = 0;
void regdump();
STATIC char *regprop(char *op);
#endif
| 1420 | + |
| 1421 | +/* |
| 1422 | + - regexec - match a regexp against a string |
| 1423 | + */ |
| 1424 | +int |
| 1425 | +regexec(regexp *prog, char *string) |
| 1426 | +{ |
| 1427 | + register char *s; |
| 1428 | + |
| 1429 | + /* Be paranoid... */ |
| 1430 | + if (prog == NULL || string == NULL) { |
| 1431 | + printk("<3>Regexp: NULL parameter\n"); |
| 1432 | + return(0); |
| 1433 | + } |
| 1434 | + |
| 1435 | + /* Check validity of program. */ |
| 1436 | + if (UCHARAT(prog->program) != MAGIC) { |
| 1437 | + printk("<3>Regexp: corrupted program\n"); |
| 1438 | + return(0); |
| 1439 | + } |
| 1440 | + |
| 1441 | + /* If there is a "must appear" string, look for it. */ |
| 1442 | + if (prog->regmust != NULL) { |
| 1443 | + s = string; |
| 1444 | + while ((s = strchr(s, prog->regmust[0])) != NULL) { |
| 1445 | + if (strncmp(s, prog->regmust, prog->regmlen) == 0) |
| 1446 | + break; /* Found it. */ |
| 1447 | + s++; |
| 1448 | + } |
| 1449 | + if (s == NULL) /* Not present. */ |
| 1450 | + return(0); |
| 1451 | + } |
| 1452 | + |
| 1453 | + /* Mark beginning of line for ^ . */ |
| 1454 | + regbol = string; |
| 1455 | + |
| 1456 | + /* Simplest case: anchored match need be tried only once. */ |
| 1457 | + if (prog->reganch) |
| 1458 | + return(regtry(prog, string)); |
| 1459 | + |
| 1460 | + /* Messy cases: unanchored match. */ |
| 1461 | + s = string; |
| 1462 | + if (prog->regstart != '\0') |
| 1463 | + /* We know what char it must start with. */ |
| 1464 | + while ((s = strchr(s, prog->regstart)) != NULL) { |
| 1465 | + if (regtry(prog, s)) |
| 1466 | + return(1); |
| 1467 | + s++; |
| 1468 | + } |
| 1469 | + else |
| 1470 | + /* We don't -- general case. */ |
| 1471 | + do { |
| 1472 | + if (regtry(prog, s)) |
| 1473 | + return(1); |
| 1474 | + } while (*s++ != '\0'); |
| 1475 | + |
| 1476 | + /* Failure. */ |
| 1477 | + return(0); |
| 1478 | +} |
| 1479 | + |
| 1480 | +/* |
| 1481 | + - regtry - try match at specific point |
| 1482 | + */ |
| 1483 | +static int /* 0 failure, 1 success */ |
| 1484 | +regtry(regexp *prog, char *string) |
| 1485 | +{ |
| 1486 | + register int i; |
| 1487 | + register char **sp; |
| 1488 | + register char **ep; |
| 1489 | + |
| 1490 | + reginput = string; |
| 1491 | + regstartp = prog->startp; |
| 1492 | + regendp = prog->endp; |
| 1493 | + |
| 1494 | + sp = prog->startp; |
| 1495 | + ep = prog->endp; |
| 1496 | + for (i = NSUBEXP; i > 0; i--) { |
| 1497 | + *sp++ = NULL; |
| 1498 | + *ep++ = NULL; |
| 1499 | + } |
| 1500 | + if (regmatch(prog->program + 1)) { |
| 1501 | + prog->startp[0] = string; |
| 1502 | + prog->endp[0] = reginput; |
| 1503 | + return(1); |
| 1504 | + } else |
| 1505 | + return(0); |
| 1506 | +} |
| 1507 | + |
/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 *
 * prog is the node to start at.  The input position is the file-level
 * reginput, which is advanced as characters are consumed.
 */
static int			/* 0 failure, 1 success */
regmatch(char *prog)
{
	register char *scan = prog;	/* Current node. */
	char *next;			/* Next node. */

#ifdef DEBUG
	if (scan != NULL && regnarrate)
		fprintf(stderr, "%s(\n", regprop(scan));
#endif
	while (scan != NULL) {
#ifdef DEBUG
		if (regnarrate)
			fprintf(stderr, "%s...\n", regprop(scan));
#endif
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			if (*reginput != '\0')
				return(0);
			break;
		case ANY:
			if (*reginput == '\0')
				return(0);
			reginput++;
			break;
		case EXACTLY: {
				register int len;
				register char *opnd;

				opnd = OPERAND(scan);
				/* Inline the first character, for speed. */
				if (*opnd != *reginput)
					return(0);
				len = strlen(opnd);
				if (len > 1 && strncmp(opnd, reginput, len) != 0)
					return(0);
				reginput += len;
			}
			break;
		case ANYOF:
			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
				return(0);
			reginput++;
			break;
		case ANYBUT:
			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
				return(0);
			reginput++;
			break;
		case NOTHING:
		case BACK:
			break;
		case OPEN+1:
		case OPEN+2:
		case OPEN+3:
		case OPEN+4:
		case OPEN+5:
		case OPEN+6:
		case OPEN+7:
		case OPEN+8:
		case OPEN+9: {
				register int no;
				register char *save;

				no = OP(scan) - OPEN;
				save = reginput;

				/* Record the start only once the rest has matched. */
				if (regmatch(next)) {
					/*
					 * Don't set startp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (regstartp[no] == NULL)
						regstartp[no] = save;
					return(1);
				} else
					return(0);
			}
			break;
		case CLOSE+1:
		case CLOSE+2:
		case CLOSE+3:
		case CLOSE+4:
		case CLOSE+5:
		case CLOSE+6:
		case CLOSE+7:
		case CLOSE+8:
		case CLOSE+9:
			{
				register int no;
				register char *save;

				no = OP(scan) - CLOSE;
				save = reginput;

				/* Record the end only once the rest has matched. */
				if (regmatch(next)) {
					/*
					 * Don't set endp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (regendp[no] == NULL)
						regendp[no] = save;
					return(1);
				} else
					return(0);
			}
			break;
		case BRANCH: {
				register char *save;

				if (OP(next) != BRANCH)		/* No choice. */
					next = OPERAND(scan);	/* Avoid recursion. */
				else {
					/*
					 * Try each alternative in turn, restoring
					 * the input position after a failure.
					 */
					do {
						save = reginput;
						if (regmatch(OPERAND(scan)))
							return(1);
						reginput = save;
						scan = regnext(scan);
					} while (scan != NULL && OP(scan) == BRANCH);
					return(0);
					/* NOTREACHED */
				}
			}
			break;
		case STAR:
		case PLUS: {
				register char nextch;
				register int no;
				register char *save;
				register int min;

				/*
				 * Lookahead to avoid useless match attempts
				 * when we know what character comes next.
				 */
				nextch = '\0';
				if (OP(next) == EXACTLY)
					nextch = *OPERAND(next);
				min = (OP(scan) == STAR) ? 0 : 1;
				save = reginput;
				/* Take as many repetitions as possible, then give back. */
				no = regrepeat(OPERAND(scan));
				while (no >= min) {
					/* If it could work, try it. */
					if (nextch == '\0' || *reginput == nextch)
						if (regmatch(next))
							return(1);
					/* Couldn't or didn't -- back up. */
					no--;
					reginput = save + no;
				}
				return(0);
			}
			break;
		case END:
			return(1);	/* Success! */
			break;
		default:
			printk("<3>Regexp: memory corruption\n");
			return(0);
			break;
		}

		scan = next;
	}

	/*
	 * We get here only if there's trouble -- normally "case END" is
	 * the terminating point.
	 */
	printk("<3>Regexp: corrupted pointers\n");
	return(0);
}
| 1699 | + |
| 1700 | +/* |
| 1701 | + - regrepeat - match node p at reginput as often as possible; advance reginput past the matches and return how many |
| 1702 | + */ |
| 1703 | +static int |
| 1704 | +regrepeat(char *p) |
| 1705 | +{ |
| 1706 | + register int count = 0; /* repetitions matched so far; this is the return value */ |
| 1707 | + register char *scan; /* cursor into the input, starts at reginput */ |
| 1708 | + register char *opnd; /* operand bytes of node p */ |
| 1709 | + |
| 1710 | + scan = reginput; |
| 1711 | + opnd = OPERAND(p); |
| 1712 | + switch (OP(p)) { |
| 1713 | + case ANY: |
| 1714 | + count = strlen(scan); /* ANY takes everything up to the terminating NUL */ |
| 1715 | + scan += count; |
| 1716 | + break; |
| 1717 | + case EXACTLY: |
| 1718 | + while (*opnd == *scan) { /* only the first operand byte is compared; NOTE(review): stops at end of input only if *opnd is not NUL -- confirm the compiler never emits an empty operand */ |
| 1719 | + count++; |
| 1720 | + scan++; |
| 1721 | + } |
| 1722 | + break; |
| 1723 | + case ANYOF: |
| 1724 | + while (*scan != '\0' && strchr(opnd, *scan) != NULL) { /* consume characters that occur in the operand string */ |
| 1725 | + count++; |
| 1726 | + scan++; |
| 1727 | + } |
| 1728 | + break; |
| 1729 | + case ANYBUT: |
| 1730 | + while (*scan != '\0' && strchr(opnd, *scan) == NULL) { /* consume characters that do not occur in the operand string */ |
| 1731 | + count++; |
| 1732 | + scan++; |
| 1733 | + } |
| 1734 | + break; |
| 1735 | + default: /* Oh dear. Called with an opcode that is none of the four handled above. */ |
| 1736 | + printk("<3>Regexp: internal foulup\n"); |
| 1737 | + count = 0; /* Best compromise: report zero repetitions. */ |
| 1738 | + break; |
| 1739 | + } |
| 1740 | + reginput = scan; /* store the advanced position back into the shared input cursor */ |
| 1741 | + |
| 1742 | + return(count); |
| 1743 | +} |
| 1744 | + |
| 1745 | +/* |
| 1746 | + - regnext - dig the "next" pointer out of a node; returns NULL when p is the regdummy placeholder or its stored offset is 0 |
| 1747 | + */ |
| 1748 | +static char* |
| 1749 | +regnext(char *p) |
| 1750 | +{ |
| 1751 | + register int offset; /* distance to the successor node, as stored in the node */ |
| 1752 | + |
| 1753 | + if (p == &regdummy) /* the placeholder has no successor */ |
| 1754 | + return(NULL); |
| 1755 | + |
| 1756 | + offset = NEXT(p); |
| 1757 | + if (offset == 0) /* an offset of zero means "no next node" */ |
| 1758 | + return(NULL); |
| 1759 | + |
| 1760 | + if (OP(p) == BACK) /* BACK nodes are the only ones whose offset points backwards */ |
| 1761 | + return(p-offset); |
| 1762 | + else |
| 1763 | + return(p+offset); |
| 1764 | +} |
| 1765 | + |
| 1766 | +#ifdef DEBUG |
| 1767 | + |
| 1768 | +STATIC char *regprop(); |
| 1769 | + |
| 1770 | +/* |
| 1771 | + - regdump - dump a regexp to the kernel log in vaguely comprehensible form (one node per line, then the header fields) |
| 1772 | + */ |
| 1773 | +void |
| 1774 | +regdump(regexp *r) |
| 1775 | +{ |
| 1776 | + register char *s; /* current node */ |
| 1777 | + register char op = EXACTLY; /* Arbitrary non-END op. */ |
| 1778 | + register char *next; /* successor of the current node, or NULL */ |
| 1779 | + /* extern char *strchr(); */ |
| 1780 | + |
| 1781 | + |
| 1782 | + s = r->program + 1; /* program[0] is the magic byte; nodes start after it */ |
| 1783 | + while (op != END) { /* While that wasn't END last time... */ |
| 1784 | + op = OP(s); |
| 1785 | + printk("%2d%s", (int)(s-r->program), regprop(s)); /* Where, what. Cast: %d expects int, not ptrdiff_t. */ |
| 1786 | + next = regnext(s); |
| 1787 | + if (next == NULL) /* Next ptr. */ |
| 1788 | + printk("(0)"); |
| 1789 | + else |
| 1790 | + printk("(%d)", (int)((s-r->program)+(next-s))); |
| 1791 | + s += 3; /* advance 3 bytes, to where the operand (if any) begins */ |
| 1792 | + if (op == ANYOF || op == ANYBUT || op == EXACTLY) { |
| 1793 | + /* Literal string, where present. */ |
| 1794 | + while (*s != '\0') { |
| 1795 | + printk("%c", *s); |
| 1796 | + s++; |
| 1797 | + } |
| 1798 | + s++; /* step over the operand's terminating NUL */ |
| 1799 | + } |
| 1800 | + printk("\n"); |
| 1801 | + } |
| 1802 | + |
| 1803 | + /* Header fields of interest. */ |
| 1804 | + if (r->regstart != '\0') |
| 1805 | + printk("start `%c' ", r->regstart); |
| 1806 | + if (r->reganch) |
| 1807 | + printk("anchored "); |
| 1808 | + if (r->regmust != NULL) |
| 1809 | + printk("must have \"%s\"", r->regmust); |
| 1810 | + printk("\n"); |
| 1811 | +} |
| 1812 | + |
| 1813 | +/* |
| 1814 | + - regprop - printable representation of opcode; returns a static buffer (":" + name) that the next call overwrites |
| 1815 | + */ |
| 1816 | +static char * |
| 1817 | +regprop(char *op) |
| 1818 | +{ |
| 1819 | +#define BUFLEN 50 |
| 1820 | + register char *p = NULL; /* opcode name to append; NULL when nothing (more) is to be appended */ |
| 1821 | + static char buf[BUFLEN]; /* result storage, shared between calls */ |
| 1822 | + |
| 1823 | + strcpy(buf, ":"); |
| 1824 | + |
| 1825 | + switch (OP(op)) { |
| 1826 | + case BOL: |
| 1827 | + p = "BOL"; |
| 1828 | + break; |
| 1829 | + case EOL: |
| 1830 | + p = "EOL"; |
| 1831 | + break; |
| 1832 | + case ANY: |
| 1833 | + p = "ANY"; |
| 1834 | + break; |
| 1835 | + case ANYOF: |
| 1836 | + p = "ANYOF"; |
| 1837 | + break; |
| 1838 | + case ANYBUT: |
| 1839 | + p = "ANYBUT"; |
| 1840 | + break; |
| 1841 | + case BRANCH: |
| 1842 | + p = "BRANCH"; |
| 1843 | + break; |
| 1844 | + case EXACTLY: |
| 1845 | + p = "EXACTLY"; |
| 1846 | + break; |
| 1847 | + case NOTHING: |
| 1848 | + p = "NOTHING"; |
| 1849 | + break; |
| 1850 | + case BACK: |
| 1851 | + p = "BACK"; |
| 1852 | + break; |
| 1853 | + case END: |
| 1854 | + p = "END"; |
| 1855 | + break; |
| 1856 | + case OPEN+1: |
| 1857 | + case OPEN+2: |
| 1858 | + case OPEN+3: |
| 1859 | + case OPEN+4: |
| 1860 | + case OPEN+5: |
| 1861 | + case OPEN+6: |
| 1862 | + case OPEN+7: |
| 1863 | + case OPEN+8: |
| 1864 | + case OPEN+9: |
| 1865 | + snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN); /* name is formatted straight into buf */ |
| 1866 | + p = NULL; |
| 1867 | + break; |
| 1868 | + case CLOSE+1: |
| 1869 | + case CLOSE+2: |
| 1870 | + case CLOSE+3: |
| 1871 | + case CLOSE+4: |
| 1872 | + case CLOSE+5: |
| 1873 | + case CLOSE+6: |
| 1874 | + case CLOSE+7: |
| 1875 | + case CLOSE+8: |
| 1876 | + case CLOSE+9: |
| 1877 | + snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE); /* name is formatted straight into buf */ |
| 1878 | + p = NULL; |
| 1879 | + break; |
| 1880 | + case STAR: |
| 1881 | + p = "STAR"; |
| 1882 | + break; |
| 1883 | + case PLUS: |
| 1884 | + p = "PLUS"; |
| 1885 | + break; |
| 1886 | + default: |
| 1887 | + printk("<3>Regexp: corrupted opcode\n"); |
| 1888 | + break; /* p keeps its NULL initialiser, so only ":" is returned */ |
| 1889 | + } |
| 1890 | + if (p != NULL) |
| 1891 | + strncat(buf, p, BUFLEN-strlen(buf)-1); /* strncat appends a NUL beyond the count, so reserve one byte for it */ |
| 1892 | + return(buf); |
| 1893 | +} |
| 1894 | +#endif |
| 1895 | + |
| 1896 | + |
| 1897 | --- /dev/null |
| 1898 | +++ b/net/ipv4/netfilter/regexp/regexp.h |
| 1899 | @@ -0,0 +1,40 @@ |
| 1900 | +/* |
| 1901 | + * Definitions etc. for regexp(3) routines. |
| 1902 | + * |
| 1903 | + * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], |
| 1904 | + * not the System V one. |
| 1905 | + */ |
| 1906 | + |
| 1907 | +#ifndef REGEXP_H |
| 1908 | +#define REGEXP_H |
| 1909 | + |
| 1910 | +/* |
| 1911 | +http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h , |
| 1912 | +which contains a version of this library, says: |
| 1913 | + |
| 1914 | + * |
| 1915 | + * NSUBEXP must be at least 10, and no greater than 117 or the parser |
| 1916 | + * will not work properly. |
| 1917 | + * |
| 1918 | + |
| 1919 | +However, this library itself appears to support at most 10 subexpressions |
| 1920 | +(its opcode table only names OPEN+1 to OPEN+9). If you think otherwise, let us know. |
| 1921 | +*/ |
| 1922 | + |
| 1923 | +#define NSUBEXP 10 |
| 1924 | +typedef struct regexp { |
| 1925 | + char *startp[NSUBEXP]; /* start of each recorded substring; regsub() maps "&" to index 0 and "\N" to index N */ |
| 1926 | + char *endp[NSUBEXP]; /* one past the last character of the matching startp[] entry */ |
| 1927 | + char regstart; /* Internal use only. regdump() prints it as the "start" character when non-NUL. */ |
| 1928 | + char reganch; /* Internal use only. regdump() prints "anchored" when non-zero. */ |
| 1929 | + char *regmust; /* Internal use only. regdump() prints it as the "must have" string when non-NULL. */ |
| 1930 | + int regmlen; /* Internal use only. Presumably the length of regmust -- confirm in regcomp(). */ |
| 1931 | + char program[1]; /* Unwarranted chumminess with compiler: program[0] holds MAGIC and nodes follow; presumably over-allocated by regcomp() -- confirm. */ |
| 1932 | +} regexp; |
| 1933 | + |
| 1934 | +regexp * regcomp(char *exp, int *patternsize); |
| 1935 | +int regexec(regexp *prog, char *string); |
| 1936 | +void regsub(regexp *prog, char *source, char *dest); |
| 1937 | +void regerror(char *s); |
| 1938 | + |
| 1939 | +#endif |
| 1940 | --- /dev/null |
| 1941 | +++ b/net/ipv4/netfilter/regexp/regmagic.h |
| 1942 | @@ -0,0 +1,5 @@ |
| 1943 | +/* |
| 1944 | + * The first byte of the regexp internal "program" is actually this magic |
| 1945 | + * number; the start node begins in the second byte. |
| 1946 | + */ |
| 1947 | +#define MAGIC 0234 |
| 1948 | --- /dev/null |
| 1949 | +++ b/net/ipv4/netfilter/regexp/regsub.c |
| 1950 | @@ -0,0 +1,95 @@ |
| 1951 | +/* |
| 1952 | + * regsub |
| 1953 | + * @(#)regsub.c 1.3 of 2 April 86 |
| 1954 | + * |
| 1955 | + * Copyright (c) 1986 by University of Toronto. |
| 1956 | + * Written by Henry Spencer. Not derived from licensed software. |
| 1957 | + * |
| 1958 | + * Permission is granted to anyone to use this software for any |
| 1959 | + * purpose on any computer system, and to redistribute it freely, |
| 1960 | + * subject to the following restrictions: |
| 1961 | + * |
| 1962 | + * 1. The author is not responsible for the consequences of use of |
| 1963 | + * this software, no matter how awful, even if they arise |
| 1964 | + * from defects in it. |
| 1965 | + * |
| 1966 | + * 2. The origin of this software must not be misrepresented, either |
| 1967 | + * by explicit claim or by omission. |
| 1968 | + * |
| 1969 | + * 3. Altered versions must be plainly marked as such, and must not |
| 1970 | + * be misrepresented as being the original software. |
| 1971 | + * |
| 1972 | + * |
| 1973 | + * This code was modified by Ethan Sommer to work within the kernel |
| 1974 | + * (it now uses kmalloc etc.) |
| 1975 | + * |
| 1976 | + */ |
| 1977 | +#include "regexp.h" |
| 1978 | +#include "regmagic.h" |
| 1979 | +#include <linux/string.h> |
| 1980 | + |
| 1981 | + |
| 1982 | +#ifndef CHARBITS |
| 1983 | +#define UCHARAT(p) ((int)*(unsigned char *)(p)) |
| 1984 | +#else |
| 1985 | +#define UCHARAT(p) ((int)*(p)&CHARBITS) |
| 1986 | +#endif |
| 1987 | + |
| 1988 | +#if 0 |
| 1989 | +//void regerror(char * s) |
| 1990 | +//{ |
| 1991 | +// printk("regexp(3): %s", s); |
| 1992 | +// /* NOTREACHED */ |
| 1993 | +//} |
| 1994 | +#endif |
| 1995 | + |
| 1996 | +/* |
| 1997 | + - regsub - copy source to dest, replacing "&" and "\0".."\9" with the substrings recorded in prog by a previous match |
| 1998 | + */ |
| 1999 | +void |
| 2000 | +regsub(regexp * prog, char * source, char * dest) |
| 2001 | +{ |
| 2002 | + register char *src; /* read cursor in source */ |
| 2003 | + register char *dst; /* write cursor in dest */ |
| 2004 | + register char c; /* character just read from source */ |
| 2005 | + register int no; /* substring index 0..9, or -1 for an ordinary character */ |
| 2006 | + register int len; /* length of the substring being inserted */ |
| 2007 | + /* NOTE(review): dest is written without any bound -- the caller must size it for the expanded result. */ |
| 2008 | + /* Not necessary and gcc doesn't like it -MLS */ |
| 2009 | + /*extern char *strncpy();*/ |
| 2010 | + |
| 2011 | + if (prog == NULL || source == NULL || dest == NULL) { |
| 2012 | + regerror("NULL parm to regsub"); |
| 2013 | + return; |
| 2014 | + } |
| 2015 | + if (UCHARAT(prog->program) != MAGIC) { /* program[0] must carry the magic byte */ |
| 2016 | + regerror("damaged regexp fed to regsub"); |
| 2017 | + return; |
| 2018 | + } |
| 2019 | + |
| 2020 | + src = source; |
| 2021 | + dst = dest; |
| 2022 | + while ((c = *src++) != '\0') { |
| 2023 | + if (c == '&') /* "&" selects substring 0 */ |
| 2024 | + no = 0; |
| 2025 | + else if (c == '\\' && '0' <= *src && *src <= '9') /* backslash + digit selects that substring */ |
| 2026 | + no = *src++ - '0'; |
| 2027 | + else |
| 2028 | + no = -1; |
| 2029 | + |
| 2030 | + if (no < 0) { /* Ordinary character. */ |
| 2031 | + if (c == '\\' && (*src == '\\' || *src == '&')) /* escaped backslash or ampersand: emit the second character literally */ |
| 2032 | + c = *src++; |
| 2033 | + *dst++ = c; |
| 2034 | + } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { /* a substring with a NULL bound is silently skipped */ |
| 2035 | + len = prog->endp[no] - prog->startp[no]; |
| 2036 | + (void) strncpy(dst, prog->startp[no], len); /* writes exactly len bytes; no terminator is added at this point */ |
| 2037 | + dst += len; |
| 2038 | + if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ |
| 2039 | + regerror("damaged match string"); |
| 2040 | + return; |
| 2041 | + } |
| 2042 | + } |
| 2043 | + } |
| 2044 | + *dst++ = '\0'; /* terminate the result */ |
| 2045 | +} |
| 2046 | --- /dev/null |
| 2047 | +++ b/include/linux/netfilter/xt_layer7.h |
| 2048 | @@ -0,0 +1,14 @@ |
| 2049 | +#ifndef _XT_LAYER7_H |
| 2050 | +#define _XT_LAYER7_H |
| 2051 | + |
| 2052 | +#define MAX_PATTERN_LEN 8192 |
| 2053 | +#define MAX_PROTOCOL_LEN 256 |
| 2054 | + |
| 2055 | +struct xt_layer7_info { /* fixed-size record: two character buffers followed by two one-byte fields */ |
| 2056 | + char protocol[MAX_PROTOCOL_LEN]; /* presumably the protocol name, e.g. "http" -- confirm against the match code */ |
| 2057 | + char pattern[MAX_PATTERN_LEN]; /* presumably the regular-expression text for that protocol -- confirm */ |
| 2058 | + u_int8_t invert; /* NOTE(review): looks like a negate-the-result flag -- confirm */ |
| 2059 | + u_int8_t pkt; /* NOTE(review): meaning not visible in this header -- confirm in the match code */ |
| 2060 | +}; |
| 2061 | + |
| 2062 | +#endif /* _XT_LAYER7_H */ |
| 2063 | |