Mega Code Archive

 
Categories / C / Small Application
 

Compute string distance

#include <stdio.h> #include <string.h> #include <getopt.h> #include <stdlib.h> #include <unistd.h> #define VERSION "0.0.1" #define PACKAGE "levenshtein" void print_help(int exval); int levenshtein(char *stra, char *strb); int main(int argc, char *argv[]) { char *src_str = NULL; char *trgt_str = NULL; int opt = 0; int output_flag_opt = 0; if(argc == 1) print_help(0); while((opt = getopt(argc, argv, "hve")) != -1) { switch(opt) { case 'h': print_help(0); case 'v': return 0; case 'e': output_flag_opt = 1; break; case '?': fprintf(stderr, "%s: Error - No such option `%c'\n", PACKAGE, optopt); print_help(1); } /* switch */ } /* while */ if((argc - optind) != 2) { fprintf(stderr, "%s: Error - Need `SOURCE' and `TARGET' string\n\n", PACKAGE); print_help(1); } if(strlen(argv[optind]) < 2 || strlen(argv[optind]) > 255) { fprintf(stderr, "%s: Error - strlen(SOURCE) is either `< 2' or `> 255'\n", PACKAGE); return 1; } else src_str = strdup(argv[optind]); optind++; if(strlen(argv[optind]) < 2 || strlen(argv[optind]) > 255) { fprintf(stderr, "%s: Error - strlen(TARGET) is either `< 2' or `> 255'\n", PACKAGE); return 1; } else trgt_str = strdup(argv[optind]); if(output_flag_opt == 0) printf("%d\n", levenshtein(src_str, trgt_str)); else { printf("Source : %s\n", src_str); printf("Target : %s\n", trgt_str); printf("Lev.dist : %d\n", levenshtein(src_str, trgt_str)); } return 0; } int levenshtein(char *stra, char *strb) { int i, j, k, l, m1, m2, m3, laa, r; int lengtha, lengthb, lengthmin; int **d; char *a; lengtha = strlen(stra); lengthb = strlen(strb); lengthmin = lengtha; if(lengthb > lengthmin) lengthmin = lengthb; a = (char*)malloc((lengthmin+2)*sizeof(char)); d = (int**)malloc((lengthmin+2)*sizeof(int*)); for(i = 0; i < lengthmin + 2; i++) d[i] = (int*)malloc((lengthmin+2)*sizeof(int)); for(i = 0; i <= lengtha; i++) a[i] = stra[i]; laa = lengtha; d[0][0] = 0; for(i = 1; i <= lengtha; i++) d[i][0] = d[i-1][0]+1; for(j = 1; j <= lengthb; j++) d[0][j] = d[0][j-1]+1; for(i = 1; i <= laa; i++) { for(j = 1;j <= lengthb; j++) { r = 0; if(a[i-1] != strb[j-1]) { r = 1; a[i-1] = strb[j-1]; } m1 = d[i-1][j-1]+r; for(k = lengtha; k >= i-1; k--) a[k+1] = a[k]; a[j-1] = strb[j-1]; lengtha++; m2 = d[i][j-1]+1; for(k = i-1; k < lengtha; k++) a[k] = a[k+1]; lengtha--; m3 = d[i-1][j]+1; r = m1; if( m2 < r) r = m2; if(m3 < r) r = m3; d[i][j] = r; lengtha = laa; for(l = 0; l <= lengtha; l++) a[l] = stra[l]; } } r = d[lengtha][lengthb]; free(a); for(i = 0; i < lengthmin+2; i++) free(d[i]); free(d); return(r); } void print_help(int exval) { printf("%s,%s is a measure of the similarity inbetween two strings\n", PACKAGE, VERSION); printf("Usage: %s [-h] [-v] [-e] SOURCE TARGET\n\n", PACKAGE); printf(" -h print this help and exit\n"); printf(" -v print version and exit\n"); printf(" -e print the source, target string and levenshtein distance\n\n"); exit(exval); }