确定C中简单替换密码的移位

问题描述 投票:0回答:1

我正在尝试实现一个替换密码,将字母表向前移动三个字母以加密文本。

如何通过将实际字母频率与平均字母频率进行比较来解密文本。

下面的代码生成一个包含加密文本中实际字母频率的数组。

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

static char clef[][7] =
{
  ['A'] = "X",
  ['B'] = "Y",
  ['C'] = "Z",
  ['D'] = "A",
  ['E'] = "B",
  ['F'] = "C",
  ['G'] = "D",
  ['H'] = "E",
  ['I'] = "F",
  ['J'] = "G",
  ['K'] = "H",
  ['L'] = "I",
  ['M'] = "J",
  ['N'] = "K",
  ['O'] = "L",
  ['P'] = "M",
  ['Q'] = "N",
  ['R'] = "O",
  ['S'] = "P",
  ['T'] = "Q",
  ['U'] = "R",
  ['V'] = "S",
  ['W'] = "T",
  ['X'] = "U",
  ['Y'] = "V",
  ['Z'] = "W"

};

double frequencyEn[] = {
    .082, .015, .028, .043, .127, .022,
    .020, .061, .070, .002, .008, .040,
    .024, .067, .075, .019, .001, .060,
    .063, .091, .028, .010, .024, .002,
    .020, .001 };

enum { MAX_CLEF = sizeof(clef) / sizeof(clef[0]) };

static char *prompt(FILE *fp, const char *prompt, char *buffer, size_t buflen)
{
  printf("%s", prompt);
  fflush(0);
  return fgets(buffer, buflen, fp);
}

static void substitute(FILE *fp, const char *buffer, const char *pad1, const char *pad2)
{
  int c;
  const char *pad = pad1;
  int col = 0;
  for (int i = 0; (c = buffer[i]) != '\0'; i++)
  {
    if (col == 0)
    {
      fputs(pad, fp);
      col += strlen(pad);
      pad = pad2;
    }

    col++;
    c = toupper(c);
    if (c < MAX_CLEF && clef[c][0] != '\0')
    {
      fputs(clef[c], fp);
      col += strlen(clef[c]);
    }
    else
    {
      putc(c, fp);
      col++;
    }
    if (col > 72)
    {
      putc('\n', fp);
      col = 0;
    }
  }
}


int main(void)
{
  char * buffer = 0;
  char * cryptText = 0;
  long length;
  FILE * plainTextFile = fopen ("plaintext.txt", "rb");
  FILE * cipherTextFile = fopen("ciphertext.txt", "w+");
  char string[100];
  int c = 0, count[26] = {0};
  int accum = 0;

  if (plainTextFile)
  {
    fseek (plainTextFile, 0, SEEK_END);
    length = ftell (plainTextFile);
    fseek (plainTextFile, 0, SEEK_SET);
    buffer = malloc (length);
    if (buffer)
    {
      fread (buffer, 1, length, plainTextFile);
    }
    fclose (plainTextFile);
  }

  if (buffer)
  {
    printf("%s", buffer);
  }
  else {
    printf("failure");
  }

  substitute(cipherTextFile, buffer, "", "     ");

  if (cipherTextFile)
  {
    fseek (cipherTextFile, 0, SEEK_END);
    length = ftell (cipherTextFile);
    fseek (cipherTextFile, 0, SEEK_SET);
    cryptText = malloc (length);
    if (cryptText)
    {
      fread (cryptText, 1, length, cipherTextFile);
    }
    fclose (cipherTextFile);
  }

  if (cryptText)
  {
    printf("%s", cryptText);
  }
  else {
    printf("failure");
  }

  while ( cryptText[c] != '\0' )
   {

      if ( cryptText[c] >= 'a' && cryptText[c] <= 'z' ){
         count[cryptText[c]-'a']++;
         accum++;
      }

      else if (cryptText[c] >= 'A' && cryptText[c] <= 'Z'){
          count[cryptText[c]-'A']++;
          accum++;
      }
      c++;
   }

   for ( c = 0 ; c < 26 ; c++ )
   {
      if( count[c] != 0 )
          printf( "%c %f\n", c+'a', ((double)count[c])/accum);


   }


}
c cryptography frequency-analysis caesar-cipher
1个回答
1
投票

tofro在他们的comment中提出的基本想法是合理的。

Chi-Squared Test计算期望值Ei和观测值Oi之间的差值的平方和除以期望值。维基百科页面甚至提到了一个应用程序:

在密码分析中,卡方检验用于比较明文和(可能)解密密文的分布。测试的最低值意味着解密成功的概率很高。

将其应用于手头的问题,您已经提供了普通英文文本中不同字母的预期频率表。我们需要一个程序将Caesar Cipher应用于给定的明文(或密文):

ec97.c

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

// #include "stderr.h"
void err_setarg0(const char *argv0);
void err_usage(const char *usestr);
void err_error(const char *errmsg);

int main(int argc, char **argv)
{
    char  *buffer = 0;
    size_t buflen = 0;

    err_setarg0(argv[0]);
    if (argc != 2)
        err_usage("offset");
    int offset = atoi(argv[1]) % 26;
    if (offset < 0)
        err_error("Offset should be a positive number 1..25\n");

    while (getline(&buffer, &buflen, stdin) != -1)
    {
        char *ptr = buffer;
        unsigned char u;
        while ((u = (unsigned char)*ptr++) != '\0')
        {
            if (isupper(u))
                u = (u - 'A' + offset) % 26 + 'A';
            else if (islower(u))
                u = (u - 'a' + offset) % 26 + 'a';
            putchar(u);
        }
    }

    free(buffer);
    return 0;
}

// Minimal stderr.c code
static const char *arg0 = "unknown";
void err_setarg0(const char *argv0)
{
    arg0 = argv0;
}
void err_usage(const char *usestr)
{
    fprintf(stderr, "Usage: %s %s\n", arg0, usestr);
    exit(EXIT_FAILURE);
}
void err_error(const char *errmsg)
{
    fprintf(stderr, "%s: %s\n", arg0, errmsg);
    exit(EXIT_FAILURE);
}

使用示例(程序ec97):

$ ec97 3 <<< 'The quick brown fox jumped over the lazy dog!'
Wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj!
$ ec97 23 <<< 'Wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj!'
The quick brown fox jumped over the lazy dog!
$

我们需要一个程序:

  1. 计算输入中字母的频率,忽略大小写,加上输入中的字母总数N.
  2. 对于每个可能的键,使用Fi值作为Oi和字母频率,Li×N作为Ei计算Χ²值,并对下标进行一些小心的下沉。
  3. 找出最小的Χ²值;几乎可以肯定是正确的加密密钥。

或者,在代码中:

dc97.c

/* Determine shift used for text encrypted using Caesar Cipher */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

/* Letter frequencies a-z in English */
static const float freq_en[] =
{
    .082, .015, .028, .043, .127, .022,
    .020, .061, .070, .002, .008, .040,
    .024, .067, .075, .019, .001, .060,
    .063, .091, .028, .010, .024, .002,
    .020, .001
};

int main(void)
{
    char *buffer = 0;
    size_t buflen = 0;
    size_t freq[26] = { 0 };
    size_t count = 0;

    while (getline(&buffer, &buflen, stdin) != -1)
    {
        char *ptr = buffer;
        unsigned char u;
        while ((u = (unsigned char)*ptr++) != '\0')
        {
            if (isalpha(u))
            {
                count++;
                freq[tolower(u) - 'a']++;
            }
        }
    }
    free(buffer);

    if (count == 0)
    {
        fprintf(stderr, "No data read!\n");
        return 1;
    }

    double chisq[26];
    for (int shift = 0; shift < 26; shift++)
    {
        chisq[shift] = 0.0;
        for (int letter = 0; letter < 26; letter++)
        {
            int index = (shift + letter) % 26;
            double delta = freq[index] - count * freq_en[letter];
            chisq[shift] += (delta * delta) / (count * freq_en[letter]);
        }
        printf("'%c' = %13.6f\n", shift + 'A', chisq[shift]);
    }

    int min_i = 0;
    double val_i = chisq[0];
    for (int i = 1; i < 26; i++)
    {
        if (chisq[i] < val_i)
        {
            val_i = chisq[i];
            min_i = i;
        }
    }

    printf("Best match is (%d) '%c' = %10.6f\n", min_i, min_i + 'A', val_i);
    return 0;
}

Testing

示例运行:

$ ec97 3 <<< 'The quick brown fox jumped over the lazy dog!' | dc97
'A' =    143.784398
'B' =    564.772479
'C' =    125.131609
'D' =     87.069134
'E' =    178.249272
'F' =     90.994048
'G' =    194.326935
'H' =    301.117365
'I' =    710.786241
'J' =    147.377473
'K' =    304.179348
'L' =    243.699823
'M' =    137.639230
'N' =    183.885553
'O' =    135.720804
'P' =    106.261239
'Q' =    196.046792
'R' =    506.812184
'S' =    517.893291
'T' =    106.267925
'U' =    375.525078
'V' =    202.806561
'W' =    116.660543
'X' =    304.590809
'Y' =    364.746822
'Z' =    139.113568
Best match is (3) 'D' =  87.069134
$

我还在程序源代码和目录中的makefile以及几个数据文件上运行它。第一个是通常被称为'The Great Panjandrum'的废话:

So she went into the garden
to cut a cabbage-leaf
to make an apple-pie
and at the same time
a great she-bear coming down the street
pops its head into the shop
What no soap
So he died
and she very imprudently married the Barber
and there were present
the Picninnies
and the Joblillies
and the Garyulies
and the great Panjandrum himself
with the little round button at top
and they all fell to playing the game of catch-as-catch-can
till the gunpowder ran out at the heels of their boots

第二个是为这个问题精心制作的:

Bond was on a roll, playing jazz on his sax.
In all of his doings, nothing was as bad as what was going on now,
but nary a jocular hint did his writing contain that
all was not going as his boss though it would.
Quit? Zounds!  No way to quit now.

没有那个,可能会被认为可以抛弃它,但似乎没有问题。

$ x=$(random 1 25)
$ ec97 $x < great.panjandrum
Uq ujg ygpv kpvq vjg ictfgp
vq ewv c ecddcig-ngch
vq ocmg cp crrng-rkg
cpf cv vjg ucog vkog
c itgcv ujg-dgct eqokpi fqyp vjg uvtggv
rqru kvu jgcf kpvq vjg ujqr
Yjcv pq uqcr
Uq jg fkgf
cpf ujg xgta kortwfgpvna octtkgf vjg Dctdgt
cpf vjgtg ygtg rtgugpv
vjg Rkepkppkgu
cpf vjg Lqdnknnkgu
cpf vjg Ictawnkgu
cpf vjg itgcv Rcplcpftwo jkougnh
ykvj vjg nkvvng tqwpf dwvvqp cv vqr
cpf vjga cnn hgnn vq rncakpi vjg icog qh ecvej-cu-ecvej-ecp
vknn vjg iwprqyfgt tcp qwv cv vjg jggnu qh vjgkt dqqvu
$ ec97 $x | dc97
'A' =   3403.710518
'B' =   1821.123417
'C' =     29.719952
'D' =   5238.969620
'E' =   2077.413735
'F' =   8274.966485
'G' =   2446.331525
'H' =   8950.309208
'I' =   1362.257963
'J' =   4419.368172
'K' =   3161.502276
'L' =   3113.030682
'M' =   7778.647756
'N' =   1112.302912
'O' =   1497.782346
'P' =   1490.896824
'Q' =  10395.032795
'R' =   1985.696886
'S' =   2382.319358
'T' =   4874.708427
'U' =   3280.570608
'V' =   1467.488275
'W' =   7318.221432
'X' =   5634.124795
'Y' =   3108.392584
'Z' =   2849.154134
Best match is (2) 'C' =  29.719952
$ echo "Key = $x"
Key = 2
$

$ x=$(random 1 25)
$ ec97 $x < bond.jazz
Ylka txp lk x olii, mixvfkd gxww lk efp pxu.
Fk xii lc efp alfkdp, klqefkd txp xp yxa xp texq txp dlfkd lk klt,
yrq kxov x glzrixo efkq afa efp tofqfkd zlkqxfk qexq
xii txp klq dlfkd xp efp ylpp qelrde fq tlria.
Nrfq? Wlrkap!  Kl txv ql nrfq klt.
$ ec97 $x < bond.jazz | dc97
'A' =   2630.974532
'B' =   2107.704681
'C' =   1473.828862
'D' =    865.368278
'E' =    715.940980
'F' =   1010.885486
'G' =   2881.481606
'H' =   3297.014998
'I' =   1302.909485
'J' =    871.665974
'K' =    917.232399
'L' =   2716.342024
'M' =   2525.973294
'N' =   2077.065275
'O' =   3096.667665
'P' =   2211.782909
'Q' =   1793.302623
'R' =   1427.340376
'S' =   1537.934006
'T' =    702.667000
'U' =   3489.590647
'V' =   3111.999371
'W' =   1445.825861
'X' =    142.412135
'Y' =   2671.998367
'Z' =   1977.131986
Best match is (23) 'X' = 142.412135
$ echo "Key = $x"
Key = 23
$

它似乎工作得很好。

© www.soinside.com 2019 - 2024. All rights reserved.