// crack-mac.c
//
// main.c
// FrequencyAnalysis
//
// Created by Pauline Kelly on 2/08/15.
// Copyright (c) 2015 Pauline Kelly. All rights reserved.
//
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
// Maximum number of input characters read from stdin.
#define TEXT_SIZE 200000 // Note, the longer the text the more likely you will get a good 'decode' from the start.
#define ALEN 26 // Number of chars in ENGLISH alphabet
#define CHFREQ "ETAONRISHDLFCMUGYPWBVKJXQZ" // Characters in order of appearance in English documents.
// Uppercase letters in alphabetical order; used as the starting order before sorting by frequency.
#define ALPHABET "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
// Minimal boolean type (this file does not include <stdbool.h>).
typedef char bool;
// Substitution table indexed by (letter - 'A'): one replacement character per uppercase letter.
typedef char CharHashMap[ALEN];
/* Program developed for NWEN243, Victoria University of Wellington
Author: Kris Bubendorfer, this extended version (c) 2015
LAB: 2
This program applies a basic frequency analysis to a cyphertext. It has been extended over the 2014 version to
solve polyalphabetic cyphers - by brute force. In this case, it applies the frequency analysis for different
numbers of n keys (polyalphabetic Caesar). Obviously it will need a cyphertext of about n times
the typical length for a monoalphabetic cypher.
Program is used like this:
Compile: gcc -o crack crack.c
Test file (ctext): JWRLS, XSSH PZK JH HES BJFV, UZU (this is not a realistic length piece of cypher text)
crack n
Argument:
n number of keys to try
---
% cat ctext | crack 1
ALICE, MEET YOU AT THE PARK, BOB <-- of course it won't be this correct. Don't worry about that for the -d option.
AMFDE, UEET LNH AT TIE RASC, ONO <-- this is what it really looks like, a larger sample is better, this is short.
*/
/*
 * Convert a lowercase letter to uppercase; every other character is returned
 * unchanged.
 *
 * ch: character to convert. Any char value is accepted, including negative
 *     ones on platforms where plain char is signed.
 * Returns the uppercase form of ch, or ch itself if it is not lowercase.
 */
char upcase(char ch){
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); handing them a negative plain char is undefined behaviour.
  if(islower((unsigned char)ch))
    ch -= 'a' - 'A';
  return ch;
}
// Forward declaration: processFile is defined after main, which calls it.
void processFile(char *text, int n);
/*
 * Entry point: read cyphertext from stdin, upper-case it, and hand it to
 * processFile together with the maximum number of keys to try.
 *
 * argv[1] is n, the number of keys to try; it must convert to an integer
 * greater than zero, otherwise a usage message is written to stderr and the
 * program exits with status -1.
 */
int main(int argc, char **argv){
  // get the command line argument n
  int n = (argc > 1) ? atoi(argv[1]) : 0;
  if(n <= 0){
    fprintf(stderr,"Malformed argument, use: crack [n], n > 0\n");
    exit(-1);
  }

  // Allocate space for the input text (read from stdin). calloc zero-fills
  // TEXT_SIZE+1 bytes, so the buffer stays NUL-terminated however many
  // characters are stored.
  char *text = calloc(TEXT_SIZE+1, sizeof(char));
  if(text == NULL){
    perror("calloc");
    exit(-1);
  }

  // Read TEXT_SIZE or end-of-input worth of characters (whichever is smaller)
  // and convert to uppercase as we go. fgetc returns an int so that EOF can
  // be told apart from every valid byte; testing the return value (rather
  // than feof) also stops the loop on a read error.
  int ch;
  size_t length = 0;
  while(length < TEXT_SIZE && (ch = fgetc(stdin)) != EOF){
    text[length++] = isalpha(ch) ? upcase((char)ch) : (char)ch;
  }

  /* At this point we have two things,
   * 1. The input cyphertext in "text"
   * 2. The maximum number of keys to try (n) - we'll be trying 1..n keys.
   *
   * What you need to do is as follows:
   * 1. create a for-loop that will check key lengths from 1..n
   * 2. for each i <= n, split the cypher text into i sub-texts. For i = 1, 1 subtext, for i = 2, 2 subtexts, of alternating characters etc.
   * 3. for each subtext:
   *    a. count the occurrence of each letter
   *    b. then map this onto the CHFREQ, to create a map between the sub-text and english
   *    c. apply the new map to the subtext
   * 4. merge the subtexts
   * 5. output the 'possibly' partially decoded text to stdout. This will only look OK if i was the correct number of keys
   *
   * what you need to output (sample will be provided) - exactly:
   *   i maps -> stderr
   *   i 'possible' translations
   *
   * You would be wise to make separate functions that perform various sub-tasks, and test them incrementally. Any other approach will likely
   * make your brain revolt. This isn't a long program, mine is 160 lines, with comments (and written in a very verbose style) - if yours is
   * getting too long, double check you're on the right track.
   */
  processFile(text, n);

  free(text);
  return 0;
}
/*
 * Split text into i interleaved sub-texts.
 *
 * Character k of text is stored in sub-text (k % i) at position (k / i), so
 * for i == 2 the two sub-texts hold alternating characters.
 *
 * subTextStrings:  array of at least i pointers; each of the first i slots is
 *                  overwritten with a freshly allocated, zero-filled (hence
 *                  NUL-terminated) buffer that the caller must free.
 * text:            source characters; only the first text_length are read.
 * text_length:     number of characters to distribute.
 * sub_text_length: size in bytes of each sub-text buffer, terminator included.
 * i:               number of sub-texts; a value <= 0 makes the call a no-op.
 *
 * If an allocation fails, the error is reported on stderr and the process
 * exits. Characters that would not fit in a buffer (one byte is always kept
 * for the terminator) are dropped instead of being written out of bounds.
 */
void initializeSubTexts(char **subTextStrings, char *text, size_t text_length, size_t sub_text_length, int i) {
  if(i <= 0){
    return;
  }

  // Always allocate at least one byte so every sub-text is a valid C string.
  size_t capacity = sub_text_length > 0 ? sub_text_length : 1;

  for(int stringIndex = 0; stringIndex < i; ++stringIndex){
    subTextStrings[stringIndex] = calloc(
      capacity,     // Number of items
      sizeof(char)  // Size of each item
    );
    if(subTextStrings[stringIndex] == NULL){
      perror("calloc");
      exit(EXIT_FAILURE);
    }
  }

  size_t appendIndex = 0;
  int stringIndex = 0;
  for(size_t index = 0; index < text_length; ++index){ //go through text
    // The last byte of each buffer is reserved for the NUL terminator; once
    // the write position reaches it nothing further can be stored.
    if(appendIndex + 1 >= capacity){
      break;
    }
    subTextStrings[stringIndex][appendIndex] = text[index];
    if(++stringIndex == i){
      stringIndex = 0;
      appendIndex++;
    }
  }
}
/*
 * Merge the sub-texts back into a single text: output character k is taken
 * from sub-text (k % i) at position (k / i).
 *
 * subTextStrings: array of i sub-text buffers; each must contain every
 *                 position that is read from it.
 * text_length:    number of characters to produce.
 * i:              number of sub-texts.
 *
 * Returns a newly allocated, NUL-terminated string of text_length characters
 * that the caller must free, or NULL if i <= 0 or the allocation fails.
 */
char *mergeSubTexts(char **subTextStrings, size_t text_length, int i) {
  if(i <= 0){
    return NULL;
  }

  // One extra zero-filled byte provides the terminator.
  char *newTextString = calloc(text_length+1, sizeof(char));
  if(newTextString == NULL){
    return NULL;
  }

  size_t appendIndex = 0;
  int stringIndex = 0;
  for(size_t index = 0; index < text_length; ++index){ //go through text
    newTextString[index] = subTextStrings[stringIndex][appendIndex];
    if(++stringIndex == i){
      stringIndex = 0;
      appendIndex++;
    }
  }
  return newTextString;
}
/*
 * Release the first i buffers referenced by subTextStrings, in order.
 * The pointer array itself is not freed and its entries are not modified.
 */
void freeSubTexts(char **subTextStrings, int i) {
  char **cursor = subTextStrings;
  int remaining = i;
  while(remaining > 0){
    free(*cursor);
    ++cursor;
    --remaining;
  }
}
/*
 * Report whether c is an uppercase letter in the range 'A'..'Z'.
 * Returns 1 if it is, 0 otherwise (lowercase letters yield 0).
 */
bool isLetter(char c) {
  if(c < 'A'){
    return 0;
  }
  return c <= 'Z';
}
/*
 * Look up the count stored for character c.
 *
 * frequencyTable: counts indexed by (letter - 'A').
 * c:              character to look up; no range check is performed, so the
 *                 caller must pass a character whose offset from 'A' is a
 *                 valid index into the table.
 * Returns the table entry at that offset.
 */
int frequencyOf(int *frequencyTable, char c) {
  const int slot = c - 'A';
  return frequencyTable[slot];
}
/*
 * Count how often each uppercase letter occurs in text.
 *
 * frequencyTable: output array of ALEN ints, indexed by (letter - 'A'); it is
 *                 zeroed first and then filled with the counts.
 * text:           characters to scan; anything isLetter rejects is ignored.
 * text_length:    number of characters of text to examine.
 */
void initializeFrequencyTable(int *frequencyTable, char *text, size_t text_length) {
  // Zero the table. memset is standard C and declared by <string.h>, unlike
  // the legacy bzero, which lives in <strings.h>.
  memset(frequencyTable, 0, sizeof(int) * ALEN);
  for(size_t index = 0; index < text_length; ++index){ //go through subtext chars
    char c = text[index];
    if(isLetter(c)){ // only 'A'..'Z' have a slot in the table
      frequencyTable[c-'A']++;
    }
  }
}
/*
 * Comparator that orders characters by descending frequency.
 *
 * context: the int frequency table consulted through frequencyOf.
 * a, b:    pointers to the two chars being compared.
 * Returns frequency(*b) - frequency(*a): negative when *a is the more
 * frequent character, positive when *b is, zero on a tie.
 *
 * NOTE(review): taking the context as the FIRST parameter looks like the
 * BSD/macOS qsort_r callback convention - confirm against the target platform.
 */
int frequency_comparator(void *context, const void *a, const void *b) {
  int *counts = context;
  const char *first = a;
  const char *second = b;
  int firstCount = frequencyOf(counts, *first);
  int secondCount = frequencyOf(counts, *second);
  return secondCount - firstCount;
}
/*
 * Fill sortedChars with the letters of ALPHABET ordered from most to least
 * frequent according to frequencyTable.
 *
 * sortedChars:    output buffer of at least ALEN chars; exactly ALEN chars
 *                 are written and no terminator is added.
 * frequencyTable: counts indexed by (letter - 'A').
 *
 * Letters with equal counts stay in alphabetical order because the sort is
 * stable.
 *
 * The ordering is done with a small insertion sort rather than qsort_r:
 * qsort_r is not part of standard C, and the BSD/macOS and glibc versions
 * take their arguments (and call the comparator) in incompatible orders.
 * With only ALEN elements the quadratic cost is negligible.
 */
void initializeSortedChars(char *sortedChars, int *frequencyTable) {
  memcpy(sortedChars, ALPHABET, sizeof(char) * ALEN);

  for(int sorted = 1; sorted < ALEN; ++sorted){
    char candidate = sortedChars[sorted];
    int slot = sorted;
    // A positive comparator result means the left-hand character is less
    // frequent than the candidate, so it has to move one place to the right.
    while(slot > 0 && frequency_comparator(frequencyTable, &sortedChars[slot-1], &candidate) > 0){
      sortedChars[slot] = sortedChars[slot-1];
      --slot;
    }
    sortedChars[slot] = candidate;
  }
}
/*
 * Return the value stored for key in hash_map.
 *
 * The slot is (key - 'A'); no range check is performed, so the caller must
 * pass a key whose offset from 'A' is a valid index into the map.
 */
char CharHashMapGet(CharHashMap hash_map, char key) {
  const int slot = key - 'A';
  return hash_map[slot];
}
/*
 * Store value for key in hash_map.
 *
 * The slot is (key - 'A'); no range check is performed, so the caller must
 * pass a key whose offset from 'A' is a valid index into the map.
 */
void CharHashMapSet(CharHashMap hash_map, char key, char value) {
  char *slot = hash_map + (key - 'A');
  *slot = value;
}
/*
 * Decode one sub-text in place by frequency analysis: count its letters, rank
 * them by frequency, map the k-th most frequent letter onto the k-th character
 * of CHFREQ, and rewrite every letter through that map. Characters that are
 * not uppercase letters are left untouched.
 */
static void decodeSubText(char *subText) {
  size_t subTextLength = strlen(subText);

  //count the occurrence of each letter
  int frequencyTable[ALEN];
  initializeFrequencyTable(frequencyTable, subText, subTextLength);

  //sort the alphabet by those counts, most frequent first
  char sortedChars[ALEN+1];
  sortedChars[ALEN] = '\0';
  initializeSortedChars(sortedChars, frequencyTable);

  //map our most frequent characters to the English language's most frequent
  //characters; sortedChars holds every letter once, so each slot gets set
  CharHashMap characterMap;
  for(int rank = 0; rank < ALEN; rank++){
    CharHashMapSet(characterMap, sortedChars[rank], CHFREQ[rank]);
  }

  //apply the map to the sub-text
  for(size_t position = 0; position < subTextLength; ++position){
    char c = subText[position];
    if(isLetter(c)){
      subText[position] = CharHashMapGet(characterMap, c);
    }
  }
}

/*
 * Try every key count from 1 to n: split text into that many interleaved
 * sub-texts, decode each one by frequency analysis, merge them back together
 * and print the candidate plaintext on its own line on stdout.
 *
 * text: NUL-terminated cyphertext; it is read but not modified.
 * n:    largest number of keys to try; nothing is printed when n < 1.
 *
 * If memory cannot be allocated, a message is written to stderr and the
 * function returns without trying the remaining key counts.
 *
 * NOTE(review): the lab brief in main also asks for the i maps to be written
 * to stderr; this function does not emit them.
 */
void processFile(char *text, int n) {
  size_t text_length = strlen(text);

  for(int i = 1; i <= n; ++i){
    size_t sub_text_length = text_length/i + 2;//accounts for null terminator and rounding errors on divide

    // The pointer table lives on the heap: its size comes from the
    // user-supplied key count, which is too large a risk for a stack VLA.
    char **subTextStrings = calloc((size_t)i, sizeof *subTextStrings);
    if(subTextStrings == NULL){
      perror("calloc");
      return;
    }

    //split cypher text into i subtexts
    initializeSubTexts(subTextStrings, text, text_length, sub_text_length, i);

    //do the frequency analysis on each subtext
    for(int stringIndex = 0; stringIndex < i; ++stringIndex){
      decodeSubText(subTextStrings[stringIndex]);
    }

    //now merge the substrings back together
    char *result = mergeSubTexts(subTextStrings, text_length, i);
    int merged = (result != NULL);
    if(merged){
      printf("%s\n", result);
    }

    free(result);
    freeSubTexts(subTextStrings, i);
    free(subTextStrings);

    if(!merged){
      fprintf(stderr, "processFile: unable to merge sub-texts\n");
      return;
    }
  }
}